Created
July 19, 2017 19:58
-
-
Save oroszl/26d82c52e5b4ed98ce9d33282fde78a0 to your computer and use it in GitHub Desktop.
further optimizing dense sisl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Populating the interactive namespace from numpy and matplotlib\n" | |
] | |
} | |
], | |
"source": [ | |
"%pylab inline\n", | |
"import sisl" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# importing the necessary structures\n", | |
"dat=sisl.get_sile('/tmp/Work/jij/runs/Fe/2atuc/Fe.nc')\n", | |
"dh=dat.read_hamiltonian()\n", | |
"dg=dat.read_geometry()\n", | |
"ds=dat.read_supercell()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# get hamiltonian and overlap matrices\n", | |
"# important to note that we force \n", | |
"# complex data type and we prepare \n", | |
"# the shape of our objects to accomodate\n", | |
"# fast dot products \n", | |
"# if doing k integrals then this step needs to be outside of the k loop\n", | |
"dh.hup=dh.tocsr(0).toarray().reshape(dh.no,dh.n_s,dh.no).transpose(0,2,1).astype('complex128')\n", | |
"dh.hdo=dh.tocsr(1).toarray().reshape(dh.no,dh.n_s,dh.no).transpose(0,2,1).astype('complex128')\n", | |
"dh.sov=dh.tocsr(2).toarray().reshape(dh.no,dh.n_s,dh.no).transpose(0,2,1).astype('complex128')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"k=(0.1,0.2,0.3) # a generic k pont to test \n", | |
"k = np.asarray(k, np.float64) # this two conversion lines are from the sisl source \n", | |
"k.shape = (-1,) # \n", | |
"phases = np.exp(-1j * dot(dot(dot(dh.rcell, k), dh.cell), dh.sc.sc_off.T)) # this generates the list of phases" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"True\n", | |
"True\n", | |
"True\n" | |
] | |
} | |
], | |
"source": [ | |
"HKdotresh=(dh.hup.reshape(dg.no**2,-1).dot(phases)).reshape(dg.no,dg.no) #1 reshaping till stuff fits\n", | |
"HKein=einsum('abc,c->ab',dh.hup,phases) #2 index-ed notation galore \n", | |
"HKst=(dh.hup*phases).sum(axis=-1) #3 phases is a simple objects\n", | |
"HKK=dh.Hk(k=k,format='array',spin=0) # original sisl way of doing things\n", | |
"# as shown below all are close :)\n", | |
"print(allclose(HKdotresh,HKein))\n", | |
"print(allclose(HKdotresh,HKst))\n", | |
"print(allclose(HKdotresh,HKK))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.12 ms ± 4.07 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit (dh.hup.reshape(dh.no**2,-1).dot(phases)).reshape(dh.no,dh.no) #1 reshaping till stuff fits" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.1 ms ± 1.71 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit einsum('abc,c->ab',dh.hup,phases) #2 index-ed notation galore " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.14 ms ± 8.05 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit (dh.hup*phases).sum(axis=-1) #3 phases is a simple objects" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"18.4 ms ± 29.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit dh.Hk(k=(0.1,0.2,0.3),format='array') # this is the original roughly 20x slower than the above" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"273 µs ± 959 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit dh.hup.sum(axis=1) # gamma point potentially faster with extra factor 5" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.4.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment