Skip to content

Instantly share code, notes, and snippets.

@oroszl
Created July 19, 2017 19:58
Show Gist options
  • Save oroszl/26d82c52e5b4ed98ce9d33282fde78a0 to your computer and use it in GitHub Desktop.
Save oroszl/26d82c52e5b4ed98ce9d33282fde78a0 to your computer and use it in GitHub Desktop.
further optimizing dense sisl
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"%pylab inline\n",
"import sisl"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# importing the necessary structures\n",
"dat=sisl.get_sile('/tmp/Work/jij/runs/Fe/2atuc/Fe.nc')\n",
"dh=dat.read_hamiltonian()\n",
"dg=dat.read_geometry()\n",
"ds=dat.read_supercell()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# get hamiltonian and overlap matrices\n",
"# important to note that we force \n",
"# complex data type and we prepare \n",
"# the shape of our objects to accomodate\n",
"# fast dot products \n",
"# if doing k integrals then this step needs to be outside of the k loop\n",
"dh.hup=dh.tocsr(0).toarray().reshape(dh.no,dh.n_s,dh.no).transpose(0,2,1).astype('complex128')\n",
"dh.hdo=dh.tocsr(1).toarray().reshape(dh.no,dh.n_s,dh.no).transpose(0,2,1).astype('complex128')\n",
"dh.sov=dh.tocsr(2).toarray().reshape(dh.no,dh.n_s,dh.no).transpose(0,2,1).astype('complex128')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"k=(0.1,0.2,0.3) # a generic k pont to test \n",
"k = np.asarray(k, np.float64) # this two conversion lines are from the sisl source \n",
"k.shape = (-1,) # \n",
"phases = np.exp(-1j * dot(dot(dot(dh.rcell, k), dh.cell), dh.sc.sc_off.T)) # this generates the list of phases"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n",
"True\n",
"True\n"
]
}
],
"source": [
"HKdotresh=(dh.hup.reshape(dg.no**2,-1).dot(phases)).reshape(dg.no,dg.no) #1 reshaping till stuff fits\n",
"HKein=einsum('abc,c->ab',dh.hup,phases) #2 index-ed notation galore \n",
"HKst=(dh.hup*phases).sum(axis=-1) #3 phases is a simple objects\n",
"HKK=dh.Hk(k=k,format='array',spin=0) # original sisl way of doing things\n",
"# as shown below all are close :)\n",
"print(allclose(HKdotresh,HKein))\n",
"print(allclose(HKdotresh,HKst))\n",
"print(allclose(HKdotresh,HKK))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.12 ms ± 4.07 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"%timeit (dh.hup.reshape(dh.no**2,-1).dot(phases)).reshape(dh.no,dh.no) #1 reshaping till stuff fits"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.1 ms ± 1.71 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"%timeit einsum('abc,c->ab',dh.hup,phases) #2 index-ed notation galore "
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.14 ms ± 8.05 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"%timeit (dh.hup*phases).sum(axis=-1) #3 phases is a simple objects"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"18.4 ms ± 29.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
"source": [
"%timeit dh.Hk(k=(0.1,0.2,0.3),format='array') # this is the original roughly 20x slower than the above"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"273 µs ± 959 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"%timeit dh.hup.sum(axis=1) # gamma point potentially faster with extra factor 5"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment