Created
March 9, 2023 22:17
-
-
Save andyfaff/798dc630efebda33419e64fece4e6d58 to your computer and use it in GitHub Desktop.
Numba reflectivity kernel investigation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "dc9bf6df", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import numba\n", | |
"from numba import guvectorize, prange\n", | |
"import matplotlib.pyplot as plt\n", | |
"from refnx.reflect import available_backends, use_reflect_backend, abeles, Structure, SLD\n", | |
"from refl1d.reflectivity import reflectivity" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "9aade67e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"@guvectorize([\"void(float64[:], float64[:,:], float64[:])\"],\n", | |
" \"(m),(n,p)->(m)\", cache=True)\n", | |
"def f(q, layers, R):\n", | |
" nlayers = layers.shape[0] - 2\n", | |
" npnts = q.size\n", | |
" \n", | |
" kn = np.zeros((npnts, nlayers + 2), np.complex128)\n", | |
" mi00 = np.ones((npnts, nlayers + 1), np.complex128)\n", | |
"\n", | |
" sld = np.zeros(nlayers + 2, np.complex128)\n", | |
" TINY = 1e-30\n", | |
" # addition of TINY is to ensure the correct branch cut\n", | |
" # in the complex sqrt calculation of kn.\n", | |
" sld[1:] += (\n", | |
" (layers[1:, 1] - layers[0, 1]) + 1j * (np.abs(layers[1:, 2]) + TINY)\n", | |
" ) * 1.0e-6\n", | |
"\n", | |
" # kn is a 2D array. Rows are Q points, columns are kn in a layer.\n", | |
" # calculate wavevector in each layer, for each Q point.\n", | |
" q = np.ascontiguousarray(q)\n", | |
" tempq = np.reshape(q, (q.shape[0], 1))\n", | |
" kn[:] = np.sqrt(tempq ** 2.0 / 4.0 - 4.0 * np.pi * sld)\n", | |
"\n", | |
" # reflectances for each layer\n", | |
" # rj.shape = (npnts, nlayers + 1)\n", | |
" rj = kn[:, :-1] - kn[:, 1:]\n", | |
" rj /= kn[:, :-1] + kn[:, 1:]\n", | |
" rj *= np.exp(-2.0 * kn[:, :-1] * kn[:, 1:] * layers[1:, 3] ** 2)\n", | |
"\n", | |
" # characteristic matrices for each layer\n", | |
" # miNN.shape = (npnts, nlayers + 1)\n", | |
" if nlayers:\n", | |
" mi00[:, 1:] = np.exp(kn[:, 1:-1] * 1j * np.fabs(layers[1:-1, 0]))\n", | |
" mi11 = 1.0 / mi00\n", | |
" mi10 = rj * mi00\n", | |
" mi01 = rj * mi11\n", | |
"\n", | |
" # initialise matrix total\n", | |
" mrtot00 = mi00[:, 0]\n", | |
" mrtot01 = mi01[:, 0]\n", | |
" mrtot10 = mi10[:, 0]\n", | |
" mrtot11 = mi11[:, 0]\n", | |
"\n", | |
" # propagate characteristic matrices\n", | |
" for idx in range(1, nlayers + 1):\n", | |
" # matrix multiply mrtot by characteristic matrix\n", | |
" p0 = mrtot00 * mi00[:, idx] + mrtot10 * mi01[:, idx]\n", | |
" p1 = mrtot00 * mi10[:, idx] + mrtot10 * mi11[:, idx]\n", | |
" mrtot00 = p0\n", | |
" mrtot10 = p1\n", | |
"\n", | |
" p0 = mrtot01 * mi00[:, idx] + mrtot11 * mi01[:, idx]\n", | |
" p1 = mrtot01 * mi10[:, idx] + mrtot11 * mi11[:, idx]\n", | |
"\n", | |
" mrtot01 = p0\n", | |
" mrtot11 = p1\n", | |
"\n", | |
" r = mrtot01 / mrtot00\n", | |
" reflectivity = r * np.conj(r)\n", | |
"# reflectivity *= scale\n", | |
"# reflectivity += bkg\n", | |
"# return np.real(np.reshape(reflectivity, qvals.shape))\n", | |
" R[:] = np.real(reflectivity)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "623c9a76", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"q = np.linspace(0.01, 0.5, 3000)\n", | |
"\n", | |
"si = SLD(2.07)\n", | |
"sio2 = SLD(3.47)\n", | |
"layer = SLD(-0.5+1e-5j)\n", | |
"d2o = SLD(6.36)\n", | |
"\n", | |
"structure = si | sio2(100, 3) | layer(500, 3) | d2o(0, 3)\n", | |
"\n", | |
"w = structure.slabs()[:, :-1]\n", | |
"microw = structure._micro_slabs()[:, :-1]\n", | |
"\n", | |
"R = np.empty_like(q)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "89f5ceda", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"np.testing.assert_allclose(f(q, w, R), abeles(q, w))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "f9bf976a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"270 µs ± 1.13 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"with use_reflect_backend('c_parratt') as g:\n", | |
" %timeit g(q, w, threads=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "783129de", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"388 µs ± 427 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit f(q, w, R)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "3b05aa1e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"280 ms ± 2.73 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit f(q, microw, R)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "7ea04022", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"96.6 ms ± 180 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"with use_reflect_backend('c_parratt') as g:\n", | |
" %timeit g(q, microw, threads=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "3e187ce3", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"201 ms ± 1.78 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"kz = np.ascontiguousarray(q / 2.0)\n", | |
"rho_index = np.zeros_like(kz, int)\n", | |
"depth = np.ascontiguousarray(microw[:, 0])\n", | |
"rho = np.ascontiguousarray(microw[:, 1])\n", | |
"irho = np.ascontiguousarray(microw[:, 2])\n", | |
"sigma = np.ascontiguousarray(microw[1:, 3])\n", | |
"\n", | |
"# measure timings for refl1d.reflectivity.reflectivity\n", | |
"%timeit reflectivity(kz, depth, rho, irho=irho, sigma=sigma)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "79c5b1b2", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"547 µs ± 2.12 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"kz = np.ascontiguousarray(q / 2.0)\n", | |
"depth = np.ascontiguousarray(w[:, 0])\n", | |
"rho = np.ascontiguousarray(w[:, 1])\n", | |
"irho = np.ascontiguousarray(w[:, 2])\n", | |
"sigma = np.ascontiguousarray(w[1:, 3])\n", | |
"\n", | |
"# measure timings for refl1d.reflectivity.reflectivity\n", | |
"%timeit reflectivity(kz, depth, rho, irho=irho, sigma=sigma)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "0a4e4a86", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"_REFL_SIG = 'void(f8[:], f8[:, :], f8[:])'\n", | |
"_REFL_LOCALS = {\n", | |
" \"nlayers\": numba.int64,\n", | |
" \"npoints\": numba.int64,\n", | |
" \"TINY\": numba.float64,\n", | |
" \"sub\": numba.complex128,\n", | |
" \"superf\": numba.complex128,\n", | |
" \"_t\": numba.complex128,\n", | |
" \"kn\": numba.complex128,\n", | |
" \"kn_next\": numba.complex128,\n", | |
" \"qq2\": numba.complex128,\n", | |
" \"rj\": numba.complex128,\n", | |
" \"beta\": numba.complex128,\n", | |
" \"r\": numba.complex128,\n", | |
"}\n", | |
"\n", | |
"_REFL_LOCALS.update((\"B{i}{j}\".format(i=i, j=j), numba.complex128)\n", | |
" for i in range(0, 2) for j in range(0, 2))\n", | |
"_REFL_LOCALS.update((\"M{i}{j}\".format(i=i, j=j), numba.complex128)\n", | |
" for i in range(0, 2) for j in range(0, 2))\n", | |
"_REFL_LOCALS.update((\"C{i}\".format(i=i), numba.complex128)\n", | |
" for i in range(0, 2))\n", | |
"\n", | |
"\n", | |
"\n", | |
"@numba.njit(_REFL_SIG, parallel=False, cache=True, locals=_REFL_LOCALS)\n", | |
"def refl(q, w, R):\n", | |
" nlayers = w.shape[0] - 2\n", | |
" npoints = q.shape[0]\n", | |
" TINY = 1.0e-11\n", | |
" \n", | |
" SLD = np.zeros((nlayers + 2), dtype=np.complex128)\n", | |
" thickness = np.zeros((nlayers), dtype=np.complex128)\n", | |
" rough_sqr = np.zeros((nlayers + 1), dtype=float)\n", | |
" \n", | |
" sub = complex(w[-1, 1], np.fabs(w[-1, 2]) + TINY);\n", | |
" superf = complex(w[0, 1])\n", | |
" \n", | |
" oneC = complex(1.0, 0)\n", | |
"\n", | |
" for i in range(1, nlayers + 1):\n", | |
" _t = complex(w[i, 1], np.fabs(w[i, 2]) + TINY)\n", | |
" SLD[i] = 4e-6 * np.pi * (_t - superf)\n", | |
" thickness[i - 1] = complex(0, np.fabs(w[i, 0]))\n", | |
" rough_sqr[i - 1] = -2 * w[i, 3]**2\n", | |
" \n", | |
" SLD[0] = complex(0, 0)\n", | |
" SLD[nlayers + 1] = 4e-6 * np.pi * (sub - superf)\n", | |
" rough_sqr[nlayers] = -2 * w[-1, 3]**2\n", | |
"\n", | |
" for j in range(npoints):\n", | |
" B00 = B11 = 1.0\n", | |
" B01 = B10 = 0\n", | |
" \n", | |
" qq2 = complex(q[j] * q[j] / 4, 0)\n", | |
" \n", | |
" # now calculate reflectivities and wavevectors\n", | |
" kn = complex(q[j] / 2.0, 0)\n", | |
" \n", | |
" for ii in range(nlayers + 1):\n", | |
" kn_next = np.sqrt(qq2 - SLD[ii + 1])\n", | |
" \n", | |
" # reflectance of the interface\n", | |
" rj = (kn - kn_next)/(kn + kn_next) * np.exp(kn * kn_next * rough_sqr[ii])\n", | |
" beta = np.exp(kn * thickness[ii - 1])\n", | |
" \n", | |
" M00 = beta if i > 0 else 1.0\n", | |
" M11 = 1 / beta if i > 0 else 1.0\n", | |
" M10 = rj * M00\n", | |
" M01 = rj * M11\n", | |
"\n", | |
" # Multiply existing layers B by new layer M\n", | |
" # We have unrolled the matrix multiply for speed.\n", | |
" C1 = B00*M00 + B10*M01\n", | |
" C2 = B00*M10 + B10*M11\n", | |
" B00 = C1\n", | |
" B10 = C2\n", | |
" C1 = B01*M00 + B11*M01\n", | |
" C2 = B01*M10 + B11*M11\n", | |
" B01 = C1\n", | |
" B11 = C2\n", | |
"\n", | |
" kn = kn_next\n", | |
" \n", | |
" r = B01 / B00\n", | |
" r = r * np.conj(r)\n", | |
" R[j] = np.real(r)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "2288e6a2", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"174 ms ± 1.75 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit refl(q, microw, R)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "09772dc1", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment