Skip to content

Instantly share code, notes, and snippets.

@andyfaff
Created March 9, 2023 22:17
Show Gist options
  • Save andyfaff/798dc630efebda33419e64fece4e6d58 to your computer and use it in GitHub Desktop.
Save andyfaff/798dc630efebda33419e64fece4e6d58 to your computer and use it in GitHub Desktop.
Numba reflectivity kernel investigation
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "dc9bf6df",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import numba\n",
"from numba import guvectorize, prange\n",
"import matplotlib.pyplot as plt\n",
"from refnx.reflect import available_backends, use_reflect_backend, abeles, Structure, SLD\n",
"from refl1d.reflectivity import reflectivity"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9aade67e",
"metadata": {},
"outputs": [],
"source": [
"@guvectorize([\"void(float64[:], float64[:,:], float64[:])\"],\n",
" \"(m),(n,p)->(m)\", cache=True)\n",
"def f(q, layers, R):\n",
" nlayers = layers.shape[0] - 2\n",
" npnts = q.size\n",
" \n",
" kn = np.zeros((npnts, nlayers + 2), np.complex128)\n",
" mi00 = np.ones((npnts, nlayers + 1), np.complex128)\n",
"\n",
" sld = np.zeros(nlayers + 2, np.complex128)\n",
" TINY = 1e-30\n",
" # addition of TINY is to ensure the correct branch cut\n",
" # in the complex sqrt calculation of kn.\n",
" sld[1:] += (\n",
" (layers[1:, 1] - layers[0, 1]) + 1j * (np.abs(layers[1:, 2]) + TINY)\n",
" ) * 1.0e-6\n",
"\n",
" # kn is a 2D array. Rows are Q points, columns are kn in a layer.\n",
" # calculate wavevector in each layer, for each Q point.\n",
" q = np.ascontiguousarray(q)\n",
" tempq = np.reshape(q, (q.shape[0], 1))\n",
" kn[:] = np.sqrt(tempq ** 2.0 / 4.0 - 4.0 * np.pi * sld)\n",
"\n",
" # reflectances for each layer\n",
" # rj.shape = (npnts, nlayers + 1)\n",
" rj = kn[:, :-1] - kn[:, 1:]\n",
" rj /= kn[:, :-1] + kn[:, 1:]\n",
" rj *= np.exp(-2.0 * kn[:, :-1] * kn[:, 1:] * layers[1:, 3] ** 2)\n",
"\n",
" # characteristic matrices for each layer\n",
" # miNN.shape = (npnts, nlayers + 1)\n",
" if nlayers:\n",
" mi00[:, 1:] = np.exp(kn[:, 1:-1] * 1j * np.fabs(layers[1:-1, 0]))\n",
" mi11 = 1.0 / mi00\n",
" mi10 = rj * mi00\n",
" mi01 = rj * mi11\n",
"\n",
" # initialise matrix total\n",
" mrtot00 = mi00[:, 0]\n",
" mrtot01 = mi01[:, 0]\n",
" mrtot10 = mi10[:, 0]\n",
" mrtot11 = mi11[:, 0]\n",
"\n",
" # propagate characteristic matrices\n",
" for idx in range(1, nlayers + 1):\n",
" # matrix multiply mrtot by characteristic matrix\n",
" p0 = mrtot00 * mi00[:, idx] + mrtot10 * mi01[:, idx]\n",
" p1 = mrtot00 * mi10[:, idx] + mrtot10 * mi11[:, idx]\n",
" mrtot00 = p0\n",
" mrtot10 = p1\n",
"\n",
" p0 = mrtot01 * mi00[:, idx] + mrtot11 * mi01[:, idx]\n",
" p1 = mrtot01 * mi10[:, idx] + mrtot11 * mi11[:, idx]\n",
"\n",
" mrtot01 = p0\n",
" mrtot11 = p1\n",
"\n",
" r = mrtot01 / mrtot00\n",
" reflectivity = r * np.conj(r)\n",
"# reflectivity *= scale\n",
"# reflectivity += bkg\n",
"# return np.real(np.reshape(reflectivity, qvals.shape))\n",
" R[:] = np.real(reflectivity)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "623c9a76",
"metadata": {},
"outputs": [],
"source": [
"q = np.linspace(0.01, 0.5, 3000)\n",
"\n",
"si = SLD(2.07)\n",
"sio2 = SLD(3.47)\n",
"layer = SLD(-0.5+1e-5j)\n",
"d2o = SLD(6.36)\n",
"\n",
"structure = si | sio2(100, 3) | layer(500, 3) | d2o(0, 3)\n",
"\n",
"w = structure.slabs()[:, :-1]\n",
"microw = structure._micro_slabs()[:, :-1]\n",
"\n",
"R = np.empty_like(q)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "89f5ceda",
"metadata": {},
"outputs": [],
"source": [
"np.testing.assert_allclose(f(q, w, R), abeles(q, w))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f9bf976a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"270 µs ± 1.13 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
]
}
],
"source": [
"with use_reflect_backend('c_parratt') as g:\n",
" %timeit g(q, w, threads=1)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "783129de",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"388 µs ± 427 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
]
}
],
"source": [
"%timeit f(q, w, R)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "3b05aa1e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"280 ms ± 2.73 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit f(q, microw, R)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7ea04022",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"96.6 ms ± 180 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"with use_reflect_backend('c_parratt') as g:\n",
" %timeit g(q, microw, threads=1)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "3e187ce3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"201 ms ± 1.78 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"kz = np.ascontiguousarray(q / 2.0)\n",
"rho_index = np.zeros_like(kz, int)\n",
"depth = np.ascontiguousarray(microw[:, 0])\n",
"rho = np.ascontiguousarray(microw[:, 1])\n",
"irho = np.ascontiguousarray(microw[:, 2])\n",
"sigma = np.ascontiguousarray(microw[1:, 3])\n",
"\n",
"# measure timings for refl1d.reflectivity.reflectivity\n",
"%timeit reflectivity(kz, depth, rho, irho=irho, sigma=sigma)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "79c5b1b2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"547 µs ± 2.12 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
]
}
],
"source": [
"kz = np.ascontiguousarray(q / 2.0)\n",
"depth = np.ascontiguousarray(w[:, 0])\n",
"rho = np.ascontiguousarray(w[:, 1])\n",
"irho = np.ascontiguousarray(w[:, 2])\n",
"sigma = np.ascontiguousarray(w[1:, 3])\n",
"\n",
"# measure timings for refl1d.reflectivity.reflectivity\n",
"%timeit reflectivity(kz, depth, rho, irho=irho, sigma=sigma)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "0a4e4a86",
"metadata": {},
"outputs": [],
"source": [
"_REFL_SIG = 'void(f8[:], f8[:, :], f8[:])'\n",
"_REFL_LOCALS = {\n",
" \"nlayers\": numba.int64,\n",
" \"npoints\": numba.int64,\n",
" \"TINY\": numba.float64,\n",
" \"sub\": numba.complex128,\n",
" \"superf\": numba.complex128,\n",
" \"_t\": numba.complex128,\n",
" \"kn\": numba.complex128,\n",
" \"kn_next\": numba.complex128,\n",
" \"qq2\": numba.complex128,\n",
" \"rj\": numba.complex128,\n",
" \"beta\": numba.complex128,\n",
" \"r\": numba.complex128,\n",
"}\n",
"\n",
"_REFL_LOCALS.update((\"B{i}{j}\".format(i=i, j=j), numba.complex128)\n",
" for i in range(0, 2) for j in range(0, 2))\n",
"_REFL_LOCALS.update((\"M{i}{j}\".format(i=i, j=j), numba.complex128)\n",
" for i in range(0, 2) for j in range(0, 2))\n",
"_REFL_LOCALS.update((\"C{i}\".format(i=i), numba.complex128)\n",
" for i in range(0, 2))\n",
"\n",
"\n",
"\n",
"@numba.njit(_REFL_SIG, parallel=False, cache=True, locals=_REFL_LOCALS)\n",
"def refl(q, w, R):\n",
" nlayers = w.shape[0] - 2\n",
" npoints = q.shape[0]\n",
" TINY = 1.0e-11\n",
" \n",
" SLD = np.zeros((nlayers + 2), dtype=np.complex128)\n",
" thickness = np.zeros((nlayers), dtype=np.complex128)\n",
" rough_sqr = np.zeros((nlayers + 1), dtype=float)\n",
" \n",
" sub = complex(w[-1, 1], np.fabs(w[-1, 2]) + TINY);\n",
" superf = complex(w[0, 1])\n",
" \n",
" oneC = complex(1.0, 0)\n",
"\n",
" for i in range(1, nlayers + 1):\n",
" _t = complex(w[i, 1], np.fabs(w[i, 2]) + TINY)\n",
" SLD[i] = 4e-6 * np.pi * (_t - superf)\n",
" thickness[i - 1] = complex(0, np.fabs(w[i, 0]))\n",
" rough_sqr[i - 1] = -2 * w[i, 3]**2\n",
" \n",
" SLD[0] = complex(0, 0)\n",
" SLD[nlayers + 1] = 4e-6 * np.pi * (sub - superf)\n",
" rough_sqr[nlayers] = -2 * w[-1, 3]**2\n",
"\n",
" for j in range(npoints):\n",
" B00 = B11 = 1.0\n",
" B01 = B10 = 0\n",
" \n",
" qq2 = complex(q[j] * q[j] / 4, 0)\n",
" \n",
" # now calculate reflectivities and wavevectors\n",
" kn = complex(q[j] / 2.0, 0)\n",
" \n",
" for ii in range(nlayers + 1):\n",
" kn_next = np.sqrt(qq2 - SLD[ii + 1])\n",
" \n",
" # reflectance of the interface\n",
" rj = (kn - kn_next)/(kn + kn_next) * np.exp(kn * kn_next * rough_sqr[ii])\n",
" beta = np.exp(kn * thickness[ii - 1])\n",
" \n",
" M00 = beta if i > 0 else 1.0\n",
" M11 = 1 / beta if i > 0 else 1.0\n",
" M10 = rj * M00\n",
" M01 = rj * M11\n",
"\n",
" # Multiply existing layers B by new layer M\n",
" # We have unrolled the matrix multiply for speed.\n",
" C1 = B00*M00 + B10*M01\n",
" C2 = B00*M10 + B10*M11\n",
" B00 = C1\n",
" B10 = C2\n",
" C1 = B01*M00 + B11*M01\n",
" C2 = B01*M10 + B11*M11\n",
" B01 = C1\n",
" B11 = C2\n",
"\n",
" kn = kn_next\n",
" \n",
" r = B01 / B00\n",
" r = r * np.conj(r)\n",
" R[j] = np.real(r)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "2288e6a2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"174 ms ± 1.75 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit refl(q, microw, R)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "09772dc1",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment