Skip to content

Instantly share code, notes, and snippets.

@andyfaff
Last active March 7, 2023 23:59
Show Gist options
  • Save andyfaff/efa84c2a88bbb837ff6dc5acdb84dea4 to your computer and use it in GitHub Desktop.
Save andyfaff/efa84c2a88bbb837ff6dc5acdb84dea4 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "ff2c5513",
"metadata": {},
"source": [
"# Comparison of kernel execution times\n",
"\n",
"This notebook compares the execution time of the core reflectivity kernel across several refl1d releases and across the calculation backends available in refnx."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "a87afb2e",
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import platform\n",
"import itertools\n",
"\n",
"import cpuinfo\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import numba\n",
"\n",
"from refl1d.reflectivity import reflectivity\n",
"\n",
"import refnx\n",
"from refnx.reflect import available_backends, use_reflect_backend, abeles, Structure, SLD\n",
"\n",
"# q values at which every kernel in this notebook is evaluated.\n",
"q = np.linspace(0.01, 0.5, num=3000)\n",
"\n",
"# SLD objects for the materials that make up the model.\n",
"si, sio2, d2o = SLD(2.07), SLD(3.47), SLD(6.36)\n",
"layer = SLD(complex(-0.5, 1e-5))\n",
"\n",
"# Stack the materials: si, then sio2(100, 3), layer(500, 3) and d2o(0, 3).\n",
"structure = si | sio2(100, 3) | layer(500, 3) | d2o(0, 3)\n",
"\n",
"# Keep every column except the last one of the slab and micro-slab arrays.\n",
"slabs = structure.slabs()[:, :-1]\n",
"microslabs = structure._micro_slabs()[:, :-1]"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a9265590",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'python_version': '3.8.16.final.0 (64 bit)',\n",
" 'cpuinfo_version': [9, 0, 0],\n",
" 'cpuinfo_version_string': '9.0.0',\n",
" 'arch': 'X86_64',\n",
" 'bits': 64,\n",
" 'count': 8,\n",
" 'arch_string_raw': 'x86_64',\n",
" 'vendor_id_raw': 'GenuineIntel',\n",
" 'brand_raw': 'Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz',\n",
" 'hz_advertised_friendly': '2.8000 GHz',\n",
" 'hz_actual_friendly': '2.8000 GHz',\n",
" 'hz_advertised': [2800000000, 0],\n",
" 'hz_actual': [2800000000, 0],\n",
" 'l2_cache_size': 262144,\n",
" 'stepping': 9,\n",
" 'model': 158,\n",
" 'family': 6,\n",
" 'flags': ['1gbpage',\n",
" '3dnowprefetch',\n",
" 'abm',\n",
" 'acapmsr',\n",
" 'acpi',\n",
" 'adx',\n",
" 'aes',\n",
" 'apic',\n",
" 'avx',\n",
" 'avx1.0',\n",
" 'avx2',\n",
" 'bmi1',\n",
" 'bmi2',\n",
" 'clflush',\n",
" 'clflushopt',\n",
" 'clfsh',\n",
" 'clfsopt',\n",
" 'cmov',\n",
" 'cx16',\n",
" 'cx8',\n",
" 'de',\n",
" 'ds',\n",
" 'ds_cpl',\n",
" 'dscpl',\n",
" 'dtes64',\n",
" 'dts',\n",
" 'em64t',\n",
" 'erms',\n",
" 'est',\n",
" 'f16c',\n",
" 'fma',\n",
" 'fpu',\n",
" 'fpu_csds',\n",
" 'fxsr',\n",
" 'ht',\n",
" 'htt',\n",
" 'ibrs',\n",
" 'intel_pt',\n",
" 'invpcid',\n",
" 'ipt',\n",
" 'l1df',\n",
" 'lahf',\n",
" 'lahf_lm',\n",
" 'lzcnt',\n",
" 'mca',\n",
" 'mce',\n",
" 'mdclear',\n",
" 'mmx',\n",
" 'mon',\n",
" 'monitor',\n",
" 'movbe',\n",
" 'mpx',\n",
" 'msr',\n",
" 'mtrr',\n",
" 'osxsave',\n",
" 'pae',\n",
" 'pat',\n",
" 'pbe',\n",
" 'pcid',\n",
" 'pclmulqdq',\n",
" 'pdcm',\n",
" 'pge',\n",
" 'pni',\n",
" 'popcnt',\n",
" 'prefetchw',\n",
" 'pse',\n",
" 'pse36',\n",
" 'rdrand',\n",
" 'rdrnd',\n",
" 'rdseed',\n",
" 'rdtscp',\n",
" 'rdwrfsgs',\n",
" 'seglim64',\n",
" 'sep',\n",
" 'sgx',\n",
" 'smap',\n",
" 'smep',\n",
" 'ss',\n",
" 'ssbd',\n",
" 'sse',\n",
" 'sse2',\n",
" 'sse3',\n",
" 'sse4.1',\n",
" 'sse4.2',\n",
" 'sse4_1',\n",
" 'sse4_2',\n",
" 'ssse3',\n",
" 'stibp',\n",
" 'syscall',\n",
" 'tm',\n",
" 'tm2',\n",
" 'tpr',\n",
" 'tsc',\n",
" 'tsc_thread_offset',\n",
" 'tscdeadline',\n",
" 'tsci',\n",
" 'tsctmr',\n",
" 'tsxfa',\n",
" 'vme',\n",
" 'vmx',\n",
" 'x2apic',\n",
" 'xd',\n",
" 'xsave',\n",
" 'xtpr'],\n",
" 'l2_cache_line_size': 256,\n",
" 'l2_cache_associativity': 6}"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Record the CPU that the benchmarks below were run on\n",
"# (cpuinfo is imported with the other dependencies in the first code cell).\n",
"cpuinfo.get_cpu_info()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "839a3833",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('darwin',\n",
" sys.version_info(major=3, minor=8, micro=16, releaselevel='final', serial=0))"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sys.platform, sys.version_info"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "cd83619f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'0.1.31'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"refnx.version.version"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "19baf7cf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'1.23.5'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.__version__"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "ad068f90",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'0.56.4'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"numba.__version__"
]
},
{
"cell_type": "markdown",
"id": "112bc096",
"metadata": {},
"source": [
"## Nevot-Croce comparison, 2 layers"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "146b56eb",
"metadata": {},
"outputs": [],
"source": [
"# Contiguous arrays handed to refl1d's reflectivity(): kz is q / 2, one array\n",
"# per slab column, and sigma skips the first row of the roughness column.\n",
"kz = np.ascontiguousarray(q / 2.0)\n",
"depth, rho, irho = (np.ascontiguousarray(slabs[:, col]) for col in range(3))\n",
"sigma = np.ascontiguousarray(slabs[1:, 3])\n",
"\n",
"# Evaluate both kernels once on the same two-layer model before timing them.\n",
"refnx_R = abeles(q, slabs)\n",
"R = reflectivity(kz, depth, rho, irho=irho, sigma=sigma)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "4de4975f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"refnx kernel timings\n",
"--------------------\n",
"\n",
"\n",
"backend = python, threads = 1\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/anz/miniconda3/envs/dev38/lib/python3.8/site-packages/refnx/reflect/reflect_model.py:233: UserWarning: Using the SLOW reflectivity calculation.\n",
" warnings.warn(\"Using the SLOW reflectivity calculation.\")\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.25 ms ± 60.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n",
"\n",
"backend = c, threads = -1\n",
"238 µs ± 13.8 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n",
"\n",
"backend = c, threads = 1\n",
"618 µs ± 16.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n",
"\n",
"backend = c_parratt, threads = -1\n",
"221 µs ± 7.82 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n",
"\n",
"backend = c_parratt, threads = 1\n",
"539 µs ± 11 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n",
"\n",
"backend = py_parratt, threads = 1\n",
"923 µs ± 14.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n",
"\n"
]
}
],
"source": [
"# Thread counts to benchmark; -1 means use all the processors.\n",
"threads = [-1, 1]\n",
"\n",
"print(\"refnx kernel timings\")\n",
"print(\"--------------------\")\n",
"print(\"\\n\")\n",
"\n",
"for backend, thread in itertools.product(available_backends(), threads):\n",
"    # The pure-Python backends are only timed with threads=1. Decide this\n",
"    # before entering the context manager so that a skipped combination never\n",
"    # switches the active backend.\n",
"    if backend in ['py_parratt', 'python'] and thread == -1:\n",
"        continue\n",
"\n",
"    with use_reflect_backend(backend) as f:\n",
"        print(f\"backend = {backend}, threads = {thread}\")\n",
"        %timeit f(q, slabs, threads=thread)\n",
"        print(\"\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "0cafb647",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"928 µs ± 11 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
]
}
],
"source": [
"# measure timings for refl1d.reflectivity.reflectivity\n",
"%timeit reflectivity(kz, depth, rho, irho=irho, sigma=sigma)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "cec72ae5",
"metadata": {},
"outputs": [],
"source": [
"# Hard-coded refl1d.reflectivity.reflectivity timings for the two-layer model,\n",
"# keyed by refl1d release. Values are plotted on a \"time / ms\" axis.\n",
"refl1d_timings_ms = {\n",
"    '0.8.10': 1.46,\n",
"    '0.8.11': 1.4,\n",
"    '0.8.12': 1.43,\n",
"    '0.8.14': 1.5,\n",
"    '0.8.15': 1.02,\n",
"}\n",
"refl1d_version = list(refl1d_timings_ms)\n",
"reflectivity_time = list(refl1d_timings_ms.values())"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "1c5f225c",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# refnx reference timings drawn as horizontal lines: (time, colour, label, linestyle).\n",
"# NOTE: these values are hard-coded from a benchmark run; they are not refreshed\n",
"# automatically when the timing cell is re-run, so update them by hand.\n",
"refnx_timings = [\n",
"    (1.20, 'r', 'python', 'solid'),\n",
"    (0.277, 'r', 'c, -1', 'dashed'),\n",
"    (0.642, 'r', 'c, 1', 'dashdot'),\n",
"    (0.995, 'black', 'py_parratt', 'solid'),\n",
"    (0.238, 'black', 'c_parratt, -1', 'dashed'),\n",
"    (0.552, 'black', 'c_parratt, 1', 'dashdot'),\n",
"]\n",
"\n",
"plt.plot(refl1d_version, reflectivity_time, label='refl1d')\n",
"# Span the reference lines across whatever refl1d versions are plotted.\n",
"first_version, last_version = refl1d_version[0], refl1d_version[-1]\n",
"for time_ms, colour, label, linestyle in refnx_timings:\n",
"    plt.hlines(time_ms, first_version, last_version, colour, label=label, linestyles=linestyle)\n",
"\n",
"plt.title(\"Nevot-Croce comparison, 2 layers\")\n",
"plt.legend()\n",
"plt.ylabel(\"time / ms\")\n",
"plt.xlabel(\"refl1d version\");"
]
},
{
"cell_type": "markdown",
"id": "d1c70d98",
"metadata": {},
"source": [
"## Microslabs comparison"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "17dca2ab",
"metadata": {},
"outputs": [],
"source": [
"# Rebuild the refl1d input arrays, this time from the micro-slab array.\n",
"kz = np.ascontiguousarray(q / 2.0)\n",
"depth, rho, irho = (np.ascontiguousarray(microslabs[:, col]) for col in range(3))\n",
"sigma = np.ascontiguousarray(microslabs[1:, 3])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "5d7953ab",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1317, 4)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"microslabs.shape"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "6f523de1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"refnx kernel timings\n",
"--------------------\n",
"\n",
"\n",
"backend = python, threads = 1\n",
"691 ms ± 23.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
"\n",
"backend = c, threads = -1\n",
"44.3 ms ± 685 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
"\n",
"backend = c, threads = 1\n",
"235 ms ± 5.84 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
"\n",
"backend = c_parratt, threads = -1\n",
"36.1 ms ± 105 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
"\n",
"backend = c_parratt, threads = 1\n",
"162 ms ± 3.63 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
"\n",
"backend = py_parratt, threads = 1\n",
"538 ms ± 10.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
"\n"
]
}
],
"source": [
"# Thread counts to benchmark; -1 means use all the processors.\n",
"threads = [-1, 1]\n",
"\n",
"print(\"refnx kernel timings\")\n",
"print(\"--------------------\")\n",
"print(\"\\n\")\n",
"\n",
"for backend, thread in itertools.product(available_backends(), threads):\n",
"    # The pure-Python backends are only timed with threads=1. Decide this\n",
"    # before entering the context manager so that a skipped combination never\n",
"    # switches the active backend.\n",
"    if backend in ['py_parratt', 'python'] and thread == -1:\n",
"        continue\n",
"\n",
"    with use_reflect_backend(backend) as f:\n",
"        print(f\"backend = {backend}, threads = {thread}\")\n",
"        %timeit f(q, microslabs, threads=thread)\n",
"        print(\"\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "8c57ab9e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"341 ms ± 4.19 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"# measure timings for refl1d.reflectivity.reflectivity\n",
"%timeit reflectivity(kz, depth, rho, irho=irho, sigma=sigma)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "7e334080",
"metadata": {},
"outputs": [],
"source": [
"# Hard-coded refl1d.reflectivity.reflectivity timings for the micro-slab model,\n",
"# keyed by refl1d release. Values are plotted on a \"time / ms\" axis.\n",
"refl1d_timings_ms = {\n",
"    '0.8.10': 547,\n",
"    '0.8.11': 532,\n",
"    '0.8.12': 553,\n",
"    '0.8.14': 586,\n",
"    '0.8.15': 339,\n",
"}\n",
"refl1d_version = list(refl1d_timings_ms)\n",
"reflectivity_time = list(refl1d_timings_ms.values())"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "50845a92",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# refnx reference timings drawn as horizontal lines: (time, colour, label, linestyle).\n",
"# NOTE: these values are hard-coded from a benchmark run; they are not refreshed\n",
"# automatically when the timing cell is re-run, so update them by hand.\n",
"refnx_timings = [\n",
"    (698, 'r', 'python', 'solid'),\n",
"    (44.1, 'r', 'c, -1', 'dashed'),\n",
"    (239, 'r', 'c, 1', 'dashdot'),\n",
"    (521, 'black', 'py_parratt', 'solid'),\n",
"    (36.4, 'black', 'c_parratt, -1', 'dashed'),\n",
"    (169, 'black', 'c_parratt, 1', 'dashdot'),\n",
"]\n",
"\n",
"plt.plot(refl1d_version, reflectivity_time, label='refl1d')\n",
"# Span the reference lines across whatever refl1d versions are plotted.\n",
"first_version, last_version = refl1d_version[0], refl1d_version[-1]\n",
"for time_ms, colour, label, linestyle in refnx_timings:\n",
"    plt.hlines(time_ms, first_version, last_version, colour, label=label, linestyles=linestyle)\n",
"\n",
"plt.title(\"Microslab comparison\")\n",
"plt.legend()\n",
"plt.ylabel(\"time / ms\")\n",
"plt.xlabel(\"refl1d version\");"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1e74dceb",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment