-
-
Save sklam/17cc21d93c17a2fec11f05d0aacce5a1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "54f89b27", | |
"metadata": {}, | |
"source": [ | |
"Checking `unordered_map` performance\n", | |
"\n", | |
"Uses CPPYY for C++ auto-binding and JIT: https://cppyy.readthedocs.io/en/latest/repositories.html\n", | |
"\n", | |
"Reference: https://numba.discourse.group/t/speed-tips-for-mapping-all-values-of-an-array/717" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "6644a91e", | |
"metadata": {}, | |
"source": [ | |
"Case 1: cppyy cling JIT version" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "d002a974", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import cppyy" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "9509c3a0", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cppyy.cppdef(\"\"\"\n", | |
"#include <unordered_map>\n", | |
"\n", | |
"void map_array(const size_t N, const size_t inarr[], size_t outarr[], \n", | |
" const size_t inval[], const size_t outval[]) {\n", | |
" std::unordered_map<size_t, size_t> lut;\n", | |
"\n", | |
" for (size_t i=0; i<N; ++i){\n", | |
" lut[inval[i]] = outval[i];\n", | |
" }\n", | |
"\n", | |
"\n", | |
" for (size_t i=0; i<N; ++i){\n", | |
" outarr[i] = lut[inarr[i]];\n", | |
" }\n", | |
"\n", | |
"}\n", | |
"\n", | |
"\n", | |
"size_t addr_map_array() {\n", | |
" return (size_t)&map_array;\n", | |
"}\n", | |
"\n", | |
"\"\"\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "af98d86b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from cppyy.gbl import addr_map_array, map_array" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "dd4827d2", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<cppyy.CPPOverload at 0x7f5130c170c0>" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"addr_map_array" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "47292839", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "1b92a493", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"N = 10000\n", | |
"inarr = np.arange(N)\n", | |
"outarr = np.zeros_like(inarr)\n", | |
"inval = inarr.copy()\n", | |
"outval = inarr.copy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "17ba4ef4", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"map_array(N, inarr.data, outarr.data, inval.data, outval.data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "c292f9b5", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([ 0, 1, 2, ..., 9997, 9998, 9999])" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"outarr" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "9e80f679", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"915 µs ± 22.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit map_array(N, inarr.data, outarr.data, inval.data, outval.data)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "b3aaed5c", | |
"metadata": {}, | |
"source": [ | |
"Case 2: AOT C++ compiled by GCC and auto-binding with CPPYY" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "5bfef659", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Overwriting libmaparray.h\n" | |
] | |
} | |
], | |
"source": [ | |
"%%writefile libmaparray.h\n", | |
"#include <cstddef>\n", | |
"void aot_map_array(const size_t N, const size_t inarr[], size_t outarr[],\n", | |
" const size_t inval[], const size_t outval[]);\n", | |
"size_t addr_aot_map_array();" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "1db34ef2", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Overwriting libmaparray.cpp\n" | |
] | |
} | |
], | |
"source": [ | |
"%%writefile libmaparray.cpp\n", | |
"// clang++ -shared -O3 -o libmaparray.so libmaparray.cpp\n", | |
"\n", | |
"#include \"libmaparray.h\"\n", | |
"#include <unordered_map>\n", | |
"\n", | |
"void aot_map_array(const size_t N, const size_t inarr[], size_t outarr[],\n", | |
" const size_t inval[], const size_t outval[]) {\n", | |
" std::unordered_map<size_t, size_t> lut;\n", | |
"\n", | |
" for (size_t i=0; i<N; ++i){\n", | |
" lut[inval[i]] = outval[i];\n", | |
" }\n", | |
"\n", | |
"\n", | |
" for (size_t i=0; i<N; ++i){\n", | |
" outarr[i] = lut[inarr[i]];\n", | |
" }\n", | |
"\n", | |
"}\n", | |
"\n", | |
"\n", | |
"size_t addr_aot_map_array() {\n", | |
" return (size_t)&aot_map_array;\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "5d0187b5", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"!g++ -fPIC -std=c++11 -O3 -shared -o libmaparray.so libmaparray.cpp" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "221b543c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cppyy.include('libmaparray.h')\n", | |
"cppyy.load_library('libmaparray')\n", | |
"from cppyy.gbl import aot_map_array" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "9999e2db", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"894 µs ± 8.66 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit aot_map_array(N, inarr.data, outarr.data, inval.data, outval.data)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "4a2125f0", | |
"metadata": {}, | |
"source": [ | |
"Case 3: Numba calls CPPYY JITed C++ function" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "1c64be40", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"139986868609088" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"addr = addr_map_array()\n", | |
"addr" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "4f14a7c4", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Ctypes binding for \n", | |
"from ctypes import *\n", | |
"cfn_map_array = CFUNCTYPE(None, c_size_t, c_void_p, c_void_p, c_void_p, c_void_p)(addr)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"id": "84d22821", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numba" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "b6116465", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"@numba.njit\n", | |
"def cpp_map_array(inarr, outarr, inval, outval):\n", | |
" cfn_map_array(inarr.size, inarr.ctypes.data, outarr.ctypes.data, inval.ctypes.data, outval.ctypes.data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"id": "71bd1fd3", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cpp_map_array(inarr, outarr, inval, outval) # warm up" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"id": "2884b42d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"870 µs ± 12.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit cpp_map_array(inarr, outarr, inval, outval)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "05096a9f", | |
"metadata": {}, | |
"source": [ | |
"Case 4: Numba typed.Dict version" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"id": "f9c5c029", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"@numba.njit\n", | |
"def nb_map_array(inarr, outarr, inval, outval):\n", | |
" lut = {}\n", | |
" for i in range(len(inval)):\n", | |
" lut[inval[i]] = outval[i]\n", | |
" for i in range(len(inarr)):\n", | |
" outarr[i] = lut[inarr[i]]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"id": "48c37997", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"nb_map_array(inarr, outarr, inval, outval) # warm up" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"id": "2a9e2ef7", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"455 µs ± 3.87 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit nb_map_array(inarr, outarr, inval, outval)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "222512bb", | |
"metadata": {}, | |
"source": [ | |
"Benchmark" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"id": "db98050e", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"def benchmark(N):\n", | |
" print(f\"N = {N}\")\n", | |
" inarr = np.random.randint(0, 7, size=N)\n", | |
" inval = np.arange(N)\n", | |
" outval = np.random.randint(0, 7, size=N)\n", | |
"\n", | |
" print(\"CPPYY-cling\".center(80, '-'))\n", | |
" outarr1 = np.zeros_like(inarr)\n", | |
" %timeit map_array(N, inarr.data, outarr1.data, inval.data, outval.data)\n", | |
" \n", | |
" print(\"C++ AOT\".center(80, '-'))\n", | |
" outarr2 = np.zeros_like(inarr)\n", | |
" %timeit aot_map_array(N, inarr.data, outarr2.data, inval.data, outval.data)\n", | |
"\n", | |
" print(\"Numba call cppyy-cling\".center(80, '-'))\n", | |
" outarr3 = np.zeros_like(inarr)\n", | |
" %timeit cpp_map_array(inarr, outarr3, inval, outval)\n", | |
" \n", | |
" print(\"Numba typed.Dict\".center(80, '-'))\n", | |
" outarr4 = np.zeros_like(inarr)\n", | |
" %timeit nb_map_array(inarr, outarr4, inval, outval)\n", | |
" # verify output\n", | |
" np.testing.assert_equal(outarr1, outarr2)\n", | |
" np.testing.assert_equal(outarr1, outarr3)\n", | |
" np.testing.assert_equal(outarr1, outarr4)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"id": "55f937c6", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"N = 500\n", | |
"----------------------------------CPPYY-cling-----------------------------------\n", | |
"48.3 µs ± 19.1 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", | |
"------------------------------------C++ AOT-------------------------------------\n", | |
"49.2 µs ± 202 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", | |
"-----------------------------Numba call cppyy-cling-----------------------------\n", | |
"45.7 µs ± 19.9 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", | |
"--------------------------------Numba typed.Dict--------------------------------\n", | |
"24.9 µs ± 279 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"benchmark(500)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"id": "9b7f207e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"N = 10000\n", | |
"----------------------------------CPPYY-cling-----------------------------------\n", | |
"870 µs ± 2.82 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", | |
"------------------------------------C++ AOT-------------------------------------\n", | |
"898 µs ± 9.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", | |
"-----------------------------Numba call cppyy-cling-----------------------------\n", | |
"866 µs ± 1.13 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", | |
"--------------------------------Numba typed.Dict--------------------------------\n", | |
"468 µs ± 12.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"benchmark(10000)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"id": "63f5d093", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"N = 500000\n", | |
"----------------------------------CPPYY-cling-----------------------------------\n", | |
"45 ms ± 61.7 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", | |
"------------------------------------C++ AOT-------------------------------------\n", | |
"46.4 ms ± 60 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", | |
"-----------------------------Numba call cppyy-cling-----------------------------\n", | |
"45.5 ms ± 1.03 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", | |
"--------------------------------Numba typed.Dict--------------------------------\n", | |
"32.3 ms ± 74.5 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"benchmark(500000)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"id": "eb86f084", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"N = 4194304\n", | |
"----------------------------------CPPYY-cling-----------------------------------\n", | |
"361 ms ± 554 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", | |
"------------------------------------C++ AOT-------------------------------------\n", | |
"374 ms ± 8.16 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", | |
"-----------------------------Numba call cppyy-cling-----------------------------\n", | |
"361 ms ± 1.19 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", | |
"--------------------------------Numba typed.Dict--------------------------------\n", | |
"281 ms ± 1.76 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"benchmark(2048 * 2048)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "b73eb145", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The conda environment: