Skip to content

Instantly share code, notes, and snippets.

@sklam
Created May 14, 2021 16:37
Show Gist options
  • Save sklam/17cc21d93c17a2fec11f05d0aacce5a1 to your computer and use it in GitHub Desktop.
Save sklam/17cc21d93c17a2fec11f05d0aacce5a1 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "54f89b27",
"metadata": {},
"source": [
"Checking `unordered_map` performance\n",
"\n",
"Uses CPPYY for C++ auto-binding and JIT: https://cppyy.readthedocs.io/en/latest/repositories.html\n",
"\n",
"Reference: https://numba.discourse.group/t/speed-tips-for-mapping-all-values-of-an-array/717"
]
},
{
"cell_type": "markdown",
"id": "6644a91e",
"metadata": {},
"source": [
"Case 1: cppyy cling JIT version"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d002a974",
"metadata": {},
"outputs": [],
"source": [
"import cppyy"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9509c3a0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cppyy.cppdef(\"\"\"\n",
"#include <unordered_map>\n",
"\n",
"// Remap every element of `inarr` through a lookup table and store the result in `outarr`.\n",
"// The table is built from the N pairs inval[k] -> outval[k]; a later duplicate key\n",
"// overwrites an earlier one. All four arrays are indexed from 0 to N-1.\n",
"void map_array(const size_t N, const size_t inarr[], size_t outarr[],\n",
"               const size_t inval[], const size_t outval[]) {\n",
"    std::unordered_map<size_t, size_t> table;\n",
"\n",
"    // Pass 1: populate the lookup table.\n",
"    for (size_t k = 0; k != N; ++k) {\n",
"        table[inval[k]] = outval[k];\n",
"    }\n",
"\n",
"    // Pass 2: translate the input. operator[] default-inserts a missing key,\n",
"    // so an input value that is not in the table maps to 0.\n",
"    for (size_t k = 0; k != N; ++k) {\n",
"        outarr[k] = table[inarr[k]];\n",
"    }\n",
"}\n",
"\n",
"\n",
"// Return the address of map_array as an integer.\n",
"size_t addr_map_array() {\n",
"    return reinterpret_cast<size_t>(&map_array);\n",
"}\n",
"\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "af98d86b",
"metadata": {},
"outputs": [],
"source": [
"from cppyy.gbl import addr_map_array, map_array"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "dd4827d2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<cppyy.CPPOverload at 0x7f5130c170c0>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"addr_map_array"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "47292839",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1b92a493",
"metadata": {},
"outputs": [],
"source": [
"N = 10000\n",
"inarr = np.arange(N)\n",
"outarr = np.zeros_like(inarr)\n",
"inval = inarr.copy()\n",
"outval = inarr.copy()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "17ba4ef4",
"metadata": {},
"outputs": [],
"source": [
"map_array(N, inarr.data, outarr.data, inval.data, outval.data)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "c292f9b5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0, 1, 2, ..., 9997, 9998, 9999])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"outarr"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "9e80f679",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"915 µs ± 22.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"%timeit map_array(N, inarr.data, outarr.data, inval.data, outval.data)"
]
},
{
"cell_type": "markdown",
"id": "b3aaed5c",
"metadata": {},
"source": [
"Case 2: AOT C++ compiled by GCC and auto-binding with CPPYY"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "5bfef659",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting libmaparray.h\n"
]
}
],
"source": [
"%%writefile libmaparray.h\n",
"#ifndef LIBMAPARRAY_H\n",
"#define LIBMAPARRAY_H\n",
"\n",
"#include <cstddef>\n",
"\n",
"// Write into outarr[i] the value mapped to inarr[i], using the table inval[k] -> outval[k].\n",
"// All four arrays are indexed from 0 to N-1.\n",
"void aot_map_array(const size_t N, const size_t inarr[], size_t outarr[],\n",
"                   const size_t inval[], const size_t outval[]);\n",
"\n",
"// Address of aot_map_array as an integer.\n",
"size_t addr_aot_map_array();\n",
"\n",
"#endif  // LIBMAPARRAY_H"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "1db34ef2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting libmaparray.cpp\n"
]
}
],
"source": [
"%%writefile libmaparray.cpp\n",
"// Build: g++ -fPIC -std=c++11 -O3 -shared -o libmaparray.so libmaparray.cpp\n",
"\n",
"#include \"libmaparray.h\"\n",
"#include <unordered_map>\n",
"\n",
"// Remap every element of `inarr` through a lookup table and store the result in `outarr`.\n",
"// The table is built from the N pairs inval[k] -> outval[k]; a later duplicate key\n",
"// overwrites an earlier one. All four arrays are indexed from 0 to N-1.\n",
"void aot_map_array(const size_t N, const size_t inarr[], size_t outarr[],\n",
"                   const size_t inval[], const size_t outval[]) {\n",
"    std::unordered_map<size_t, size_t> table;\n",
"\n",
"    // Pass 1: populate the lookup table.\n",
"    for (size_t k = 0; k != N; ++k) {\n",
"        table[inval[k]] = outval[k];\n",
"    }\n",
"\n",
"    // Pass 2: translate the input. operator[] default-inserts a missing key,\n",
"    // so an input value that is not in the table maps to 0.\n",
"    for (size_t k = 0; k != N; ++k) {\n",
"        outarr[k] = table[inarr[k]];\n",
"    }\n",
"}\n",
"\n",
"\n",
"// Return the address of aot_map_array as an integer.\n",
"size_t addr_aot_map_array() {\n",
"    return reinterpret_cast<size_t>(&aot_map_array);\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "5d0187b5",
"metadata": {},
"outputs": [],
"source": [
"!g++ -fPIC -std=c++11 -O3 -shared -o libmaparray.so libmaparray.cpp"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "221b543c",
"metadata": {},
"outputs": [],
"source": [
"cppyy.include('libmaparray.h')\n",
"cppyy.load_library('libmaparray')\n",
"from cppyy.gbl import aot_map_array"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "9999e2db",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"894 µs ± 8.66 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"%timeit aot_map_array(N, inarr.data, outarr.data, inval.data, outval.data)"
]
},
{
"cell_type": "markdown",
"id": "4a2125f0",
"metadata": {},
"source": [
"Case 3: Numba calls CPPYY JITed C++ function"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "1c64be40",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"139986868609088"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"addr = addr_map_array()\n",
"addr"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "4f14a7c4",
"metadata": {},
"outputs": [],
"source": [
"# ctypes binding for the cling-JITed map_array, built from its raw address `addr`.\n",
"# Signature: void(size_t, void*, void*, void*, void*).\n",
"from ctypes import CFUNCTYPE, c_size_t, c_void_p\n",
"\n",
"cfn_map_array = CFUNCTYPE(None, c_size_t, c_void_p, c_void_p, c_void_p, c_void_p)(addr)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "84d22821",
"metadata": {},
"outputs": [],
"source": [
"import numba"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "b6116465",
"metadata": {},
"outputs": [],
"source": [
"@numba.njit\n",
"def cpp_map_array(inarr, outarr, inval, outval):\n",
"    \"\"\"Call the C++ `map_array` through its ctypes wrapper from jitted code.\n",
"\n",
"    `inarr.size` is passed as the element count and every array is handed\n",
"    over as its raw data address.\n",
"    \"\"\"\n",
"    n_items = inarr.size\n",
"    cfn_map_array(\n",
"        n_items,\n",
"        inarr.ctypes.data,\n",
"        outarr.ctypes.data,\n",
"        inval.ctypes.data,\n",
"        outval.ctypes.data,\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "71bd1fd3",
"metadata": {},
"outputs": [],
"source": [
"cpp_map_array(inarr, outarr, inval, outval) # warm up"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "2884b42d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"870 µs ± 12.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"%timeit cpp_map_array(inarr, outarr, inval, outval)"
]
},
{
"cell_type": "markdown",
"id": "05096a9f",
"metadata": {},
"source": [
"Case 4: Numba typed.Dict version"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "f9c5c029",
"metadata": {},
"outputs": [],
"source": [
"\n",
"@numba.njit\n",
"def nb_map_array(inarr, outarr, inval, outval):\n",
"    \"\"\"Numba counterpart of `map_array`, using a dict as the lookup table.\n",
"\n",
"    Builds the table `inval[i] -> outval[i]`, then writes the value mapped\n",
"    to each `inarr[i]` into `outarr[i]`.\n",
"    \"\"\"\n",
"    lut = {}\n",
"    for i in range(len(inval)):\n",
"        lut[inval[i]] = outval[i]\n",
"    for i in range(len(inarr)):\n",
"        outarr[i] = lut[inarr[i]]\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "48c37997",
"metadata": {},
"outputs": [],
"source": [
"nb_map_array(inarr, outarr, inval, outval) # warm up"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "2a9e2ef7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"455 µs ± 3.87 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"%timeit nb_map_array(inarr, outarr, inval, outval)"
]
},
{
"cell_type": "markdown",
"id": "222512bb",
"metadata": {},
"source": [
"Benchmark"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "db98050e",
"metadata": {},
"outputs": [],
"source": [
"\n",
"def benchmark(N, seed=0):\n",
"    \"\"\"Time the four `map_array` variants on random inputs of length N.\n",
"\n",
"    `inval` is `arange(N)` and `inarr` is drawn from [0, 7), so every lookup\n",
"    key is present in the table as long as N >= 7. After timing, the four\n",
"    output arrays are checked for equality.\n",
"\n",
"    N: number of elements in each array.\n",
"    seed: seed for the NumPy random generator, so reruns time the same inputs.\n",
"    \"\"\"\n",
"    print(f\"N = {N}\")\n",
"    rng = np.random.default_rng(seed)\n",
"    inval = np.arange(N)\n",
"    # Draw with inval's dtype so every array passed to the C++ side has the same item size.\n",
"    inarr = rng.integers(0, 7, size=N, dtype=inval.dtype)\n",
"    outval = rng.integers(0, 7, size=N, dtype=inval.dtype)\n",
"\n",
"    print(\"CPPYY-cling\".center(80, '-'))\n",
"    outarr1 = np.zeros_like(inarr)\n",
"    %timeit map_array(N, inarr.data, outarr1.data, inval.data, outval.data)\n",
"\n",
"    print(\"C++ AOT\".center(80, '-'))\n",
"    outarr2 = np.zeros_like(inarr)\n",
"    %timeit aot_map_array(N, inarr.data, outarr2.data, inval.data, outval.data)\n",
"\n",
"    print(\"Numba call cppyy-cling\".center(80, '-'))\n",
"    outarr3 = np.zeros_like(inarr)\n",
"    %timeit cpp_map_array(inarr, outarr3, inval, outval)\n",
"\n",
"    print(\"Numba typed.Dict\".center(80, '-'))\n",
"    outarr4 = np.zeros_like(inarr)\n",
"    %timeit nb_map_array(inarr, outarr4, inval, outval)\n",
"\n",
"    # Verify that all four implementations produced the same output.\n",
"    np.testing.assert_equal(outarr1, outarr2)\n",
"    np.testing.assert_equal(outarr1, outarr3)\n",
"    np.testing.assert_equal(outarr1, outarr4)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "55f937c6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"N = 500\n",
"----------------------------------CPPYY-cling-----------------------------------\n",
"48.3 µs ± 19.1 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
"------------------------------------C++ AOT-------------------------------------\n",
"49.2 µs ± 202 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
"-----------------------------Numba call cppyy-cling-----------------------------\n",
"45.7 µs ± 19.9 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
"--------------------------------Numba typed.Dict--------------------------------\n",
"24.9 µs ± 279 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
]
}
],
"source": [
"benchmark(500)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "9b7f207e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"N = 10000\n",
"----------------------------------CPPYY-cling-----------------------------------\n",
"870 µs ± 2.82 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
"------------------------------------C++ AOT-------------------------------------\n",
"898 µs ± 9.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
"-----------------------------Numba call cppyy-cling-----------------------------\n",
"866 µs ± 1.13 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
"--------------------------------Numba typed.Dict--------------------------------\n",
"468 µs ± 12.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
"source": [
"benchmark(10000)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "63f5d093",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"N = 500000\n",
"----------------------------------CPPYY-cling-----------------------------------\n",
"45 ms ± 61.7 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
"------------------------------------C++ AOT-------------------------------------\n",
"46.4 ms ± 60 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
"-----------------------------Numba call cppyy-cling-----------------------------\n",
"45.5 ms ± 1.03 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n",
"--------------------------------Numba typed.Dict--------------------------------\n",
"32.3 ms ± 74.5 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"benchmark(500000)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "eb86f084",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"N = 4194304\n",
"----------------------------------CPPYY-cling-----------------------------------\n",
"361 ms ± 554 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
"------------------------------------C++ AOT-------------------------------------\n",
"374 ms ± 8.16 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
"-----------------------------Numba call cppyy-cling-----------------------------\n",
"361 ms ± 1.19 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
"--------------------------------Numba typed.Dict--------------------------------\n",
"281 ms ± 1.76 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"benchmark(2048 * 2048)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b73eb145",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@sklam
Copy link
Author

sklam commented May 14, 2021

The conda environment:

conda create -n numba_cling conda-forge::cppyy python=3.8 ipython notebook numpy numba

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment