Last active
May 29, 2022 18:46
-
-
Save tillahoffmann/af4af7186d2522ebe9820a9be39bcda6 to your computer and use it in GitHub Desktop.
Example for STL container exposed via `__iter__` (see https://stackoverflow.com/questions/72426351/fast-iteration-over-stl-container-in-cython).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "98af1ff2", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%reload_ext cython" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "7f67d4d3", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%%cython\n", | |
"\n", | |
"# Add -a for python interaction hints.\n", | |
"# distutils: language = c++\n", | |
"\n", | |
"from cython.operator cimport dereference, preincrement\n", | |
"from libcpp.unordered_map cimport unordered_map\n", | |
"from libcpp.unordered_set cimport unordered_set\n", | |
"\n", | |
"ctypedef unordered_map[int, unordered_set[int]] container_t\n", | |
"\n", | |
"\n", | |
"cdef class Base:\n", | |
" \"\"\"\n", | |
" Base class holding the data.\n", | |
" \"\"\"\n", | |
" cdef container_t container\n", | |
"\n", | |
" def __init__(self, int n):\n", | |
" # Populate the container ([] creates an entry).\n", | |
" for i in range(n):\n", | |
" self.container[i]\n", | |
" \n", | |
" def iterate(self):\n", | |
" \"\"\"\n", | |
" Iterate in C++ without python interaction as a baseline.\n", | |
" \"\"\"\n", | |
" it = self.container.begin()\n", | |
" while it != self.container.end():\n", | |
" preincrement(it)\n", | |
" \n", | |
" \n", | |
"cdef class A(Base):\n", | |
" \"\"\"\n", | |
" Simple implementation using `yield`.\n", | |
" \"\"\"\n", | |
" def __iter__(self):\n", | |
" it = self.container.begin()\n", | |
" while it != self.container.end():\n", | |
" yield dereference(it).first\n", | |
" preincrement(it)\n", | |
" \n", | |
" \n", | |
"cdef class Iterator:\n", | |
" \"\"\"\n", | |
" Dedicated iterator.\n", | |
" \"\"\"\n", | |
" cdef container_t* ptr\n", | |
" # Using container_t.iterator doesn't seem to work.\n", | |
" cdef unordered_map[int, unordered_set[int]].iterator it\n", | |
" \n", | |
" def __next__(self):\n", | |
" if self.it == dereference(self.ptr).end():\n", | |
" raise StopIteration\n", | |
" value = dereference(self.it).first\n", | |
" preincrement(self.it)\n", | |
" return value\n", | |
" \n", | |
" # Can't use __init__ because we use a pointer argument.\n", | |
" @staticmethod\n", | |
" cdef create(container_t* ptr):\n", | |
" self = Iterator()\n", | |
" self.ptr = ptr\n", | |
" self.it = dereference(ptr).begin()\n", | |
" return self\n", | |
" \n", | |
" \n", | |
"cdef class B(Base):\n", | |
" \"\"\"\n", | |
" Implementation using a dedicated iterator.\n", | |
" \"\"\"\n", | |
" def __iter__(self):\n", | |
" return Iterator.create(&self.container)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "73be0b45", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Number of elements in the container.\n", | |
"n = 1_000_000" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "720c2025", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"5.94 ms ± 300 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit x = {i: set() for i in range(n)}\n", | |
"# Baseline python implementation.\n", | |
"list(x)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "b4159b21", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"20.3 ms ± 391 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit x = A(n)\n", | |
"list(x)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "15515c8a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"15.2 ms ± 138 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit x = B(n)\n", | |
"list(x)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "7e545a01", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2.7 ms ± 13.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit x = Base(n)\n", | |
"x.iterate()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "d8fc3852", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# And just a sanity check that these do all yield the same result.\n", | |
"import itertools as it\n", | |
"\n", | |
"\n", | |
"containers = [A(n), B(n), {i: set() for i in range(n)}]\n", | |
"for x, y in it.combinations(containers, 2):\n", | |
" # The ordering is not necessarily the same; sort first.\n", | |
" assert list(sorted(x)) == list(sorted(y))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment