Skip to content

Instantly share code, notes, and snippets.

@tillahoffmann
Last active May 29, 2022 18:46
Show Gist options
  • Save tillahoffmann/af4af7186d2522ebe9820a9be39bcda6 to your computer and use it in GitHub Desktop.
Save tillahoffmann/af4af7186d2522ebe9820a9be39bcda6 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "98af1ff2",
"metadata": {},
"outputs": [],
"source": [
"%reload_ext cython"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7f67d4d3",
"metadata": {},
"outputs": [],
"source": [
"%%cython\n",
"\n",
"# Add -a for python interaction hints.\n",
"# distutils: language = c++\n",
"\n",
"from cython.operator cimport dereference, preincrement\n",
"from libcpp.unordered_map cimport unordered_map\n",
"from libcpp.unordered_set cimport unordered_set\n",
"\n",
"ctypedef unordered_map[int, unordered_set[int]] container_t\n",
"\n",
"\n",
"cdef class Base:\n",
" \"\"\"\n",
" Base class holding the data.\n",
" \"\"\"\n",
" cdef container_t container\n",
"\n",
" def __init__(self, int n):\n",
" # Populate the container ([] creates an entry).\n",
" for i in range(n):\n",
" self.container[i]\n",
" \n",
" def iterate(self):\n",
" \"\"\"\n",
" Iterate in C++ without python interaction as a baseline.\n",
" \"\"\"\n",
" it = self.container.begin()\n",
" while it != self.container.end():\n",
" preincrement(it)\n",
" \n",
" \n",
"cdef class A(Base):\n",
" \"\"\"\n",
" Simple implementation using `yield`.\n",
" \"\"\"\n",
" def __iter__(self):\n",
" it = self.container.begin()\n",
" while it != self.container.end():\n",
" yield dereference(it).first\n",
" preincrement(it)\n",
" \n",
" \n",
"cdef class Iterator:\n",
" \"\"\"\n",
" Dedicated iterator.\n",
" \"\"\"\n",
" cdef container_t* ptr\n",
" # Using container_t.iterator doesn't seem to work.\n",
" cdef unordered_map[int, unordered_set[int]].iterator it\n",
" \n",
" def __next__(self):\n",
" if self.it == dereference(self.ptr).end():\n",
" raise StopIteration\n",
" value = dereference(self.it).first\n",
" preincrement(self.it)\n",
" return value\n",
" \n",
" # Can't use __init__ because we use a pointer argument.\n",
" @staticmethod\n",
" cdef create(container_t* ptr):\n",
" self = Iterator()\n",
" self.ptr = ptr\n",
" self.it = dereference(ptr).begin()\n",
" return self\n",
" \n",
" \n",
"cdef class B(Base):\n",
" \"\"\"\n",
" Implementation using a dedicated iterator.\n",
" \"\"\"\n",
" def __iter__(self):\n",
" return Iterator.create(&self.container)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "73be0b45",
"metadata": {},
"outputs": [],
"source": [
"# Number of elements in the container.\n",
"n = 1_000_000"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "720c2025",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5.94 ms ± 300 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
"source": [
"%%timeit x = {i: set() for i in range(n)}\n",
"# Baseline python implementation.\n",
"list(x)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b4159b21",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20.3 ms ± 391 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit x = A(n)\n",
"list(x)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "15515c8a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"15.2 ms ± 138 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
"source": [
"%%timeit x = B(n)\n",
"list(x)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "7e545a01",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.7 ms ± 13.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
"source": [
"%%timeit x = Base(n)\n",
"x.iterate()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d8fc3852",
"metadata": {},
"outputs": [],
"source": [
"# And just a sanity check that these do all yield the same result.\n",
"import itertools as it\n",
"\n",
"\n",
"containers = [A(n), B(n), {i: set() for i in range(n)}]\n",
"for x, y in it.combinations(containers, 2):\n",
" # The ordering is not necessarily the same; sort first.\n",
" assert list(sorted(x)) == list(sorted(y))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment