Created
July 18, 2020 17:48
-
-
Save ajfriend/3eccf3ec01b33f46dc14f5dbb13d8c6f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Experiments with vectorized functions" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/ajfriend/work/h3-py/uber/h3-py/env/lib/python3.8/site-packages/h3/unstable/__init__.py:4: UserWarning: Modules under `h3.unstable` are experimental, and may change at any time.\n", | |
" warnings.warn(\n" | |
] | |
} | |
], | |
"source": [ | |
"from h3.unstable import vect\n", | |
"\n", | |
"import h3.api.numpy_int as h3i\n", | |
"import h3.api.basic_str as h3s\n", | |
"\n", | |
"import numpy as np" | |
] | |
}, | |
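{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# `geo_to_h3` times\n", | |
"\n", | |
"Note: the two scalar list-comprehension cells below use resolution 10, while the vectorized `vect.geo_to_h3` cell uses resolution 9, so the timings are not a strictly like-for-like comparison." | |
] | |
}, | |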
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"N = 1_000_000\n", | |
"\n", | |
"lats = np.random.uniform(0, 90, N) \n", | |
"lons = np.random.uniform(0, 90, N)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2.1 s ± 57.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"[h3s.geo_to_h3(lat, lon, 10) for lat, lon in zip(lats, lons)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.63 s ± 7.51 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"[h3i.geo_to_h3(lat, lon, 10) for lat, lon in zip(lats, lons)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"802 ms ± 13.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"vect.geo_to_h3(lats, lons, 9)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Haversine distances between cells\n", | |
"\n", | |
"Compute haversine distances between H3 cells using their\n", | |
"integer representation (avoiding conversion to/from the `str` representation)." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def rand_hexes(N):\n", | |
" lats = np.random.uniform(0, 90, N) \n", | |
" lons = np.random.uniform(0, 90, N)\n", | |
"\n", | |
" out = np.array(\n", | |
" [h3i.geo_to_h3(a, o, 9) for a,o in zip(lats, lons)],\n", | |
" dtype = 'uint64',\n", | |
" )\n", | |
" \n", | |
" return out" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"N = 1_000_000\n", | |
"\n", | |
"a = rand_hexes(N)\n", | |
"b = rand_hexes(N)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1 s ± 22.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"vect.cell_haversine(a, b)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Multithreading\n", | |
"\n", | |
"Since these functions release the Python GIL, we can achieve some speedups via parallelism on multicore machines.\n", | |
"\n", | |
"Timings below are run on a quad-core MacBook." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## `geo_to_h3`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from concurrent.futures import ThreadPoolExecutor\n", | |
"\n", | |
"N = 100_000\n", | |
"K = 10\n", | |
"\n", | |
"geos = [\n", | |
" # lat/lng vectors\n", | |
" (np.random.uniform(0, 90, N), np.random.uniform(0, 90, N))\n", | |
" for _ in range(K)\n", | |
"]\n", | |
"\n", | |
"def foo(geo):\n", | |
" lats, lngs = geo\n", | |
" out = vect.geo_to_h3(lats, lngs, 9)\n", | |
" return out" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"821 ms ± 30.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"out = list(map(foo, geos))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"248 ms ± 10.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"with ThreadPoolExecutor(max_workers=4) as ex:\n", | |
" out = list(ex.map(foo, geos))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## `cell_haversine`\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"N = 100_000\n", | |
"K = 10\n", | |
"\n", | |
"pairs = [\n", | |
" (rand_hexes(N), rand_hexes(N))\n", | |
" for _ in range(K)\n", | |
"]\n", | |
"\n", | |
"def foo(ab):\n", | |
" a, b = ab\n", | |
" out = vect.cell_haversine(a, b)\n", | |
" return out" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.03 s ± 19.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"out = list(map(foo, pairs))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"339 ms ± 21.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"with ThreadPoolExecutor(max_workers=4) as ex:\n", | |
" out = list(ex.map(foo, pairs))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment