Skip to content

Instantly share code, notes, and snippets.

@adriangb
Last active March 26, 2020 00:21
Show Gist options
  • Save adriangb/4f22454c54b747920955a3ad3fb9490c to your computer and use it in GitHub Desktop.
Save adriangb/4f22454c54b747920955a3ad3fb9490c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 2,
"metadata": {
"language_info": {
"name": "python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"version": "3.7.5-final"
},
"orig_nbformat": 2,
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"npconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": 3,
"kernelspec": {
"name": "python37564bitvenv37venvbcb618b1d1ae4db78859509e6fd246c3",
"display_name": "Python 3.7.5 64-bit ('venv37': venv)"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"import itertools\n",
"\n",
"import pygeos\n",
"import shapely\n",
"from shapely.strtree import STRtree\n",
"import geopandas\n",
"import rtree"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test Build"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"shells = [((x, y), (x + 1, y), (x, y + 1)) for x, y in itertools.product(range(200), range(200))]\n",
"\n",
"geoms_shapely = [shapely.geometry.Polygon(shell) for shell in shells]\n",
"geom_shapely_bounds = [(g_idx, g.bounds, None) for g_idx, g in enumerate(geoms_shapely)]\n",
"geoms_pygeos = pygeos.polygons(shells)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "rtree\n270 ms ± 35.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
}
],
"source": [
"print(\"rtree\")\n",
"%timeit rtree.index.Index(geom_shapely_bounds)\n",
"rtree_idx = rtree.index.Index(geom_shapely_bounds)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "Shapely STRTree\n140 ms ± 18 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
}
],
"source": [
"print(\"Shapely STRTree\")\n",
"%timeit STRtree(geoms_shapely)\n",
"shapely_strtree = STRtree(geoms_shapely)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "PyGEOS STRTree\n4.98 ms ± 56.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
}
],
"source": [
"print(\"PyGEOS STRTree\")\n",
"%timeit pygeos.strtree.STRtree(geoms_pygeos)\n",
"pygeos_strtree = pygeos.strtree.STRtree(geoms_pygeos)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test Query"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"point_coords = (139.2938371195723, 17.29550838723575)\n",
"\n",
"search_pt_shapely = shapely.geometry.Point(*point_coords)\n",
"search_pt_shapely_bounds = search_pt_shapely.bounds\n",
"search_pt_pygeos = pygeos.points(*point_coords)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "rtree\n29 µs ± 1.21 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
}
],
"source": [
"print(\"rtree\")\n",
"%timeit list(rtree_idx.intersection(search_pt_shapely_bounds))"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "Shapely STRTree\n2.31 ms ± 148 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
}
],
"source": [
"print(\"Shapely STRTree\")\n",
"%timeit shapely_strtree.query(search_pt_shapely)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "PyGEOS STRTree\n4.28 µs ± 140 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
}
],
"source": [
"print(\"PyGEOS STRTree\")\n",
"%timeit pygeos_strtree.query(search_pt_pygeos)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test Bulk Query"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"# re-use a subset of the creation geometries\n",
"num_geoms = 500 # too slow to time otherwise\n",
"\n",
"bulk_geoms_shapely = geoms_shapely[:num_geoms]\n",
"bulk_geom_shapely_bounds = geom_shapely_bounds[:num_geoms]\n",
"bulk_geoms_pygeos = geoms_pygeos[:num_geoms]"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "rtree\n16.9 ms ± 153 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)\n"
}
],
"source": [
"print(\"rtree\")\n",
"%timeit -r 3 [list(rtree_idx.intersection(geom[1])) for geom in bulk_geom_shapely_bounds]"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "Shapely STRTree\n987 ms ± 28.3 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)\n"
}
],
"source": [
"print(\"Shapely STRTree\")\n",
"%timeit -r 3 [shapely_strtree.query(geom) for geom in bulk_geoms_shapely]"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "PyGEOS STRTree\n1.49 ms ± 13.1 µs per loop (mean ± std. dev. of 3 runs, 1000 loops each)\n"
}
],
"source": [
"print(\"PyGEOS STRTree\")\n",
"%timeit -r 3 pygeos_strtree.query_bulk(bulk_geoms_pygeos)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test Predicate Checking"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"def check_pygeos():\n",
" return pygeos_strtree.query(search_pt_pygeos, predicate=\"intersects\")\n",
"\n",
"def check_rtree():\n",
" idxs = list(rtree_idx.intersection(search_pt_shapely_bounds))\n",
" return [i for i in idxs if search_pt_shapely.intersects(geoms_shapely[i])]"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "6.14 µs ± 282 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
}
],
"source": [
"%timeit check_pygeos()"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": "134 µs ± 963 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
}
],
"source": [
"%timeit check_rtree()"
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment