Last active
November 18, 2021 11:28
-
-
Save jorisvandenbossche/4fab9c94525dcb5ddda8a89b4a5ad9f3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Benchmark CopyFromBuffer" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import geopandas\n", | |
"import pygeos\n", | |
"import pyogrio\n", | |
"\n", | |
"import pyproj\n", | |
"pyproj.datadir.set_data_dir(\"/home/joris/miniconda3/envs/geo-dev/share/proj/\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# arr = pygeos.from_shapely(df.geometry.array.data)\n", | |
"arr = df.geometry.array.data" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Get flat array of rings:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"arr_rings = pygeos.get_rings(pygeos.get_parts(arr))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"coords = pygeos.get_coordinates(arr_rings)\n", | |
"offsets = np.insert(np.cumsum(pygeos.get_num_coordinates(arr_rings)*2), 0, 0)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Recreate the linearrings:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# offsets were built as cumulative coordinate counts * 2, so halve them with\n", | |
"# integer floor division (exact, no float round-trip) to get per-ring lengths.\n", | |
"ring_lengths = np.diff(offsets // 2)\n", | |
"ring_indices = np.repeat(np.arange(len(ring_lengths)), ring_lengths)\n", | |
"rings = pygeos.linearrings(coords, indices=ring_indices)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Coordinate-wise comparison (default tolerance 0), the same predicate used for\n", | |
"# the larger dataset below; stricter than topological equality.\n", | |
"pygeos.equals_exact(arr_rings, rings).all()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Test performance on larger dataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>GEOID10</th>\n", | |
" <th>geometry</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>43451</td>\n", | |
" <td>POLYGON ((-83.70873 41.32733, -83.70815 41.327...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>43452</td>\n", | |
" <td>POLYGON ((-83.08698 41.53780, -83.08256 41.537...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>43456</td>\n", | |
" <td>MULTIPOLYGON (((-82.83558 41.71082, -82.83515 ...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>43457</td>\n", | |
" <td>POLYGON ((-83.49650 41.25371, -83.48382 41.253...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>43458</td>\n", | |
" <td>POLYGON ((-83.22229 41.53102, -83.22228 41.532...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>33139</th>\n", | |
" <td>84044</td>\n", | |
" <td>POLYGON ((-112.26022 40.76909, -112.25333 40.7...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>33140</th>\n", | |
" <td>84045</td>\n", | |
" <td>MULTIPOLYGON (((-111.92421 40.17034, -111.9240...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>33141</th>\n", | |
" <td>84046</td>\n", | |
" <td>POLYGON ((-110.00072 40.99745, -110.00036 40.9...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>33142</th>\n", | |
" <td>84047</td>\n", | |
" <td>POLYGON ((-111.92141 40.62772, -111.92134 40.6...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>33143</th>\n", | |
" <td>84049</td>\n", | |
" <td>POLYGON ((-111.59394 40.57707, -111.59386 40.5...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>33144 rows × 2 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" GEOID10 geometry\n", | |
"0 43451 POLYGON ((-83.70873 41.32733, -83.70815 41.327...\n", | |
"1 43452 POLYGON ((-83.08698 41.53780, -83.08256 41.537...\n", | |
"2 43456 MULTIPOLYGON (((-82.83558 41.71082, -82.83515 ...\n", | |
"3 43457 POLYGON ((-83.49650 41.25371, -83.48382 41.253...\n", | |
"4 43458 POLYGON ((-83.22229 41.53102, -83.22228 41.532...\n", | |
"... ... ...\n", | |
"33139 84044 POLYGON ((-112.26022 40.76909, -112.25333 40.7...\n", | |
"33140 84045 MULTIPOLYGON (((-111.92421 40.17034, -111.9240...\n", | |
"33141 84046 POLYGON ((-110.00072 40.99745, -110.00036 40.9...\n", | |
"33142 84047 POLYGON ((-111.92141 40.62772, -111.92134 40.6...\n", | |
"33143 84049 POLYGON ((-111.59394 40.57707, -111.59386 40.5...\n", | |
"\n", | |
"[33144 rows x 2 columns]" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_tl = pyogrio.read_dataframe(\"benchmark-data/tl_2019_us_zcta510/tl_2019_us_zcta510.shp\", columns=[\"GEOID10\"])\n", | |
"df_tl" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"arr_tl = df_tl.geometry.array.data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"33144" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(arr_tl)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Get flat array of rings:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"arr_rings = pygeos.get_rings(pygeos.get_parts(arr_tl))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"coords = pygeos.get_coordinates(arr_rings)\n", | |
"offsets = np.insert(np.cumsum(pygeos.get_num_coordinates(arr_rings)*2), 0, 0)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Recreate the linearrings:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# offsets were built as cumulative coordinate counts * 2, so halve them with\n", | |
"# integer floor division (exact, no float round-trip) to get per-ring lengths.\n", | |
"ring_lengths = np.diff(offsets // 2)\n", | |
"ring_indices = np.repeat(np.arange(len(ring_lengths)), ring_lengths)\n", | |
"rings = pygeos.linearrings(coords, indices=ring_indices)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pygeos.equals_exact(arr_rings, rings).all()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"np.savez(\"pygeos-benchmark-linearrings.npz\", offsets=offsets, coords=coords)" | |
] | |
}, | |
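{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**Only testing performance** (restart the kernel first; this part only needs the arrays saved to `pygeos-benchmark-linearrings.npz` above):" | |
] | |
}, | |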
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pygeos" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with np.load(\"pygeos-benchmark-linearrings.npz\") as data:\n", | |
" offsets = data[\"offsets\"]\n", | |
" coords = data[\"coords\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# The saved offsets are cumulative coordinate counts * 2, so halve them with\n", | |
"# integer floor division (exact, no float round-trip) to get per-ring lengths.\n", | |
"ring_lengths = np.diff(offsets // 2)\n", | |
"ring_indices = np.repeat(np.arange(len(ring_lengths)), ring_lengths)\n", | |
"rings = pygeos.linearrings(coords, indices=ring_indices)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Using pygeos master:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.13 s ± 6.86 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit pygeos.linearrings(coords, indices=ring_indices)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Using the PR with CopyFromBuffer (same statement as above, timed against the PR build):" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"558 ms ± 2.35 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit pygeos.linearrings(coords, indices=ring_indices)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python (geo-dev)", | |
"language": "python", | |
"name": "geo-dev" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment