Created
October 28, 2019 09:18
Revisions
-
jorisvandenbossche created this gist
Oct 28, 2019 .There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,255 @@ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Getting a flat array of coordinates from array of Geometry objects" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import geopandas\n", "import pygeos" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Quick copy of the relevant part of https://github.com/pyviz/datashader/pull/819\n", "\n", "import numpy as np\n", "\n", "def from_geopandas(ga):\n", " line_parts = [\n", " _shapely_to_array_parts(shape) for shape in ga\n", " ]\n", " line_lengths = [\n", " sum([len(part) for part in parts])\n", " for parts in line_parts\n", " ]\n", " flat_array = np.concatenate(\n", " [part for parts in line_parts for part in parts]\n", " )\n", " start_indices = np.concatenate(\n", " [[0], line_lengths[:-1]]\n", " ).astype('uint').cumsum()\n", " return start_indices, flat_array\n", " \n", " \n", "def _polygon_to_array_parts(polygon):\n", " import shapely.geometry as sg\n", " shape = sg.polygon.orient(polygon)\n", " exterior = np.asarray(shape.exterior.ctypes)\n", " polygon_parts = [exterior]\n", " hole_separator = np.array([-np.inf, -np.inf])\n", " for ring in shape.interiors:\n", " interior = np.asarray(ring.ctypes)\n", " polygon_parts.append(hole_separator)\n", " polygon_parts.append(interior)\n", " return polygon_parts\n", "\n", "def _shapely_to_array_parts(shape):\n", " import shapely.geometry as sg\n", " if isinstance(shape, sg.Polygon):\n", " # Single polygon\n", " return _polygon_to_array_parts(shape)\n", " elif isinstance(shape, sg.MultiPolygon):\n", " shape = list(shape)\n", " polygon_parts = _polygon_to_array_parts(shape[0])\n", " polygon_separator = np.array([np.inf, np.inf])\n", " for polygon in shape[1:]:\n", " polygon_parts.append(polygon_separator)\n", " polygon_parts.extend(_polygon_to_array_parts(polygon))\n", " return polygon_parts\n", " else:\n", " raise ValueError(\"\"\"\n", "Received invalid value of type {typ}. Must be an instance of\n", "shapely.geometry.Polygon or shapely.geometry.MultiPolygon\"\"\"\n", " .format(typ=type(shape).__name__))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Testing on the \"countries\" data:" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "df = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Datashader's method:" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([ 0, 48, 152, 208, 1854, 2766, 2990, 3098, 3266,\n", " 3790, 4034, 4264, 4512, 4586, 4660, 4822, 4938, 4980,\n", " 5032, 6308, 6354, 6374, 6556, 6820, 6838, 6860, 7050,\n", " 7074, 7414, 7456, 7862, 7982, 8134, 8334, 8438, 8510,\n", " 8614, 8728, 8768, 8838, 8878, 9062, 9142, 9194, 9346,\n", " 9412, 9430, 9452, 9536, 9610, 9690, 9778, 9866, 10018,\n", " 10096, 10146, 10262, 10378, 10500, 10538, 10588, 10680, 10820,\n", " 10858, 10912, 10956, 11034, 11158, 11256, 11318, 11332, 11454,\n", " 11510, 11668, 11690, 11842, 11868, 11920, 11942, 12040, 12058,\n", " 12090, 12152, 12276, 12314, 12358, 12376, 12394, 12454, 12550,\n", " 12576, 12610, 12738, 12812, 12952, 13040, 13138, 13176, 13326,\n", " 13598, 13670, 13696, 13742, 13874, 14012, 14094, 14164, 14272,\n", " 14424, 14478, 14518, 14598, 14688, 14874, 14964, 15038, 15102,\n", " 15156, 15244, 15282, 15326, 15364, 15480, 15536, 15646, 15784,\n", " 15832, 15918, 15966, 15980, 16014, 16044, 16110, 16212, 16238,\n", " 16264, 16346, 16480, 16964, 16984, 17466, 17484, 17662, 17712,\n", " 17826, 17866, 17956, 18004, 18236, 18362, 18378, 18414, 18494,\n", " 18560, 18630, 18686, 18820, 18886, 18974, 19126, 20462, 20494,\n", " 20524, 20650, 20738, 20850, 20968, 20998, 21050, 21106, 21134,\n", " 21180, 21216, 21312, 21348, 21390, 21406], dtype=uint64),\n", " array([180. , -16.06713266, 179.41350936, ..., 4.17369904,\n", " 30.83385242, 3.5091716 ]))" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from_geopandas(df.geometry.array)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "46.2 ms ± 4.34 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" ] } ], "source": [ "%timeit from_geopandas(df.geometry.array)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Using pygeos:" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "arr = pygeos.from_wkb(geopandas.array.to_wkb(df.geometry.array))" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[180. , -16.06713266],\n", " [180. , -16.55521657],\n", " [179.36414266, -16.80135408],\n", " ...,\n", " [ 31.88145 , 3.55827 ],\n", " [ 31.24556 , 3.7819 ],\n", " [ 30.83385242, 3.5091716 ]])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pygeos.get_coordinates(arr)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "154 µs ± 15.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" ] } ], "source": [ "%timeit pygeos.get_coordinates(arr)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note: this currently gives a 2D array (not x and y intertwined) and it does not give the start indices, but I think having a version that does those things should not be a order of magnitude slower." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python (dev)", "language": "python", "name": "dev" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 }