Skip to content

Instantly share code, notes, and snippets.

@jorisvandenbossche
Created October 28, 2019 09:18

Revisions

  1. jorisvandenbossche created this gist Oct 28, 2019.
    255 changes: 255 additions & 0 deletions pygeos-datashader.ipynb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,255 @@
    {
    "cells": [
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "# Getting a flat array of coordinates from array of Geometry objects"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
    "import geopandas\n",
    "import pygeos"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
    "# Quick copy of the relevant part of https://github.com/pyviz/datashader/pull/819\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "def from_geopandas(ga):\n",
    " line_parts = [\n",
    " _shapely_to_array_parts(shape) for shape in ga\n",
    " ]\n",
    " line_lengths = [\n",
    " sum([len(part) for part in parts])\n",
    " for parts in line_parts\n",
    " ]\n",
    " flat_array = np.concatenate(\n",
    " [part for parts in line_parts for part in parts]\n",
    " )\n",
    " start_indices = np.concatenate(\n",
    " [[0], line_lengths[:-1]]\n",
    " ).astype('uint').cumsum()\n",
    " return start_indices, flat_array\n",
    " \n",
    " \n",
    "def _polygon_to_array_parts(polygon):\n",
    " import shapely.geometry as sg\n",
    " shape = sg.polygon.orient(polygon)\n",
    " exterior = np.asarray(shape.exterior.ctypes)\n",
    " polygon_parts = [exterior]\n",
    " hole_separator = np.array([-np.inf, -np.inf])\n",
    " for ring in shape.interiors:\n",
    " interior = np.asarray(ring.ctypes)\n",
    " polygon_parts.append(hole_separator)\n",
    " polygon_parts.append(interior)\n",
    " return polygon_parts\n",
    "\n",
    "def _shapely_to_array_parts(shape):\n",
    " import shapely.geometry as sg\n",
    " if isinstance(shape, sg.Polygon):\n",
    " # Single polygon\n",
    " return _polygon_to_array_parts(shape)\n",
    " elif isinstance(shape, sg.MultiPolygon):\n",
    " shape = list(shape)\n",
    " polygon_parts = _polygon_to_array_parts(shape[0])\n",
    " polygon_separator = np.array([np.inf, np.inf])\n",
    " for polygon in shape[1:]:\n",
    " polygon_parts.append(polygon_separator)\n",
    " polygon_parts.extend(_polygon_to_array_parts(polygon))\n",
    " return polygon_parts\n",
    " else:\n",
    " raise ValueError(\"\"\"\n",
    "Received invalid value of type {typ}. Must be an instance of\n",
    "shapely.geometry.Polygon or shapely.geometry.MultiPolygon\"\"\"\n",
    " .format(typ=type(shape).__name__))"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "Testing on the \"countries\" data:"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
    "df = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "Datashader's method:"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 11,
    "metadata": {},
    "outputs": [
    {
    "data": {
    "text/plain": [
    "(array([ 0, 48, 152, 208, 1854, 2766, 2990, 3098, 3266,\n",
    " 3790, 4034, 4264, 4512, 4586, 4660, 4822, 4938, 4980,\n",
    " 5032, 6308, 6354, 6374, 6556, 6820, 6838, 6860, 7050,\n",
    " 7074, 7414, 7456, 7862, 7982, 8134, 8334, 8438, 8510,\n",
    " 8614, 8728, 8768, 8838, 8878, 9062, 9142, 9194, 9346,\n",
    " 9412, 9430, 9452, 9536, 9610, 9690, 9778, 9866, 10018,\n",
    " 10096, 10146, 10262, 10378, 10500, 10538, 10588, 10680, 10820,\n",
    " 10858, 10912, 10956, 11034, 11158, 11256, 11318, 11332, 11454,\n",
    " 11510, 11668, 11690, 11842, 11868, 11920, 11942, 12040, 12058,\n",
    " 12090, 12152, 12276, 12314, 12358, 12376, 12394, 12454, 12550,\n",
    " 12576, 12610, 12738, 12812, 12952, 13040, 13138, 13176, 13326,\n",
    " 13598, 13670, 13696, 13742, 13874, 14012, 14094, 14164, 14272,\n",
    " 14424, 14478, 14518, 14598, 14688, 14874, 14964, 15038, 15102,\n",
    " 15156, 15244, 15282, 15326, 15364, 15480, 15536, 15646, 15784,\n",
    " 15832, 15918, 15966, 15980, 16014, 16044, 16110, 16212, 16238,\n",
    " 16264, 16346, 16480, 16964, 16984, 17466, 17484, 17662, 17712,\n",
    " 17826, 17866, 17956, 18004, 18236, 18362, 18378, 18414, 18494,\n",
    " 18560, 18630, 18686, 18820, 18886, 18974, 19126, 20462, 20494,\n",
    " 20524, 20650, 20738, 20850, 20968, 20998, 21050, 21106, 21134,\n",
    " 21180, 21216, 21312, 21348, 21390, 21406], dtype=uint64),\n",
    " array([180. , -16.06713266, 179.41350936, ..., 4.17369904,\n",
    " 30.83385242, 3.5091716 ]))"
    ]
    },
    "execution_count": 11,
    "metadata": {},
    "output_type": "execute_result"
    }
    ],
    "source": [
    "from_geopandas(df.geometry.array)"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 12,
    "metadata": {},
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "46.2 ms ± 4.34 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
    ]
    }
    ],
    "source": [
    "%timeit from_geopandas(df.geometry.array)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "Using pygeos:"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
    "arr = pygeos.from_wkb(geopandas.array.to_wkb(df.geometry.array))"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 14,
    "metadata": {},
    "outputs": [
    {
    "data": {
    "text/plain": [
    "array([[180. , -16.06713266],\n",
    " [180. , -16.55521657],\n",
    " [179.36414266, -16.80135408],\n",
    " ...,\n",
    " [ 31.88145 , 3.55827 ],\n",
    " [ 31.24556 , 3.7819 ],\n",
    " [ 30.83385242, 3.5091716 ]])"
    ]
    },
    "execution_count": 14,
    "metadata": {},
    "output_type": "execute_result"
    }
    ],
    "source": [
    "pygeos.get_coordinates(arr)"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": 15,
    "metadata": {},
    "outputs": [
    {
    "name": "stdout",
    "output_type": "stream",
    "text": [
    "154 µs ± 15.4 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
    ]
    }
    ],
    "source": [
    "%timeit pygeos.get_coordinates(arr)"
    ]
    },
    {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
    "Note: this currently gives a 2D array (not x and y intertwined) and it does not give the start indices, but I think having a version that does those things should not be a order of magnitude slower."
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": []
    }
    ],
    "metadata": {
    "kernelspec": {
    "display_name": "Python (dev)",
    "language": "python",
    "name": "dev"
    },
    "language_info": {
    "codemirror_mode": {
    "name": "ipython",
    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
    }
    },
    "nbformat": 4,
    "nbformat_minor": 4
    }