Last active
August 29, 2018 05:16
-
-
Save kuanb/871c6df0b770f88364aff7a8abc230fc to your computer and use it in GitHub Desktop.
Messing around with better spatial indexed of route line shapes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import sys\n", | |
"sys.path.append('..')\n", | |
"import peartree as pt\n", | |
"\n", | |
"# Read in a GTFS feed\n", | |
"feed = pt.get_representative_feed('gtfs/bart.zip')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>shape_id</th>\n", | |
" <th>shape_pt_lat</th>\n", | |
" <th>shape_pt_lon</th>\n", | |
" <th>shape_pt_sequence</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>01_shp</td>\n", | |
" <td>38.018937</td>\n", | |
" <td>-121.942073</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>01_shp</td>\n", | |
" <td>38.018920</td>\n", | |
" <td>-121.942449</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>01_shp</td>\n", | |
" <td>38.018917</td>\n", | |
" <td>-121.942633</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>01_shp</td>\n", | |
" <td>38.018919</td>\n", | |
" <td>-121.942820</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>01_shp</td>\n", | |
" <td>38.018915</td>\n", | |
" <td>-121.943121</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" shape_id shape_pt_lat shape_pt_lon shape_pt_sequence\n", | |
"0 01_shp 38.018937 -121.942073 0\n", | |
"1 01_shp 38.018920 -121.942449 1\n", | |
"2 01_shp 38.018917 -121.942633 2\n", | |
"3 01_shp 38.018919 -121.942820 3\n", | |
"4 01_shp 38.018915 -121.943121 4" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Identify a single shape id to use as an example\n", | |
"shapes = feed.shapes\n", | |
"shape_ids = shapes.shape_id.unique()\n", | |
"shape_id = shape_ids[0]\n", | |
"\n", | |
"\n", | |
"shape_df = shapes[shapes.shape_id == shape_id].sort_values(by='shape_pt_sequence')\n", | |
"shape_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from itertools import tee\n", | |
"import numpy as np\n", | |
"from rtree.index import Index\n", | |
"from shapely.geometry import LineString, Point\n", | |
"\n", | |
"def pairwise(iterable):\n", | |
" a, b = tee(iterable)\n", | |
" next(b, None)\n", | |
" return zip(a, b)\n", | |
"\n", | |
"class CandidateCache(object):\n", | |
" def __init__(self, shape_lons, shape_lats):\n", | |
" assert shape_lons.shape[0] > 1\n", | |
" assert shape_lons.shape == shape_lats.shape\n", | |
" self.shape_lons = shape_lons\n", | |
" self.shape_lats = shape_lats\n", | |
"\n", | |
" def spatial_index(self):\n", | |
" return Index((i, s.bounds, None) for i, s in enumerate(self.segments()))\n", | |
"\n", | |
" def segments(self):\n", | |
" return list(map(LineString, pairwise(zip(self.shape_lons, self.shape_lats))))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2370 coordinate points\n", | |
"2369 linestring segments\n" | |
] | |
} | |
], | |
"source": [ | |
"# These should be the same (well, n - 1) length... sanity check\n", | |
"print(len(shape_df.shape_pt_lon), 'coordinate points')\n", | |
"cc = CandidateCache(shape_df.shape_pt_lon, shape_df.shape_pt_lat)\n", | |
"print(len(cc.segments()), 'linestring segments')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 18.6 s, sys: 0 ns, total: 18.6 s\n", | |
"Wall time: 18.6 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"for i in range(100):\n", | |
" cc = CandidateCache(shape_df.shape_pt_lon, shape_df.shape_pt_lat)\n", | |
" cc.spatial_index()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 168, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import geopandas as gpd\n", | |
"from shapely.ops import transform\n", | |
"\n", | |
"class CandidateCacheAlternative(object):\n", | |
" def __init__(self, shape_lons, shape_lats):\n", | |
" self.shape_lons = shape_lons\n", | |
" self.shape_lats = shape_lats\n", | |
"\n", | |
" def spatial_index(self):\n", | |
" return Index((i, s.bounds, None) for i, s in enumerate(self.simple_segments()))\n", | |
" \n", | |
" def route_as_line(self):\n", | |
" raw_segments = zip(self.shape_lons, self.shape_lats)\n", | |
" return LineString(raw_segments)\n", | |
" \n", | |
" def simple_segments(self):\n", | |
" # TODO: Clean up reprojection logic, using geopandas is overkill\n", | |
" # and also compute the correct local meter projection (a la OSMnx)\n", | |
" gs1 = gpd.GeoSeries(self.route_as_line())\n", | |
" gs1.crs = {'init': 'epsg:4326'}\n", | |
" reprojected = gs1.to_crs(epsg=2163).geometry[0]\n", | |
" \n", | |
" # Clean to 5 meters\n", | |
" simplified = reprojected.simplify(5)\n", | |
" \n", | |
" # Return as original projection\n", | |
" gs2 = gpd.GeoSeries(simplified)\n", | |
" gs2.crs = {'init': 'epsg:2163'}\n", | |
" simple_original = gs2.to_crs(epsg=4326).geometry[0]\n", | |
" \n", | |
" xys = simple_original.coords.xy\n", | |
" lons = xys[0]\n", | |
" lats = xys[1]\n", | |
" \n", | |
" return list(map(LineString, pairwise(zip(lons, lats))))\n", | |
"\n", | |
" def segments(self):\n", | |
" return list(map(LineString, pairwise(zip(self.shape_lons, self.shape_lats))))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 169, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 3.94 s, sys: 20 ms, total: 3.96 s\n", | |
"Wall time: 3.97 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"for i in range(100):\n", | |
" cc = CandidateCacheAlternative(shape_df.shape_pt_lon, shape_df.shape_pt_lat)\n", | |
" cc.spatial_index()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 174, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2370 coordinate points\n", | |
"255 linestring segments\n" | |
] | |
} | |
], | |
"source": [ | |
"# These should be the same (well, n - 1) length... sanity check\n", | |
"print(len(shape_df.shape_pt_lon), 'coordinate points')\n", | |
"cc = CandidateCacheAlternative(shape_df.shape_pt_lon, shape_df.shape_pt_lat)\n", | |
"print(len(cc.simple_segments()), 'linestring segments')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 175, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cc1 = CandidateCache(shape_df.shape_pt_lon, shape_df.shape_pt_lat)\n", | |
"si_v1 = cc1.spatial_index()\n", | |
"\n", | |
"cc2 = CandidateCacheAlternative(shape_df.shape_pt_lon, shape_df.shape_pt_lat)\n", | |
"si_v2 = cc2.spatial_index()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 176, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from shapely.geometry import box, Point\n", | |
"import random\n", | |
"\n", | |
"def random_boxes_within(poly, num_points, buffer_max):\n", | |
" min_x, min_y, max_x, max_y = poly.bounds\n", | |
"\n", | |
" points = []\n", | |
" for i in range(num_points):\n", | |
" random_point = Point([random.uniform(min_x, max_x), random.uniform(min_y, max_y)])\n", | |
" points.append(random_point)\n", | |
"\n", | |
" return [p.buffer(random.random() * buffer_max).bounds for p in points]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 177, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"rbxs = random_boxes_within(cc2.route_as_line(), 500, 0.005)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 180, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 28.5 s, sys: 200 ms, total: 28.7 s\n", | |
"Wall time: 28.8 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"for i in range(1000):\n", | |
" for rp in rbxs:\n", | |
" si_v1.intersection(rp)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 181, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 21.4 s, sys: 130 ms, total: 21.5 s\n", | |
"Wall time: 21.5 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"for i in range(1000):\n", | |
" for rp in rbxs:\n", | |
" si_v2.intersection(rp)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment