Skip to content

Instantly share code, notes, and snippets.

@kuanb
Last active August 29, 2018 05:16
Show Gist options
  • Save kuanb/871c6df0b770f88364aff7a8abc230fc to your computer and use it in GitHub Desktop.
Save kuanb/871c6df0b770f88364aff7a8abc230fc to your computer and use it in GitHub Desktop.
Messing around with better spatial indexed of route line shapes
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('..')\n",
"import peartree as pt\n",
"\n",
"# Read in a GTFS feed\n",
"feed = pt.get_representative_feed('gtfs/bart.zip')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>shape_id</th>\n",
" <th>shape_pt_lat</th>\n",
" <th>shape_pt_lon</th>\n",
" <th>shape_pt_sequence</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>01_shp</td>\n",
" <td>38.018937</td>\n",
" <td>-121.942073</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>01_shp</td>\n",
" <td>38.018920</td>\n",
" <td>-121.942449</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>01_shp</td>\n",
" <td>38.018917</td>\n",
" <td>-121.942633</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>01_shp</td>\n",
" <td>38.018919</td>\n",
" <td>-121.942820</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>01_shp</td>\n",
" <td>38.018915</td>\n",
" <td>-121.943121</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" shape_id shape_pt_lat shape_pt_lon shape_pt_sequence\n",
"0 01_shp 38.018937 -121.942073 0\n",
"1 01_shp 38.018920 -121.942449 1\n",
"2 01_shp 38.018917 -121.942633 2\n",
"3 01_shp 38.018919 -121.942820 3\n",
"4 01_shp 38.018915 -121.943121 4"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Identify a single shape id to use as an example\n",
"shapes = feed.shapes\n",
"shape_ids = shapes.shape_id.unique()\n",
"shape_id = shape_ids[0]\n",
"\n",
"\n",
"shape_df = shapes[shapes.shape_id == shape_id].sort_values(by='shape_pt_sequence')\n",
"shape_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"from itertools import tee\n",
"import numpy as np\n",
"from rtree.index import Index\n",
"from shapely.geometry import LineString, Point\n",
"\n",
"def pairwise(iterable):\n",
" a, b = tee(iterable)\n",
" next(b, None)\n",
" return zip(a, b)\n",
"\n",
"class CandidateCache(object):\n",
" def __init__(self, shape_lons, shape_lats):\n",
" assert shape_lons.shape[0] > 1\n",
" assert shape_lons.shape == shape_lats.shape\n",
" self.shape_lons = shape_lons\n",
" self.shape_lats = shape_lats\n",
"\n",
" def spatial_index(self):\n",
" return Index((i, s.bounds, None) for i, s in enumerate(self.segments()))\n",
"\n",
" def segments(self):\n",
" return list(map(LineString, pairwise(zip(self.shape_lons, self.shape_lats))))"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2370 coordinate points\n",
"2369 linestring segments\n"
]
}
],
"source": [
"# These should be the same (well, n - 1) length... sanity check\n",
"print(len(shape_df.shape_pt_lon), 'coordinate points')\n",
"cc = CandidateCache(shape_df.shape_pt_lon, shape_df.shape_pt_lat)\n",
"print(len(cc.segments()), 'linestring segments')"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 18.6 s, sys: 0 ns, total: 18.6 s\n",
"Wall time: 18.6 s\n"
]
}
],
"source": [
"%%time\n",
"for i in range(100):\n",
" cc = CandidateCache(shape_df.shape_pt_lon, shape_df.shape_pt_lat)\n",
" cc.spatial_index()"
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {},
"outputs": [],
"source": [
"import geopandas as gpd\n",
"from shapely.ops import transform\n",
"\n",
"class CandidateCacheAlternative(object):\n",
" def __init__(self, shape_lons, shape_lats):\n",
" self.shape_lons = shape_lons\n",
" self.shape_lats = shape_lats\n",
"\n",
" def spatial_index(self):\n",
" return Index((i, s.bounds, None) for i, s in enumerate(self.simple_segments()))\n",
" \n",
" def route_as_line(self):\n",
" raw_segments = zip(self.shape_lons, self.shape_lats)\n",
" return LineString(raw_segments)\n",
" \n",
" def simple_segments(self):\n",
" # TODO: Clean up reprojection logic, using geopandas is overkill\n",
" # and also compute the correct local meter projection (a la OSMnx)\n",
" gs1 = gpd.GeoSeries(self.route_as_line())\n",
" gs1.crs = {'init': 'epsg:4326'}\n",
" reprojected = gs1.to_crs(epsg=2163).geometry[0]\n",
" \n",
" # Clean to 5 meters\n",
" simplified = reprojected.simplify(5)\n",
" \n",
" # Return as original projection\n",
" gs2 = gpd.GeoSeries(simplified)\n",
" gs2.crs = {'init': 'epsg:2163'}\n",
" simple_original = gs2.to_crs(epsg=4326).geometry[0]\n",
" \n",
" xys = simple_original.coords.xy\n",
" lons = xys[0]\n",
" lats = xys[1]\n",
" \n",
" return list(map(LineString, pairwise(zip(lons, lats))))\n",
"\n",
" def segments(self):\n",
" return list(map(LineString, pairwise(zip(self.shape_lons, self.shape_lats))))"
]
},
{
"cell_type": "code",
"execution_count": 169,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 3.94 s, sys: 20 ms, total: 3.96 s\n",
"Wall time: 3.97 s\n"
]
}
],
"source": [
"%%time\n",
"for i in range(100):\n",
" cc = CandidateCacheAlternative(shape_df.shape_pt_lon, shape_df.shape_pt_lat)\n",
" cc.spatial_index()"
]
},
{
"cell_type": "code",
"execution_count": 174,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2370 coordinate points\n",
"255 linestring segments\n"
]
}
],
"source": [
"# These should be the same (well, n - 1) length... sanity check\n",
"print(len(shape_df.shape_pt_lon), 'coordinate points')\n",
"cc = CandidateCacheAlternative(shape_df.shape_pt_lon, shape_df.shape_pt_lat)\n",
"print(len(cc.simple_segments()), 'linestring segments')"
]
},
{
"cell_type": "code",
"execution_count": 175,
"metadata": {},
"outputs": [],
"source": [
"cc1 = CandidateCache(shape_df.shape_pt_lon, shape_df.shape_pt_lat)\n",
"si_v1 = cc1.spatial_index()\n",
"\n",
"cc2 = CandidateCacheAlternative(shape_df.shape_pt_lon, shape_df.shape_pt_lat)\n",
"si_v2 = cc2.spatial_index()"
]
},
{
"cell_type": "code",
"execution_count": 176,
"metadata": {},
"outputs": [],
"source": [
"from shapely.geometry import box, Point\n",
"import random\n",
"\n",
"def random_boxes_within(poly, num_points, buffer_max):\n",
" min_x, min_y, max_x, max_y = poly.bounds\n",
"\n",
" points = []\n",
" for i in range(num_points):\n",
" random_point = Point([random.uniform(min_x, max_x), random.uniform(min_y, max_y)])\n",
" points.append(random_point)\n",
"\n",
" return [p.buffer(random.random() * buffer_max).bounds for p in points]"
]
},
{
"cell_type": "code",
"execution_count": 177,
"metadata": {},
"outputs": [],
"source": [
"rbxs = random_boxes_within(cc2.route_as_line(), 500, 0.005)"
]
},
{
"cell_type": "code",
"execution_count": 180,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 28.5 s, sys: 200 ms, total: 28.7 s\n",
"Wall time: 28.8 s\n"
]
}
],
"source": [
"%%time\n",
"for i in range(1000):\n",
" for rp in rbxs:\n",
" si_v1.intersection(rp)"
]
},
{
"cell_type": "code",
"execution_count": 181,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 21.4 s, sys: 130 ms, total: 21.5 s\n",
"Wall time: 21.5 s\n"
]
}
],
"source": [
"%%time\n",
"for i in range(1000):\n",
" for rp in rbxs:\n",
" si_v2.intersection(rp)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment