Created
July 13, 2017 02:17
-
-
Save kuanb/4d2d75726dfa163184197d38da1ff7e8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import csv\n", | |
"import urllib.request\n", | |
"import codecs\n", | |
"\n", | |
"url = 'https://u13557332.dl.dropboxusercontent.com/u/13557332/example_geometries.csv'\n", | |
"\n", | |
"# Download the CSV and parse it while the connection is open; the `with`\n", | |
"# block closes the HTTP response even if decoding or parsing fails.\n", | |
"with urllib.request.urlopen(url) as ftpstream:\n", | |
"    csvfile = csv.reader(codecs.iterdecode(ftpstream, 'utf-8'))\n", | |
"    # The reader is lazy, so every row is read here, before the stream closes.\n", | |
"    data = list(csvfile)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"# The first parsed row holds the column names; the remaining rows are records.\n", | |
"pdf = pd.DataFrame(data=data[1:], columns=data[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>value</th>\n", | |
" <th>geometry</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>14856</td>\n", | |
" <td>345.192394456171</td>\n", | |
" <td>(POLYGON ((649141.3988551921 119588.0508027334...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>8716</td>\n", | |
" <td>0.0316076464557733</td>\n", | |
" <td>(POLYGON ((638329.3738409473 104950.4713290278...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>661</td>\n", | |
" <td>0.0675289128828207</td>\n", | |
" <td>(POLYGON ((615153.4964940767 106669.0885181571...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>709</td>\n", | |
" <td>1.08899014222718</td>\n", | |
" <td>(POLYGON ((615551.2034368705 95687.35198442677...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>717</td>\n", | |
" <td>0.495591782569045</td>\n", | |
" <td>(POLYGON ((615508.9807402943 111413.6218265343...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id value \\\n", | |
"0 14856 345.192394456171 \n", | |
"1 8716 0.0316076464557733 \n", | |
"2 661 0.0675289128828207 \n", | |
"3 709 1.08899014222718 \n", | |
"4 717 0.495591782569045 \n", | |
"\n", | |
" geometry \n", | |
"0 (POLYGON ((649141.3988551921 119588.0508027334... \n", | |
"1 (POLYGON ((638329.3738409473 104950.4713290278... \n", | |
"2 (POLYGON ((615153.4964940767 106669.0885181571... \n", | |
"3 (POLYGON ((615551.2034368705 95687.35198442677... \n", | |
"4 (POLYGON ((615508.9807402943 111413.6218265343... " | |
] | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import geopandas as gpd\n", | |
"from shapely.wkt import loads\n", | |
"\n", | |
"# Parse each WKT string into a shapely geometry, then attach the parsed\n", | |
"# objects as the geometry of the frame.\n", | |
"geoms = [loads(wkt) for wkt in pdf.geometry.values]\n", | |
"gdf = gpd.GeoDataFrame(pdf, geometry=geoms)\n", | |
"\n", | |
"gdf.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Centroid of every geometry, stored as a standalone GeoSeries.\n", | |
"centroid_gs = gpd.GeoSeries(gdf['geometry'].centroid)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Query shape: the geometry of the row labelled 0, buffered by 0.5\n", | |
"# (in the units of the data's coordinates).\n", | |
"target_geom = gdf.geometry.loc[0].buffer(0.5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.4946950984798605" | |
] | |
}, | |
"execution_count": 63, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from timeit import timeit\n", | |
"\n", | |
"# method 1: brute force, testing every geometry against the target\n", | |
"def run_m1():\n", | |
"    \"\"\"Select the rows of gdf whose geometry intersects target_geom.\n", | |
"\n", | |
"    The selection is discarded; the function exists only to be timed.\n", | |
"    \"\"\"\n", | |
"    hits = gdf.intersects(target_geom)\n", | |
"    gdf[hits]\n", | |
"\n", | |
"# Total seconds for 25 calls, then the mean per call as the cell output.\n", | |
"time = timeit(run_m1, number=25)\n", | |
"time / 25" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Work on a separate copy, and fetch its spatial index here so that the\n", | |
"# `.sindex` access is not part of the function that gets timed.\n", | |
"gdf2 = gdf.copy(deep=True)\n", | |
"sindex = gdf2.sindex" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 67, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.004886196120060049" | |
] | |
}, | |
"execution_count": 67, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"def run_m2():\n", | |
"    \"\"\"Two-stage query: spatial-index lookup, then an exact intersects test.\n", | |
"\n", | |
"    The selection is discarded; the function exists only to be timed.\n", | |
"    \"\"\"\n", | |
"    # Row positions the index returns for the target's bounding box.\n", | |
"    candidate_positions = sorted(sindex.intersection(target_geom.bounds))\n", | |
"    possible = gdf2.iloc[candidate_positions]\n", | |
"    # Exact geometric test, run only on those candidates.\n", | |
"    possible[possible.intersects(target_geom)]\n", | |
"\n", | |
"# Total seconds for 25 calls, then the mean per call as the cell output.\n", | |
"time = timeit(run_m2, number=25)\n", | |
"time / 25" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Repackage the target's bounds tuple (minx, miny, maxx, maxy) under\n", | |
"# bounds_* keys so it can be indexed by name.\n", | |
"tb = target_geom.bounds\n", | |
"vector_target = {\n", | |
"    'bounds_minx': tb[0],\n", | |
"    'bounds_miny': tb[1],\n", | |
"    'bounds_maxx': tb[2],\n", | |
"    'bounds_maxy': tb[3],\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.026924925280036406" | |
] | |
}, | |
"execution_count": 69, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"gdf3 = gdf.copy()\n", | |
"\n", | |
"# Store each geometry's bounding-box edges as plain columns so the\n", | |
"# overlap test can run as vectorised column comparisons.\n", | |
"bounds = gdf3.bounds\n", | |
"for edge in ('minx', 'miny', 'maxx', 'maxy'):\n", | |
"    gdf3['bounds_' + edge] = bounds[edge]\n", | |
"\n", | |
"def check_bounds_intersect(row, df):\n", | |
"    \"\"\"Flag the rows of df whose bounding box overlaps the box given by row.\n", | |
"\n", | |
"    Two axis-aligned boxes overlap exactly when their extents overlap on\n", | |
"    both axes, i.e. each box starts at or before the point where the other\n", | |
"    one ends. Containment, corner-inside-box and edge-crossing arrangements\n", | |
"    all satisfy this single condition, so they need no separate checks.\n", | |
"    The comparisons are inclusive: boxes that only touch count as overlapping.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    row : mapping\n", | |
"        The query box; indexed with the keys 'bounds_minx', 'bounds_miny',\n", | |
"        'bounds_maxx' and 'bounds_maxy'.\n", | |
"    df : DataFrame\n", | |
"        One box per row, in columns with those same four names.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    Boolean Series on df's index, True where the two boxes overlap.\n", | |
"    \"\"\"\n", | |
"    overlaps_in_x = ((df['bounds_minx'] <= row['bounds_maxx']) &\n", | |
"                     (df['bounds_maxx'] >= row['bounds_minx']))\n", | |
"    overlaps_in_y = ((df['bounds_miny'] <= row['bounds_maxy']) &\n", | |
"                     (df['bounds_maxy'] >= row['bounds_miny']))\n", | |
"    return overlaps_in_x & overlaps_in_y\n", | |
"\n", | |
"def run_m3():\n", | |
"    \"\"\"Bounding-box filter only, using the precomputed bounds_* columns.\n", | |
"\n", | |
"    The exact intersects() refinement below is disabled, so this times the\n", | |
"    bounding-box filter alone. The selection is discarded.\n", | |
"    \"\"\"\n", | |
"    overlap_mask = check_bounds_intersect(vector_target, gdf3)\n", | |
"    possible = gdf3[overlap_mask]\n", | |
"    # Disabled refinement step:\n", | |
"    # sub_v2 = possible[possible.intersects(target_geom)]\n", | |
"\n", | |
"# Total seconds for 25 calls, then the mean per call as the cell output.\n", | |
"time = timeit(run_m3, number=25)\n", | |
"time / 25" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment