Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kuanb/4d2d75726dfa163184197d38da1ff7e8 to your computer and use it in GitHub Desktop.
Save kuanb/4d2d75726dfa163184197d38da1ff7e8 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import csv\n",
"import urllib.request\n",
"import codecs\n",
"\n",
"url = 'https://u13557332.dl.dropboxusercontent.com/u/13557332/example_geometries.csv'\n",
"ftpstream = urllib.request.urlopen(url)\n",
"csvfile = csv.reader(codecs.iterdecode(ftpstream, 'utf-8'))\n",
"data = [row for row in csvfile]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import pandas as pd\n",
"pdf = pd.DataFrame(data[1:], columns=data[0])"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>value</th>\n",
" <th>geometry</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>14856</td>\n",
" <td>345.192394456171</td>\n",
" <td>(POLYGON ((649141.3988551921 119588.0508027334...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>8716</td>\n",
" <td>0.0316076464557733</td>\n",
" <td>(POLYGON ((638329.3738409473 104950.4713290278...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>661</td>\n",
" <td>0.0675289128828207</td>\n",
" <td>(POLYGON ((615153.4964940767 106669.0885181571...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>709</td>\n",
" <td>1.08899014222718</td>\n",
" <td>(POLYGON ((615551.2034368705 95687.35198442677...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>717</td>\n",
" <td>0.495591782569045</td>\n",
" <td>(POLYGON ((615508.9807402943 111413.6218265343...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id value \\\n",
"0 14856 345.192394456171 \n",
"1 8716 0.0316076464557733 \n",
"2 661 0.0675289128828207 \n",
"3 709 1.08899014222718 \n",
"4 717 0.495591782569045 \n",
"\n",
" geometry \n",
"0 (POLYGON ((649141.3988551921 119588.0508027334... \n",
"1 (POLYGON ((638329.3738409473 104950.4713290278... \n",
"2 (POLYGON ((615153.4964940767 106669.0885181571... \n",
"3 (POLYGON ((615551.2034368705 95687.35198442677... \n",
"4 (POLYGON ((615508.9807402943 111413.6218265343... "
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import geopandas as gpd\n",
"from shapely.wkt import loads\n",
"\n",
"geoms = list(map(loads, pdf.geometry.values))\n",
"gdf = gpd.GeoDataFrame(pdf, geometry=geoms)\n",
"\n",
"gdf.head()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"centroid_gs = gpd.GeoSeries(gdf.geometry.centroid)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"target_geom = gdf.loc[0].geometry.buffer(0.5)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.4946950984798605"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from timeit import timeit\n",
"\n",
"# method 1\n",
"def run_m1():\n",
" sub_v1 = gdf[gdf.intersects(target_geom)]\n",
"\n",
"time = timeit(run_m1, number=25)\n",
"time/25"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"gdf2 = gdf.copy()\n",
"sindex = gdf2.sindex"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.004886196120060049"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def run_m2():\n",
" possible = gdf2.iloc[sorted(list(sindex.intersection(target_geom.bounds)))]\n",
" sub_v2 = possible[possible.intersects(target_geom)]\n",
"\n",
"time = timeit(run_m2, number=25)\n",
"time/25"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"tb = target_geom.bounds\n",
"vector_target = {}\n",
"vector_target['bounds_minx'] = tb[0]\n",
"vector_target['bounds_miny'] = tb[1]\n",
"vector_target['bounds_maxx'] = tb[2]\n",
"vector_target['bounds_maxy'] = tb[3]"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.026924925280036406"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gdf3 = gdf.copy()\n",
"\n",
"target_geom.bounds\n",
"\n",
"bounds = gdf3.bounds\n",
"gdf3['bounds_minx'] = bounds['minx']\n",
"gdf3['bounds_miny'] = bounds['miny']\n",
"gdf3['bounds_maxx'] = bounds['maxx']\n",
"gdf3['bounds_maxy'] = bounds['maxy']\n",
"\n",
"def check_bounds_intersect(row, df):\n",
" # for each point in the bounds, make sure that an intersection\n",
" # can occur at some point\n",
"\n",
" # check if there are total overlaps with the geometry\n",
" total_overlap = ((df['bounds_minx'] <= row['bounds_minx']) &\n",
" (df['bounds_maxx'] >= row['bounds_maxx']) &\n",
" (df['bounds_miny'] <= row['bounds_miny']) &\n",
" (df['bounds_maxy'] >= row['bounds_maxy']))\n",
"\n",
" # check if at least one point lies within the row bounds\n",
" # bottom left\n",
" b_l_corner_in = ((df['bounds_minx'] >= row['bounds_minx']) &\n",
" (df['bounds_minx'] <= row['bounds_maxx']) &\n",
" (df['bounds_miny'] >= row['bounds_miny']) &\n",
" (df['bounds_miny'] <= row['bounds_maxy']))\n",
"\n",
" # bottom right\n",
" b_r_corner_in = ((df['bounds_maxx'] >= row['bounds_minx']) &\n",
" (df['bounds_maxx'] <= row['bounds_maxx']) &\n",
" (df['bounds_miny'] >= row['bounds_miny']) &\n",
" (df['bounds_miny'] <= row['bounds_maxy']))\n",
"\n",
" # top right\n",
" t_r_corner_in = ((df['bounds_maxx'] >= row['bounds_minx']) &\n",
" (df['bounds_maxx'] <= row['bounds_maxx']) &\n",
" (df['bounds_maxy'] >= row['bounds_miny']) &\n",
" (df['bounds_maxy'] <= row['bounds_maxy']))\n",
"\n",
" # top left\n",
" t_l_corner_in = ((df['bounds_minx'] >= row['bounds_minx']) &\n",
" (df['bounds_minx'] <= row['bounds_maxx']) &\n",
" (df['bounds_maxy'] >= row['bounds_miny']) &\n",
" (df['bounds_maxy'] <= row['bounds_maxy']))\n",
"\n",
" # check if at least one side intersects with the row geom\n",
" # left side\n",
" left_side_in = ((df['bounds_minx'] >= row['bounds_minx']) &\n",
" (df['bounds_minx'] <= row['bounds_maxx']) &\n",
" (df['bounds_miny'] <= row['bounds_miny']) &\n",
" (df['bounds_maxy'] >= row['bounds_maxy']))\n",
"\n",
" # right side\n",
" right_side_in = ((df['bounds_maxx'] >= row['bounds_minx']) &\n",
" (df['bounds_maxx'] <= row['bounds_maxx']) &\n",
" (df['bounds_miny'] <= row['bounds_miny']) &\n",
" (df['bounds_maxy'] >= row['bounds_maxy']))\n",
"\n",
" # bottom side\n",
" bottom_side_in = ((df['bounds_minx'] <= row['bounds_minx']) &\n",
" (df['bounds_maxx'] >= row['bounds_maxx']) &\n",
" (df['bounds_miny'] >= row['bounds_miny']) &\n",
" (df['bounds_miny'] <= row['bounds_maxy']))\n",
"\n",
" # top side\n",
" top_side_in = ((df['bounds_minx'] <= row['bounds_minx']) &\n",
" (df['bounds_maxx'] >= row['bounds_maxx']) &\n",
" (df['bounds_maxy'] >= row['bounds_miny']) &\n",
" (df['bounds_maxy'] <= row['bounds_maxy']))\n",
"\n",
" return (total_overlap | b_l_corner_in | b_r_corner_in |\n",
" t_r_corner_in | t_l_corner_in | left_side_in |\n",
" right_side_in | bottom_side_in | top_side_in)\n",
"\n",
"def run_m3():\n",
" possible = gdf3[check_bounds_intersect(vector_target, gdf3)]\n",
"# sub_v2 = possible[possible.intersects(target_geom)]\n",
"\n",
"time = timeit(run_m3, number=25)\n",
"time/25"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment