Skip to content

Instantly share code, notes, and snippets.

@rutgerhofste
Created August 24, 2017 21:25
Show Gist options
  • Save rutgerhofste/0b4947ab1be6a6746e127954c9bd520d to your computer and use it in GitHub Desktop.
Save rutgerhofste/0b4947ab1be6a6746e127954c9bd520d to your computer and use it in GitHub Desktop.
geopandasSolved
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"S3_INPUT_PATH = \"s3://wri-projects/Aqueduct30/test/testGpd/\"\n",
"EC2_INPUT_PATH = \"/volumes/data/temp/\"\n",
"EC2_OUTPUT_PATH = \"/volumes/data/temp/output/\"\n",
"S3_OUTPUT_PATH = \"s3://wri-projects/Aqueduct30/test/output/\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"!mkdir -p {EC2_INPUT_PATH}\n",
"!mkdir -p {EC2_OUTPUT_PATH}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"!aws s3 cp {S3_INPUT_PATH} {EC2_INPUT_PATH} --recursive --quiet"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import geopandas as gpd\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib notebook\n",
"import os\n",
"import folium\n",
"from shapely.wkt import loads\n",
"from shapely.geometry import Point"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"gdfFAO = gpd.read_file('/volumes/data/temp/FAO/faoBuffered.shp')\n",
"gdfHybas = gpd.read_file('/volumes/data/temp/Hybas/hybas_lev06_v1c_merged_fiona_Cropped_V01.shp')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"gdfHybas = gdfHybas.set_index('PFAF_ID')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>COAST</th>\n",
" <th>DIST_MAIN</th>\n",
" <th>DIST_SINK</th>\n",
" <th>ENDO</th>\n",
" <th>HYBAS_ID</th>\n",
" <th>MAIN_BAS</th>\n",
" <th>NEXT_DOWN</th>\n",
" <th>NEXT_SINK</th>\n",
" <th>ORDER</th>\n",
" <th>SORT</th>\n",
" <th>SUB_AREA</th>\n",
" <th>UP_AREA</th>\n",
" <th>geometry</th>\n",
" </tr>\n",
" <tr>\n",
" <th>PFAF_ID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>232260</th>\n",
" <td>0</td>\n",
" <td>227.1</td>\n",
" <td>227.1</td>\n",
" <td>0</td>\n",
" <td>2060499090</td>\n",
" <td>2060021030</td>\n",
" <td>2060502710</td>\n",
" <td>2060021030</td>\n",
" <td>2</td>\n",
" <td>526</td>\n",
" <td>13664.2</td>\n",
" <td>13664.2</td>\n",
" <td>POLYGON ((1.133333333333358 47.35833333333336,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>232270</th>\n",
" <td>0</td>\n",
" <td>227.3</td>\n",
" <td>227.3</td>\n",
" <td>0</td>\n",
" <td>2060498990</td>\n",
" <td>2060021030</td>\n",
" <td>2060502710</td>\n",
" <td>2060021030</td>\n",
" <td>1</td>\n",
" <td>527</td>\n",
" <td>10041.2</td>\n",
" <td>42572.4</td>\n",
" <td>POLYGON ((2.87916666666669 46.73750000000003, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>232405</th>\n",
" <td>0</td>\n",
" <td>275.6</td>\n",
" <td>275.6</td>\n",
" <td>0</td>\n",
" <td>2060455290</td>\n",
" <td>2060022150</td>\n",
" <td>2060446440</td>\n",
" <td>2060022150</td>\n",
" <td>1</td>\n",
" <td>542</td>\n",
" <td>1088.5</td>\n",
" <td>44526.2</td>\n",
" <td>POLYGON ((2.079166666666683 48.98750000000003,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>232404</th>\n",
" <td>0</td>\n",
" <td>275.7</td>\n",
" <td>275.7</td>\n",
" <td>0</td>\n",
" <td>2060455180</td>\n",
" <td>2060022150</td>\n",
" <td>2060446440</td>\n",
" <td>2060022150</td>\n",
" <td>2</td>\n",
" <td>543</td>\n",
" <td>16783.0</td>\n",
" <td>16783.0</td>\n",
" <td>POLYGON ((3.770833333333363 49.19166666666669,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>232407</th>\n",
" <td>0</td>\n",
" <td>353.6</td>\n",
" <td>353.6</td>\n",
" <td>0</td>\n",
" <td>2060459800</td>\n",
" <td>2060022150</td>\n",
" <td>2060455290</td>\n",
" <td>2060022150</td>\n",
" <td>1</td>\n",
" <td>544</td>\n",
" <td>7203.8</td>\n",
" <td>12783.1</td>\n",
" <td>POLYGON ((2.90416666666669 49.13750000000003, ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" COAST DIST_MAIN DIST_SINK ENDO HYBAS_ID MAIN_BAS \\\n",
"PFAF_ID \n",
"232260 0 227.1 227.1 0 2060499090 2060021030 \n",
"232270 0 227.3 227.3 0 2060498990 2060021030 \n",
"232405 0 275.6 275.6 0 2060455290 2060022150 \n",
"232404 0 275.7 275.7 0 2060455180 2060022150 \n",
"232407 0 353.6 353.6 0 2060459800 2060022150 \n",
"\n",
" NEXT_DOWN NEXT_SINK ORDER SORT SUB_AREA UP_AREA \\\n",
"PFAF_ID \n",
"232260 2060502710 2060021030 2 526 13664.2 13664.2 \n",
"232270 2060502710 2060021030 1 527 10041.2 42572.4 \n",
"232405 2060446440 2060022150 1 542 1088.5 44526.2 \n",
"232404 2060446440 2060022150 2 543 16783.0 16783.0 \n",
"232407 2060455290 2060022150 1 544 7203.8 12783.1 \n",
"\n",
" geometry \n",
"PFAF_ID \n",
"232260 POLYGON ((1.133333333333358 47.35833333333336,... \n",
"232270 POLYGON ((2.87916666666669 46.73750000000003, ... \n",
"232405 POLYGON ((2.079166666666683 48.98750000000003,... \n",
"232404 POLYGON ((3.770833333333363 49.19166666666669,... \n",
"232407 POLYGON ((2.90416666666669 49.13750000000003, ... "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gdfHybas.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"gsHybasBuffer = gdfHybas['geometry'].buffer(-0.005,resolution=16)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"in order to use merge, I needed to convert the geoSeries to a geoDataFrame"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"gdfHybasBuffer =gpd.GeoDataFrame(geometry=gsHybasBuffer)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The old geometry will be replaced by the new geometry (buffered)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"gdfHybas = gdfHybas.drop('geometry',1)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>geometry</th>\n",
" </tr>\n",
" <tr>\n",
" <th>PFAF_ID</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>232260</th>\n",
" <td>POLYGON ((0.4802056493726044 47.33676526482385...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>232270</th>\n",
" <td>POLYGON ((0.3946555531847661 47.49782138220293...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>232405</th>\n",
" <td>POLYGON ((1.985697993274555 48.88870001490769,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>232404</th>\n",
" <td>POLYGON ((1.642712429100236 49.6413141345426, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>232407</th>\n",
" <td>POLYGON ((2.414370437159644 48.81750000000003,...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" geometry\n",
"PFAF_ID \n",
"232260 POLYGON ((0.4802056493726044 47.33676526482385...\n",
"232270 POLYGON ((0.3946555531847661 47.49782138220293...\n",
"232405 POLYGON ((1.985697993274555 48.88870001490769,...\n",
"232404 POLYGON ((1.642712429100236 49.6413141345426, ...\n",
"232407 POLYGON ((2.414370437159644 48.81750000000003,..."
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gdfHybasBuffer.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>COAST</th>\n",
" <th>DIST_MAIN</th>\n",
" <th>DIST_SINK</th>\n",
" <th>ENDO</th>\n",
" <th>HYBAS_ID</th>\n",
" <th>MAIN_BAS</th>\n",
" <th>NEXT_DOWN</th>\n",
" <th>NEXT_SINK</th>\n",
" <th>ORDER</th>\n",
" <th>SORT</th>\n",
" <th>SUB_AREA</th>\n",
" <th>UP_AREA</th>\n",
" </tr>\n",
" <tr>\n",
" <th>PFAF_ID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>232260</th>\n",
" <td>0</td>\n",
" <td>227.1</td>\n",
" <td>227.1</td>\n",
" <td>0</td>\n",
" <td>2060499090</td>\n",
" <td>2060021030</td>\n",
" <td>2060502710</td>\n",
" <td>2060021030</td>\n",
" <td>2</td>\n",
" <td>526</td>\n",
" <td>13664.2</td>\n",
" <td>13664.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>232270</th>\n",
" <td>0</td>\n",
" <td>227.3</td>\n",
" <td>227.3</td>\n",
" <td>0</td>\n",
" <td>2060498990</td>\n",
" <td>2060021030</td>\n",
" <td>2060502710</td>\n",
" <td>2060021030</td>\n",
" <td>1</td>\n",
" <td>527</td>\n",
" <td>10041.2</td>\n",
" <td>42572.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>232405</th>\n",
" <td>0</td>\n",
" <td>275.6</td>\n",
" <td>275.6</td>\n",
" <td>0</td>\n",
" <td>2060455290</td>\n",
" <td>2060022150</td>\n",
" <td>2060446440</td>\n",
" <td>2060022150</td>\n",
" <td>1</td>\n",
" <td>542</td>\n",
" <td>1088.5</td>\n",
" <td>44526.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>232404</th>\n",
" <td>0</td>\n",
" <td>275.7</td>\n",
" <td>275.7</td>\n",
" <td>0</td>\n",
" <td>2060455180</td>\n",
" <td>2060022150</td>\n",
" <td>2060446440</td>\n",
" <td>2060022150</td>\n",
" <td>2</td>\n",
" <td>543</td>\n",
" <td>16783.0</td>\n",
" <td>16783.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>232407</th>\n",
" <td>0</td>\n",
" <td>353.6</td>\n",
" <td>353.6</td>\n",
" <td>0</td>\n",
" <td>2060459800</td>\n",
" <td>2060022150</td>\n",
" <td>2060455290</td>\n",
" <td>2060022150</td>\n",
" <td>1</td>\n",
" <td>544</td>\n",
" <td>7203.8</td>\n",
" <td>12783.1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" COAST DIST_MAIN DIST_SINK ENDO HYBAS_ID MAIN_BAS \\\n",
"PFAF_ID \n",
"232260 0 227.1 227.1 0 2060499090 2060021030 \n",
"232270 0 227.3 227.3 0 2060498990 2060021030 \n",
"232405 0 275.6 275.6 0 2060455290 2060022150 \n",
"232404 0 275.7 275.7 0 2060455180 2060022150 \n",
"232407 0 353.6 353.6 0 2060459800 2060022150 \n",
"\n",
" NEXT_DOWN NEXT_SINK ORDER SORT SUB_AREA UP_AREA \n",
"PFAF_ID \n",
"232260 2060502710 2060021030 2 526 13664.2 13664.2 \n",
"232270 2060502710 2060021030 1 527 10041.2 42572.4 \n",
"232405 2060446440 2060022150 1 542 1088.5 44526.2 \n",
"232404 2060446440 2060022150 2 543 16783.0 16783.0 \n",
"232407 2060455290 2060022150 1 544 7203.8 12783.1 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gdfHybas.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Contrary to pandas, geopandas does not automatically merge based on index. Therefore I copy the indices to new columns. Hopefully merging on index by default will be supported in the future. "
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"gdfHybas['PFAF_ID2'] = gdfHybas.index\n",
"gdfHybasBuffer['PFAF_ID2'] = gdfHybasBuffer.index"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"gdfHybasNew = gdfHybasBuffer.merge(gdfHybas,how=\"outer\",on=\"PFAF_ID2\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>geometry</th>\n",
" <th>PFAF_ID2</th>\n",
" <th>COAST</th>\n",
" <th>DIST_MAIN</th>\n",
" <th>DIST_SINK</th>\n",
" <th>ENDO</th>\n",
" <th>HYBAS_ID</th>\n",
" <th>MAIN_BAS</th>\n",
" <th>NEXT_DOWN</th>\n",
" <th>NEXT_SINK</th>\n",
" <th>ORDER</th>\n",
" <th>SORT</th>\n",
" <th>SUB_AREA</th>\n",
" <th>UP_AREA</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>POLYGON ((0.4802056493726044 47.33676526482385...</td>\n",
" <td>232260</td>\n",
" <td>0</td>\n",
" <td>227.1</td>\n",
" <td>227.1</td>\n",
" <td>0</td>\n",
" <td>2060499090</td>\n",
" <td>2060021030</td>\n",
" <td>2060502710</td>\n",
" <td>2060021030</td>\n",
" <td>2</td>\n",
" <td>526</td>\n",
" <td>13664.2</td>\n",
" <td>13664.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>POLYGON ((0.3946555531847661 47.49782138220293...</td>\n",
" <td>232270</td>\n",
" <td>0</td>\n",
" <td>227.3</td>\n",
" <td>227.3</td>\n",
" <td>0</td>\n",
" <td>2060498990</td>\n",
" <td>2060021030</td>\n",
" <td>2060502710</td>\n",
" <td>2060021030</td>\n",
" <td>1</td>\n",
" <td>527</td>\n",
" <td>10041.2</td>\n",
" <td>42572.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>POLYGON ((1.985697993274555 48.88870001490769,...</td>\n",
" <td>232405</td>\n",
" <td>0</td>\n",
" <td>275.6</td>\n",
" <td>275.6</td>\n",
" <td>0</td>\n",
" <td>2060455290</td>\n",
" <td>2060022150</td>\n",
" <td>2060446440</td>\n",
" <td>2060022150</td>\n",
" <td>1</td>\n",
" <td>542</td>\n",
" <td>1088.5</td>\n",
" <td>44526.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>POLYGON ((1.642712429100236 49.6413141345426, ...</td>\n",
" <td>232404</td>\n",
" <td>0</td>\n",
" <td>275.7</td>\n",
" <td>275.7</td>\n",
" <td>0</td>\n",
" <td>2060455180</td>\n",
" <td>2060022150</td>\n",
" <td>2060446440</td>\n",
" <td>2060022150</td>\n",
" <td>2</td>\n",
" <td>543</td>\n",
" <td>16783.0</td>\n",
" <td>16783.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>POLYGON ((2.414370437159644 48.81750000000003,...</td>\n",
" <td>232407</td>\n",
" <td>0</td>\n",
" <td>353.6</td>\n",
" <td>353.6</td>\n",
" <td>0</td>\n",
" <td>2060459800</td>\n",
" <td>2060022150</td>\n",
" <td>2060455290</td>\n",
" <td>2060022150</td>\n",
" <td>1</td>\n",
" <td>544</td>\n",
" <td>7203.8</td>\n",
" <td>12783.1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" geometry PFAF_ID2 COAST \\\n",
"0 POLYGON ((0.4802056493726044 47.33676526482385... 232260 0 \n",
"1 POLYGON ((0.3946555531847661 47.49782138220293... 232270 0 \n",
"2 POLYGON ((1.985697993274555 48.88870001490769,... 232405 0 \n",
"3 POLYGON ((1.642712429100236 49.6413141345426, ... 232404 0 \n",
"4 POLYGON ((2.414370437159644 48.81750000000003,... 232407 0 \n",
"\n",
" DIST_MAIN DIST_SINK ENDO HYBAS_ID MAIN_BAS NEXT_DOWN NEXT_SINK \\\n",
"0 227.1 227.1 0 2060499090 2060021030 2060502710 2060021030 \n",
"1 227.3 227.3 0 2060498990 2060021030 2060502710 2060021030 \n",
"2 275.6 275.6 0 2060455290 2060022150 2060446440 2060022150 \n",
"3 275.7 275.7 0 2060455180 2060022150 2060446440 2060022150 \n",
"4 353.6 353.6 0 2060459800 2060022150 2060455290 2060022150 \n",
"\n",
" ORDER SORT SUB_AREA UP_AREA \n",
"0 2 526 13664.2 13664.2 \n",
"1 1 527 10041.2 42572.4 \n",
"2 1 542 1088.5 44526.2 \n",
"3 2 543 16783.0 16783.0 \n",
"4 1 544 7203.8 12783.1 "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gdfHybasNew.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that geopandas did not preserve the Index. Hopefully that will get fixed in the future as well. "
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"gdfHybasBuffer.to_file(os.path.join(EC2_OUTPUT_PATH,'output.shp'))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"!aws s3 cp {EC2_OUTPUT_PATH} {S3_OUTPUT_PATH} --recursive --quiet"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 35",
"language": "python",
"name": "python35"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment