Skip to content

Instantly share code, notes, and snippets.

@sgibbes
Created September 12, 2018 16:30
Show Gist options
  • Save sgibbes/e8dc3684900d75b75308724705f09e0f to your computer and use it in GitHub Desktop.
Save sgibbes/e8dc3684900d75b75308724705f09e0f to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import pandas as pd\n",
"\n",
"# hide scipy compatibility warnings\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"from matplotlib import pyplot as plt\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tsc_driver</th>\n",
" <th>driver_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Commodity driven deforestation</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>Forestry</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>Urbanization</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" tsc_driver driver_text\n",
"0 1 Commodity driven deforestation\n",
"1 3 Forestry\n",
"2 2 Shifting agriculture\n",
"3 5 Urbanization\n",
"4 4 Wildfire"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# create TSC Driver lookup\n",
"driver_lkp = {'1': 'Commodity driven deforestation', \n",
" '2': 'Shifting agriculture', \n",
" '3': 'Forestry',\n",
" '4': 'Wildfire',\n",
" '5': 'Urbanization'}\n",
"\n",
"driver_lkp_df = pd.DataFrame.from_dict(driver_lkp, orient='index').reset_index()\n",
"driver_lkp_df.columns = ['tsc_driver','driver_text']\n",
"driver_lkp_df['tsc_driver'] = driver_lkp_df.tsc_driver.astype('int64')\n",
"driver_lkp_df"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{u'year_data.year': 2001, u'iso': u'BRA', u'area_loss': 596935.2298880816, u'bound1': u'2'}\n"
]
}
],
"source": [
"# Group and sum API data by: Year, Country, TSC Driver\n",
"query_url = 'https://staging-api.globalforestwatch.org/v1/query/'\n",
"dataset_id = 'e5149c98-c31d-4f7e-95eb-654993031348'\n",
"url = query_url + dataset_id \n",
"sql1 = \"SELECT iso, bound1, year_data.year, SUM(year_data.area_loss) as area_loss, \" \\\n",
" \"FROM data \" \\\n",
" \"WHERE polyname = 'tsc' AND thresh= 0 \" \\\n",
" \"GROUP BY iso, bound1, nested(year_data.year)\"\n",
" \n",
"r1 = requests.get(url, params={'sql': sql1})\n",
"data1 = r1.json()['data']\n",
"print data1[0]"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area_loss</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>596935.229888</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>577274.667222</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>496130.516378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>544077.639378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2004</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>545546.773658</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2005</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area_loss tsc_driver iso year_loss\n",
"0 596935.229888 2 BRA 2001\n",
"1 577274.667222 2 BRA 2002\n",
"2 496130.516378 2 BRA 2003\n",
"3 544077.639378 2 BRA 2004\n",
"4 545546.773658 2 BRA 2005"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Put API data into Pandas Dataframe\n",
"df = pd.DataFrame(data1)\n",
"\n",
"df.columns = ['area_loss', 'tsc_driver', 'iso', 'year_loss']\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area_loss</th>\n",
" <th>year_loss</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>149458.899300</td>\n",
" <td>no loss</td>\n",
" <td>1</td>\n",
" <td>AGO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1025.223938</td>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>AGO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>820.866828</td>\n",
" <td>2002</td>\n",
" <td>1</td>\n",
" <td>AGO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>467.656569</td>\n",
" <td>2003</td>\n",
" <td>1</td>\n",
" <td>AGO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>445.509818</td>\n",
" <td>2004</td>\n",
" <td>1</td>\n",
" <td>AGO</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area_loss year_loss tsc_driver iso\n",
"0 149458.899300 no loss 1 AGO\n",
"1 1025.223938 2001 1 AGO\n",
"2 820.866828 2002 1 AGO\n",
"3 467.656569 2003 1 AGO\n",
"4 445.509818 2004 1 AGO"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load zonal-stats (Arcpy) results into Pandas Dataframe\n",
"df2 = pd.read_csv('zonal_stats_tsc.csv')\n",
"df2.head()\n",
"df2 = df2.rename(index=str, columns={\"forest_loss_ha\": \"area_loss\", \"year\": \"year_loss\", 'gridcode': 'tsc_driver'})\n",
"df2.head()"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>year_loss</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>area_loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>AGO</td>\n",
" <td>1815.508554</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>ARG</td>\n",
" <td>116229.106563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>AUS</td>\n",
" <td>29557.033911</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>BDI</td>\n",
" <td>126.925977</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>BEN</td>\n",
" <td>273.021312</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" year_loss tsc_driver iso area_loss\n",
"0 2001 1 AGO 1815.508554\n",
"1 2001 1 ARG 116229.106563\n",
"2 2001 1 AUS 29557.033911\n",
"3 2001 1 BDI 126.925977\n",
"4 2001 1 BEN 273.021312"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Group and sum Arcpy data by: Year, Country, TSC Driver\n",
"df2 = df2.groupby(['year_loss', 'tsc_driver', 'iso'])['area_loss'].sum().reset_index()\n",
"df2.head()"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>year_loss</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>area_loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>AGO</td>\n",
" <td>1815.508554</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>ARG</td>\n",
" <td>116229.106563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>AUS</td>\n",
" <td>29557.033911</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>BDI</td>\n",
" <td>126.925977</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>BEN</td>\n",
" <td>273.021312</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" year_loss tsc_driver iso area_loss\n",
"0 2001 1 AGO 1815.508554\n",
"1 2001 1 ARG 116229.106563\n",
"2 2001 1 AUS 29557.033911\n",
"3 2001 1 BDI 126.925977\n",
"4 2001 1 BEN 273.021312"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# remove the rows with \"no loss\"\n",
"df2 = df2.loc[df2['year_loss'] != 'no loss']\n",
"df2.head()"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
"# Fix data types which allows for joining of the 2 datasets\n",
"df2['year_loss'] = df2.year_loss.astype('int64')\n",
"df['tsc_driver'] = df.tsc_driver.astype('int64')"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>596935.229888</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2001</td>\n",
" <td>596948.788671</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>577274.667222</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2002</td>\n",
" <td>577297.022182</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>496130.516378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2003</td>\n",
" <td>496160.250287</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>544077.639378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2004</td>\n",
" <td>544123.134148</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>545546.773658</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2005</td>\n",
" <td>545546.395054</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"0 596935.229888 2 BRA 2001 596948.788671 \n",
"1 577274.667222 2 BRA 2002 577297.022182 \n",
"2 496130.516378 2 BRA 2003 496160.250287 \n",
"3 544077.639378 2 BRA 2004 544123.134148 \n",
"4 545546.773658 2 BRA 2005 545546.395054 \n",
"\n",
" driver_text \n",
"0 Shifting agriculture \n",
"1 Shifting agriculture \n",
"2 Shifting agriculture \n",
"3 Shifting agriculture \n",
"4 Shifting agriculture "
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Join the Datasets\n",
"joined = pd.merge(df, df2, on=['tsc_driver', 'year_loss', 'iso'], suffixes=['_hadoop', '_zstats'])\n",
"joined = pd.merge(joined, driver_lkp_df, on=['tsc_driver'])\n",
"joined.head()"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" <th>perc_diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>596935.229888</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2001</td>\n",
" <td>596948.788671</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.002271</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>577274.667222</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2002</td>\n",
" <td>577297.022182</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.003872</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>496130.516378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2003</td>\n",
" <td>496160.250287</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.005993</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>544077.639378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2004</td>\n",
" <td>544123.134148</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.008362</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>545546.773658</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2005</td>\n",
" <td>545546.395054</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.000069</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"0 596935.229888 2 BRA 2001 596948.788671 \n",
"1 577274.667222 2 BRA 2002 577297.022182 \n",
"2 496130.516378 2 BRA 2003 496160.250287 \n",
"3 544077.639378 2 BRA 2004 544123.134148 \n",
"4 545546.773658 2 BRA 2005 545546.395054 \n",
"\n",
" driver_text perc_diff \n",
"0 Shifting agriculture 0.002271 \n",
"1 Shifting agriculture 0.003872 \n",
"2 Shifting agriculture 0.005993 \n",
"3 Shifting agriculture 0.008362 \n",
"4 Shifting agriculture 0.000069 "
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Calculate Percent Difference\n",
"joined['perc_diff'] = abs(((joined.area_loss_hadoop - joined.area_loss_zstats)/joined.area_loss_hadoop)*100)\n",
"joined.head()"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" <th>perc_diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7970</th>\n",
" <td>1.007609e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2010</td>\n",
" <td>8.503351e+05</td>\n",
" <td>Wildfire</td>\n",
" <td>15.608657</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7969</th>\n",
" <td>1.347988e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2009</td>\n",
" <td>1.196430e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>11.243300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7971</th>\n",
" <td>1.926181e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2011</td>\n",
" <td>1.744717e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>9.420942</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7964</th>\n",
" <td>2.599081e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2004</td>\n",
" <td>2.368086e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>8.887561</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7961</th>\n",
" <td>1.303954e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2001</td>\n",
" <td>1.200096e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>7.964837</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"7970 1.007609e+06 4 RUS 2010 8.503351e+05 \n",
"7969 1.347988e+06 4 RUS 2009 1.196430e+06 \n",
"7971 1.926181e+06 4 RUS 2011 1.744717e+06 \n",
"7964 2.599081e+06 4 RUS 2004 2.368086e+06 \n",
"7961 1.303954e+06 4 RUS 2001 1.200096e+06 \n",
"\n",
" driver_text perc_diff \n",
"7970 Wildfire 15.608657 \n",
"7969 Wildfire 11.243300 \n",
"7971 Wildfire 9.420942 \n",
"7964 Wildfire 8.887561 \n",
"7961 Wildfire 7.964837 "
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sort data by % Difference and remove loss in 2017 \n",
"#(2017 numbers are not produced the same between the different methods, so we shouldn't compare them.)\n",
"\n",
"joined = joined.sort_values(by=['perc_diff'], ascending=False)\n",
"\n",
"# ignore 2017 data\n",
"joined = joined[joined.year_loss != 2017]\n",
"joined.head()"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>perc_diff</th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>15.608657</td>\n",
" <td>1.007609e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2010</td>\n",
" <td>8.503351e+05</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11.243300</td>\n",
" <td>1.347988e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2009</td>\n",
" <td>1.196430e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>9.420942</td>\n",
" <td>1.926181e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2011</td>\n",
" <td>1.744717e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>8.887561</td>\n",
" <td>2.599081e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2004</td>\n",
" <td>2.368086e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.964837</td>\n",
" <td>1.303954e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2001</td>\n",
" <td>1.200096e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5.485790</td>\n",
" <td>2.365334e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2002</td>\n",
" <td>2.235576e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2.115271</td>\n",
" <td>4.075586e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2012</td>\n",
" <td>3.989376e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2.038871</td>\n",
" <td>7.771904e+05</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2005</td>\n",
" <td>7.613445e+05</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>1.776451</td>\n",
" <td>2.400159e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2003</td>\n",
" <td>2.357521e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1.771189</td>\n",
" <td>2.024291e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2008</td>\n",
" <td>1.988436e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>1.653006</td>\n",
" <td>4.289393e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2016</td>\n",
" <td>4.218489e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>1.445078</td>\n",
" <td>1.393350e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2007</td>\n",
" <td>1.373215e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>1.282235</td>\n",
" <td>5.950501e+00</td>\n",
" <td>4</td>\n",
" <td>PLW</td>\n",
" <td>2013</td>\n",
" <td>6.026800e+00</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>1.176542</td>\n",
" <td>1.297011e+01</td>\n",
" <td>4</td>\n",
" <td>PLW</td>\n",
" <td>2014</td>\n",
" <td>1.312271e+01</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>1.156588</td>\n",
" <td>1.513426e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2006</td>\n",
" <td>1.495922e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0.480950</td>\n",
" <td>1.681561e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2015</td>\n",
" <td>1.673473e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" perc_diff area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"0 15.608657 1.007609e+06 4 RUS 2010 8.503351e+05 \n",
"1 11.243300 1.347988e+06 4 RUS 2009 1.196430e+06 \n",
"2 9.420942 1.926181e+06 4 RUS 2011 1.744717e+06 \n",
"3 8.887561 2.599081e+06 4 RUS 2004 2.368086e+06 \n",
"4 7.964837 1.303954e+06 4 RUS 2001 1.200096e+06 \n",
"5 5.485790 2.365334e+06 4 RUS 2002 2.235576e+06 \n",
"6 2.115271 4.075586e+06 4 RUS 2012 3.989376e+06 \n",
"7 2.038871 7.771904e+05 4 RUS 2005 7.613445e+05 \n",
"8 1.776451 2.400159e+06 4 RUS 2003 2.357521e+06 \n",
"9 1.771189 2.024291e+06 4 RUS 2008 1.988436e+06 \n",
"10 1.653006 4.289393e+06 4 RUS 2016 4.218489e+06 \n",
"11 1.445078 1.393350e+06 4 RUS 2007 1.373215e+06 \n",
"12 1.282235 5.950501e+00 4 PLW 2013 6.026800e+00 \n",
"13 1.176542 1.297011e+01 4 PLW 2014 1.312271e+01 \n",
"14 1.156588 1.513426e+06 4 RUS 2006 1.495922e+06 \n",
"15 0.480950 1.681561e+06 4 RUS 2015 1.673473e+06 \n",
"\n",
" driver_text \n",
"0 Wildfire \n",
"1 Wildfire \n",
"2 Wildfire \n",
"3 Wildfire \n",
"4 Wildfire \n",
"5 Wildfire \n",
"6 Wildfire \n",
"7 Wildfire \n",
"8 Wildfire \n",
"9 Wildfire \n",
"10 Wildfire \n",
"11 Wildfire \n",
"12 Wildfire \n",
"13 Wildfire \n",
"14 Wildfire \n",
"15 Wildfire "
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the Country with the highest % difference for each year\n",
"max_perc_diff = joined.groupby(['year_loss'], sort=False)['perc_diff'].max()\n",
"max_perc_diff = pd.DataFrame(max_perc_diff)\n",
"pd.merge(max_perc_diff, joined, on='perc_diff')"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" <th>perc_diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>705481.629170</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2010</td>\n",
" <td>705600.476270</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.016846</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>544077.639378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2004</td>\n",
" <td>544123.134148</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.008362</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>496130.516378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2003</td>\n",
" <td>496160.250287</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.005993</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>575815.168951</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2007</td>\n",
" <td>575843.710946</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.004957</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>837854.743909</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2014</td>\n",
" <td>837891.854072</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.004429</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"9 705481.629170 2 BRA 2010 705600.476270 \n",
"3 544077.639378 2 BRA 2004 544123.134148 \n",
"2 496130.516378 2 BRA 2003 496160.250287 \n",
"6 575815.168951 2 BRA 2007 575843.710946 \n",
"13 837854.743909 2 BRA 2014 837891.854072 \n",
"\n",
" driver_text perc_diff \n",
"9 Shifting agriculture 0.016846 \n",
"3 Shifting agriculture 0.008362 \n",
"2 Shifting agriculture 0.005993 \n",
"6 Shifting agriculture 0.004957 \n",
"13 Shifting agriculture 0.004429 "
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Brazil data sorted by largest error\n",
"bra = joined[joined.iso == 'BRA'].sort_values(by=['perc_diff'], ascending=False)\n",
"bra.head()"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" <th>perc_diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7970</th>\n",
" <td>1.007609e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2010</td>\n",
" <td>8.503351e+05</td>\n",
" <td>Wildfire</td>\n",
" <td>15.608657</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7969</th>\n",
" <td>1.347988e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2009</td>\n",
" <td>1.196430e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>11.243300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7971</th>\n",
" <td>1.926181e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2011</td>\n",
" <td>1.744717e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>9.420942</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7964</th>\n",
" <td>2.599081e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2004</td>\n",
" <td>2.368086e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>8.887561</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7961</th>\n",
" <td>1.303954e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2001</td>\n",
" <td>1.200096e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>7.964837</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"7970 1.007609e+06 4 RUS 2010 8.503351e+05 \n",
"7969 1.347988e+06 4 RUS 2009 1.196430e+06 \n",
"7971 1.926181e+06 4 RUS 2011 1.744717e+06 \n",
"7964 2.599081e+06 4 RUS 2004 2.368086e+06 \n",
"7961 1.303954e+06 4 RUS 2001 1.200096e+06 \n",
"\n",
" driver_text perc_diff \n",
"7970 Wildfire 15.608657 \n",
"7969 Wildfire 11.243300 \n",
"7971 Wildfire 9.420942 \n",
"7964 Wildfire 8.887561 \n",
"7961 Wildfire 7.964837 "
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# TSC driver Wildfire data sorted by largest error\n",
"tsc_4 = joined[joined.tsc_driver == 4].sort_values(by=['perc_diff'], ascending=False)\n",
"tsc_4.head()"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>perc_diff</th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>15.608657</td>\n",
" <td>1.007609e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2010</td>\n",
" <td>850335.107925</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2.666857</td>\n",
" <td>5.734749e+00</td>\n",
" <td>5</td>\n",
" <td>BEN</td>\n",
" <td>2004</td>\n",
" <td>5.581812</td>\n",
" <td>Urbanization</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.234700</td>\n",
" <td>5.973740e+00</td>\n",
" <td>2</td>\n",
" <td>MSR</td>\n",
" <td>2008</td>\n",
" <td>5.899983</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.162793</td>\n",
" <td>6.614220e+00</td>\n",
" <td>3</td>\n",
" <td>SGP</td>\n",
" <td>2013</td>\n",
" <td>6.537310</td>\n",
" <td>Forestry</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.086893</td>\n",
" <td>7.020931e+00</td>\n",
" <td>1</td>\n",
" <td>PLW</td>\n",
" <td>2008</td>\n",
" <td>6.944621</td>\n",
" <td>Commodity driven deforestation</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" perc_diff area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"0 15.608657 1.007609e+06 4 RUS 2010 850335.107925 \n",
"1 2.666857 5.734749e+00 5 BEN 2004 5.581812 \n",
"2 1.234700 5.973740e+00 2 MSR 2008 5.899983 \n",
"3 1.162793 6.614220e+00 3 SGP 2013 6.537310 \n",
"4 1.086893 7.020931e+00 1 PLW 2008 6.944621 \n",
"\n",
" driver_text \n",
"0 Wildfire \n",
"1 Urbanization \n",
"2 Shifting agriculture \n",
"3 Forestry \n",
"4 Commodity driven deforestation "
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the TSC Driver with the highest % difference\n",
"max_perc_diff = joined.groupby(['tsc_driver'], sort=False)['perc_diff'].max()\n",
"max_perc_diff = pd.DataFrame(max_perc_diff)\n",
"pd.merge(max_perc_diff, joined, on='perc_diff')"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>perc_diff</th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>15.608657</td>\n",
" <td>1.007609e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2010</td>\n",
" <td>850335.107925</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.316813</td>\n",
" <td>8.136732e+02</td>\n",
" <td>1</td>\n",
" <td>NZL</td>\n",
" <td>2015</td>\n",
" <td>816.251010</td>\n",
" <td>Commodity driven deforestation</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.268384</td>\n",
" <td>1.129610e+03</td>\n",
" <td>3</td>\n",
" <td>NGA</td>\n",
" <td>2015</td>\n",
" <td>1132.641657</td>\n",
" <td>Forestry</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.457284</td>\n",
" <td>5.302967e+02</td>\n",
" <td>2</td>\n",
" <td>TTO</td>\n",
" <td>2002</td>\n",
" <td>532.721634</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.304736</td>\n",
" <td>8.790567e+02</td>\n",
" <td>5</td>\n",
" <td>COL</td>\n",
" <td>2011</td>\n",
" <td>881.735477</td>\n",
" <td>Urbanization</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" perc_diff area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"0 15.608657 1.007609e+06 4 RUS 2010 850335.107925 \n",
"1 0.316813 8.136732e+02 1 NZL 2015 816.251010 \n",
"2 0.268384 1.129610e+03 3 NGA 2015 1132.641657 \n",
"3 0.457284 5.302967e+02 2 TTO 2002 532.721634 \n",
"4 0.304736 8.790567e+02 5 COL 2011 881.735477 \n",
"\n",
" driver_text \n",
"0 Wildfire \n",
"1 Commodity driven deforestation \n",
"2 Forestry \n",
"3 Shifting agriculture \n",
"4 Urbanization "
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show TSC Driver with the highest % difference in Countries with >100ha of Loss (according to Hadoop output)\n",
"over100ha = joined[joined.area_loss_hadoop > 100].sort_values(by=['area_loss_hadoop'], ascending=False)\n",
"\n",
"max_area_loss_hadoop = over100ha.groupby(['tsc_driver'], sort=False)['perc_diff'].max()\n",
"max_area_loss_hadoop = pd.DataFrame(max_area_loss_hadoop)\n",
"\n",
"pd.merge(max_area_loss_hadoop, joined, on='perc_diff')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import pandas as pd\n",
"\n",
"# hide scipy compatibility warnings\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"from matplotlib import pyplot as plt\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>tsc_driver</th>\n",
" <th>driver_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Commodity driven deforestation</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>Forestry</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>Urbanization</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" tsc_driver driver_text\n",
"0 1 Commodity driven deforestation\n",
"1 3 Forestry\n",
"2 2 Shifting agriculture\n",
"3 5 Urbanization\n",
"4 4 Wildfire"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# create TSC Driver lookup\n",
"driver_lkp = {'1': 'Commodity driven deforestation', \n",
" '2': 'Shifting agriculture', \n",
" '3': 'Forestry',\n",
" '4': 'Wildfire',\n",
" '5': 'Urbanization'}\n",
"\n",
"driver_lkp_df = pd.DataFrame.from_dict(driver_lkp, orient='index').reset_index()\n",
"driver_lkp_df.columns = ['tsc_driver','driver_text']\n",
"driver_lkp_df['tsc_driver'] = driver_lkp_df.tsc_driver.astype('int64')\n",
"driver_lkp_df"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{u'year_data.year': 2001, u'iso': u'BRA', u'area_loss': 596935.2298880816, u'bound1': u'2'}\n"
]
}
],
"source": [
"# Group and sum API data by: Year, Country, TSC Driver\n",
"query_url = 'https://staging-api.globalforestwatch.org/v1/query/'\n",
"dataset_id = 'e5149c98-c31d-4f7e-95eb-654993031348'\n",
"url = query_url + dataset_id \n",
"sql1 = \"SELECT iso, bound1, year_data.year, SUM(year_data.area_loss) as area_loss, \" \\\n",
" \"FROM data \" \\\n",
" \"WHERE polyname = 'tsc' AND thresh= 0 \" \\\n",
" \"GROUP BY iso, bound1, nested(year_data.year)\"\n",
" \n",
"r1 = requests.get(url, params={'sql': sql1})\n",
"data1 = r1.json()['data']\n",
"print data1[0]"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area_loss</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>596935.229888</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>577274.667222</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>496130.516378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>544077.639378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2004</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>545546.773658</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2005</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area_loss tsc_driver iso year_loss\n",
"0 596935.229888 2 BRA 2001\n",
"1 577274.667222 2 BRA 2002\n",
"2 496130.516378 2 BRA 2003\n",
"3 544077.639378 2 BRA 2004\n",
"4 545546.773658 2 BRA 2005"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Put API data into Pandas Dataframe\n",
"df = pd.DataFrame(data1)\n",
"\n",
"df.columns = ['area_loss', 'tsc_driver', 'iso', 'year_loss']\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area_loss</th>\n",
" <th>year_loss</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>149458.899300</td>\n",
" <td>no loss</td>\n",
" <td>1</td>\n",
" <td>AGO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1025.223938</td>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>AGO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>820.866828</td>\n",
" <td>2002</td>\n",
" <td>1</td>\n",
" <td>AGO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>467.656569</td>\n",
" <td>2003</td>\n",
" <td>1</td>\n",
" <td>AGO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>445.509818</td>\n",
" <td>2004</td>\n",
" <td>1</td>\n",
" <td>AGO</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area_loss year_loss tsc_driver iso\n",
"0 149458.899300 no loss 1 AGO\n",
"1 1025.223938 2001 1 AGO\n",
"2 820.866828 2002 1 AGO\n",
"3 467.656569 2003 1 AGO\n",
"4 445.509818 2004 1 AGO"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load zonal-stats (Arcpy) results into Pandas Dataframe\n",
"df2 = pd.read_csv('zonal_stats_tsc.csv')\n",
"df2.head()\n",
"df2 = df2.rename(index=str, columns={\"forest_loss_ha\": \"area_loss\", \"year\": \"year_loss\", 'gridcode': 'tsc_driver'})\n",
"df2.head()"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>year_loss</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>area_loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>AGO</td>\n",
" <td>1815.508554</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>ARG</td>\n",
" <td>116229.106563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>AUS</td>\n",
" <td>29557.033911</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>BDI</td>\n",
" <td>126.925977</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>BEN</td>\n",
" <td>273.021312</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" year_loss tsc_driver iso area_loss\n",
"0 2001 1 AGO 1815.508554\n",
"1 2001 1 ARG 116229.106563\n",
"2 2001 1 AUS 29557.033911\n",
"3 2001 1 BDI 126.925977\n",
"4 2001 1 BEN 273.021312"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Group and sum Arcpy data by: Year, Country, TSC Driver\n",
"df2 = df2.groupby(['year_loss', 'tsc_driver', 'iso'])['area_loss'].sum().reset_index()\n",
"df2.head()"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>year_loss</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>area_loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>AGO</td>\n",
" <td>1815.508554</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>ARG</td>\n",
" <td>116229.106563</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>AUS</td>\n",
" <td>29557.033911</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>BDI</td>\n",
" <td>126.925977</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2001</td>\n",
" <td>1</td>\n",
" <td>BEN</td>\n",
" <td>273.021312</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" year_loss tsc_driver iso area_loss\n",
"0 2001 1 AGO 1815.508554\n",
"1 2001 1 ARG 116229.106563\n",
"2 2001 1 AUS 29557.033911\n",
"3 2001 1 BDI 126.925977\n",
"4 2001 1 BEN 273.021312"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# remove the rows with \"no loss\"\n",
"df2 = df2.loc[df2['year_loss'] != 'no loss']\n",
"df2.head()"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
"# Fix data types which allows for joining of the 2 datasets\n",
"df2['year_loss'] = df2.year_loss.astype('int64')\n",
"df['tsc_driver'] = df.tsc_driver.astype('int64')"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>596935.229888</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2001</td>\n",
" <td>596948.788671</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>577274.667222</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2002</td>\n",
" <td>577297.022182</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>496130.516378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2003</td>\n",
" <td>496160.250287</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>544077.639378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2004</td>\n",
" <td>544123.134148</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>545546.773658</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2005</td>\n",
" <td>545546.395054</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"0 596935.229888 2 BRA 2001 596948.788671 \n",
"1 577274.667222 2 BRA 2002 577297.022182 \n",
"2 496130.516378 2 BRA 2003 496160.250287 \n",
"3 544077.639378 2 BRA 2004 544123.134148 \n",
"4 545546.773658 2 BRA 2005 545546.395054 \n",
"\n",
" driver_text \n",
"0 Shifting agriculture \n",
"1 Shifting agriculture \n",
"2 Shifting agriculture \n",
"3 Shifting agriculture \n",
"4 Shifting agriculture "
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Join the Datasets\n",
"joined = pd.merge(df, df2, on=['tsc_driver', 'year_loss', 'iso'], suffixes=['_hadoop', '_zstats'])\n",
"joined = pd.merge(joined, driver_lkp_df, on=['tsc_driver'])\n",
"joined.head()"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" <th>perc_diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>596935.229888</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2001</td>\n",
" <td>596948.788671</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.002271</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>577274.667222</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2002</td>\n",
" <td>577297.022182</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.003872</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>496130.516378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2003</td>\n",
" <td>496160.250287</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.005993</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>544077.639378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2004</td>\n",
" <td>544123.134148</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.008362</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>545546.773658</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2005</td>\n",
" <td>545546.395054</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.000069</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"0 596935.229888 2 BRA 2001 596948.788671 \n",
"1 577274.667222 2 BRA 2002 577297.022182 \n",
"2 496130.516378 2 BRA 2003 496160.250287 \n",
"3 544077.639378 2 BRA 2004 544123.134148 \n",
"4 545546.773658 2 BRA 2005 545546.395054 \n",
"\n",
" driver_text perc_diff \n",
"0 Shifting agriculture 0.002271 \n",
"1 Shifting agriculture 0.003872 \n",
"2 Shifting agriculture 0.005993 \n",
"3 Shifting agriculture 0.008362 \n",
"4 Shifting agriculture 0.000069 "
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Calculate Percent Difference\n",
"joined['perc_diff'] = abs(((joined.area_loss_hadoop - joined.area_loss_zstats)/joined.area_loss_hadoop)*100)\n",
"joined.head()"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" <th>perc_diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7970</th>\n",
" <td>1.007609e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2010</td>\n",
" <td>8.503351e+05</td>\n",
" <td>Wildfire</td>\n",
" <td>15.608657</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7969</th>\n",
" <td>1.347988e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2009</td>\n",
" <td>1.196430e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>11.243300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7971</th>\n",
" <td>1.926181e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2011</td>\n",
" <td>1.744717e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>9.420942</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7964</th>\n",
" <td>2.599081e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2004</td>\n",
" <td>2.368086e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>8.887561</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7961</th>\n",
" <td>1.303954e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2001</td>\n",
" <td>1.200096e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>7.964837</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"7970 1.007609e+06 4 RUS 2010 8.503351e+05 \n",
"7969 1.347988e+06 4 RUS 2009 1.196430e+06 \n",
"7971 1.926181e+06 4 RUS 2011 1.744717e+06 \n",
"7964 2.599081e+06 4 RUS 2004 2.368086e+06 \n",
"7961 1.303954e+06 4 RUS 2001 1.200096e+06 \n",
"\n",
" driver_text perc_diff \n",
"7970 Wildfire 15.608657 \n",
"7969 Wildfire 11.243300 \n",
"7971 Wildfire 9.420942 \n",
"7964 Wildfire 8.887561 \n",
"7961 Wildfire 7.964837 "
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sort data by % Difference and remove loss in 2017 \n",
"#(2017 numbers are not produced the same between the different methods, so we shouldn't compare them.)\n",
"\n",
"joined = joined.sort_values(by=['perc_diff'], ascending=False)\n",
"\n",
"# ignore 2017 data\n",
"joined = joined[joined.year_loss != 2017]\n",
"joined.head()"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>perc_diff</th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>15.608657</td>\n",
" <td>1.007609e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2010</td>\n",
" <td>8.503351e+05</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11.243300</td>\n",
" <td>1.347988e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2009</td>\n",
" <td>1.196430e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>9.420942</td>\n",
" <td>1.926181e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2011</td>\n",
" <td>1.744717e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>8.887561</td>\n",
" <td>2.599081e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2004</td>\n",
" <td>2.368086e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.964837</td>\n",
" <td>1.303954e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2001</td>\n",
" <td>1.200096e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5.485790</td>\n",
" <td>2.365334e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2002</td>\n",
" <td>2.235576e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2.115271</td>\n",
" <td>4.075586e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2012</td>\n",
" <td>3.989376e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2.038871</td>\n",
" <td>7.771904e+05</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2005</td>\n",
" <td>7.613445e+05</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>1.776451</td>\n",
" <td>2.400159e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2003</td>\n",
" <td>2.357521e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1.771189</td>\n",
" <td>2.024291e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2008</td>\n",
" <td>1.988436e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>1.653006</td>\n",
" <td>4.289393e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2016</td>\n",
" <td>4.218489e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>1.445078</td>\n",
" <td>1.393350e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2007</td>\n",
" <td>1.373215e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>1.282235</td>\n",
" <td>5.950501e+00</td>\n",
" <td>4</td>\n",
" <td>PLW</td>\n",
" <td>2013</td>\n",
" <td>6.026800e+00</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>1.176542</td>\n",
" <td>1.297011e+01</td>\n",
" <td>4</td>\n",
" <td>PLW</td>\n",
" <td>2014</td>\n",
" <td>1.312271e+01</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>1.156588</td>\n",
" <td>1.513426e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2006</td>\n",
" <td>1.495922e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0.480950</td>\n",
" <td>1.681561e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2015</td>\n",
" <td>1.673473e+06</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" perc_diff area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"0 15.608657 1.007609e+06 4 RUS 2010 8.503351e+05 \n",
"1 11.243300 1.347988e+06 4 RUS 2009 1.196430e+06 \n",
"2 9.420942 1.926181e+06 4 RUS 2011 1.744717e+06 \n",
"3 8.887561 2.599081e+06 4 RUS 2004 2.368086e+06 \n",
"4 7.964837 1.303954e+06 4 RUS 2001 1.200096e+06 \n",
"5 5.485790 2.365334e+06 4 RUS 2002 2.235576e+06 \n",
"6 2.115271 4.075586e+06 4 RUS 2012 3.989376e+06 \n",
"7 2.038871 7.771904e+05 4 RUS 2005 7.613445e+05 \n",
"8 1.776451 2.400159e+06 4 RUS 2003 2.357521e+06 \n",
"9 1.771189 2.024291e+06 4 RUS 2008 1.988436e+06 \n",
"10 1.653006 4.289393e+06 4 RUS 2016 4.218489e+06 \n",
"11 1.445078 1.393350e+06 4 RUS 2007 1.373215e+06 \n",
"12 1.282235 5.950501e+00 4 PLW 2013 6.026800e+00 \n",
"13 1.176542 1.297011e+01 4 PLW 2014 1.312271e+01 \n",
"14 1.156588 1.513426e+06 4 RUS 2006 1.495922e+06 \n",
"15 0.480950 1.681561e+06 4 RUS 2015 1.673473e+06 \n",
"\n",
" driver_text \n",
"0 Wildfire \n",
"1 Wildfire \n",
"2 Wildfire \n",
"3 Wildfire \n",
"4 Wildfire \n",
"5 Wildfire \n",
"6 Wildfire \n",
"7 Wildfire \n",
"8 Wildfire \n",
"9 Wildfire \n",
"10 Wildfire \n",
"11 Wildfire \n",
"12 Wildfire \n",
"13 Wildfire \n",
"14 Wildfire \n",
"15 Wildfire "
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the Country with the highest % difference for each year\n",
"max_perc_diff = joined.groupby(['year_loss'], sort=False)['perc_diff'].max()\n",
"max_perc_diff = pd.DataFrame(max_perc_diff)\n",
"pd.merge(max_perc_diff, joined, on='perc_diff')"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" <th>perc_diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>705481.629170</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2010</td>\n",
" <td>705600.476270</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.016846</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>544077.639378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2004</td>\n",
" <td>544123.134148</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.008362</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>496130.516378</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2003</td>\n",
" <td>496160.250287</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.005993</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>575815.168951</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2007</td>\n",
" <td>575843.710946</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.004957</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>837854.743909</td>\n",
" <td>2</td>\n",
" <td>BRA</td>\n",
" <td>2014</td>\n",
" <td>837891.854072</td>\n",
" <td>Shifting agriculture</td>\n",
" <td>0.004429</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"9 705481.629170 2 BRA 2010 705600.476270 \n",
"3 544077.639378 2 BRA 2004 544123.134148 \n",
"2 496130.516378 2 BRA 2003 496160.250287 \n",
"6 575815.168951 2 BRA 2007 575843.710946 \n",
"13 837854.743909 2 BRA 2014 837891.854072 \n",
"\n",
" driver_text perc_diff \n",
"9 Shifting agriculture 0.016846 \n",
"3 Shifting agriculture 0.008362 \n",
"2 Shifting agriculture 0.005993 \n",
"6 Shifting agriculture 0.004957 \n",
"13 Shifting agriculture 0.004429 "
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Brazil data sorted by largest error\n",
"bra = joined[joined.iso == 'BRA'].sort_values(by=['perc_diff'], ascending=False)\n",
"bra.head()"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" <th>perc_diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7970</th>\n",
" <td>1.007609e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2010</td>\n",
" <td>8.503351e+05</td>\n",
" <td>Wildfire</td>\n",
" <td>15.608657</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7969</th>\n",
" <td>1.347988e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2009</td>\n",
" <td>1.196430e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>11.243300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7971</th>\n",
" <td>1.926181e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2011</td>\n",
" <td>1.744717e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>9.420942</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7964</th>\n",
" <td>2.599081e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2004</td>\n",
" <td>2.368086e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>8.887561</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7961</th>\n",
" <td>1.303954e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2001</td>\n",
" <td>1.200096e+06</td>\n",
" <td>Wildfire</td>\n",
" <td>7.964837</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"7970 1.007609e+06 4 RUS 2010 8.503351e+05 \n",
"7969 1.347988e+06 4 RUS 2009 1.196430e+06 \n",
"7971 1.926181e+06 4 RUS 2011 1.744717e+06 \n",
"7964 2.599081e+06 4 RUS 2004 2.368086e+06 \n",
"7961 1.303954e+06 4 RUS 2001 1.200096e+06 \n",
"\n",
" driver_text perc_diff \n",
"7970 Wildfire 15.608657 \n",
"7969 Wildfire 11.243300 \n",
"7971 Wildfire 9.420942 \n",
"7964 Wildfire 8.887561 \n",
"7961 Wildfire 7.964837 "
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# TSC driver Wildfire data sorted by largest error\n",
"tsc_4 = joined[joined.tsc_driver == 4].sort_values(by=['perc_diff'], ascending=False)\n",
"tsc_4.head()"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>perc_diff</th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>15.608657</td>\n",
" <td>1.007609e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2010</td>\n",
" <td>850335.107925</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2.666857</td>\n",
" <td>5.734749e+00</td>\n",
" <td>5</td>\n",
" <td>BEN</td>\n",
" <td>2004</td>\n",
" <td>5.581812</td>\n",
" <td>Urbanization</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.234700</td>\n",
" <td>5.973740e+00</td>\n",
" <td>2</td>\n",
" <td>MSR</td>\n",
" <td>2008</td>\n",
" <td>5.899983</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.162793</td>\n",
" <td>6.614220e+00</td>\n",
" <td>3</td>\n",
" <td>SGP</td>\n",
" <td>2013</td>\n",
" <td>6.537310</td>\n",
" <td>Forestry</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.086893</td>\n",
" <td>7.020931e+00</td>\n",
" <td>1</td>\n",
" <td>PLW</td>\n",
" <td>2008</td>\n",
" <td>6.944621</td>\n",
" <td>Commodity driven deforestation</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" perc_diff area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"0 15.608657 1.007609e+06 4 RUS 2010 850335.107925 \n",
"1 2.666857 5.734749e+00 5 BEN 2004 5.581812 \n",
"2 1.234700 5.973740e+00 2 MSR 2008 5.899983 \n",
"3 1.162793 6.614220e+00 3 SGP 2013 6.537310 \n",
"4 1.086893 7.020931e+00 1 PLW 2008 6.944621 \n",
"\n",
" driver_text \n",
"0 Wildfire \n",
"1 Urbanization \n",
"2 Shifting agriculture \n",
"3 Forestry \n",
"4 Commodity driven deforestation "
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the TSC Driver with the highest % difference\n",
"max_perc_diff = joined.groupby(['tsc_driver'], sort=False)['perc_diff'].max()\n",
"max_perc_diff = pd.DataFrame(max_perc_diff)\n",
"pd.merge(max_perc_diff, joined, on='perc_diff')"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>perc_diff</th>\n",
" <th>area_loss_hadoop</th>\n",
" <th>tsc_driver</th>\n",
" <th>iso</th>\n",
" <th>year_loss</th>\n",
" <th>area_loss_zstats</th>\n",
" <th>driver_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>15.608657</td>\n",
" <td>1.007609e+06</td>\n",
" <td>4</td>\n",
" <td>RUS</td>\n",
" <td>2010</td>\n",
" <td>850335.107925</td>\n",
" <td>Wildfire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.316813</td>\n",
" <td>8.136732e+02</td>\n",
" <td>1</td>\n",
" <td>NZL</td>\n",
" <td>2015</td>\n",
" <td>816.251010</td>\n",
" <td>Commodity driven deforestation</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.268384</td>\n",
" <td>1.129610e+03</td>\n",
" <td>3</td>\n",
" <td>NGA</td>\n",
" <td>2015</td>\n",
" <td>1132.641657</td>\n",
" <td>Forestry</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.457284</td>\n",
" <td>5.302967e+02</td>\n",
" <td>2</td>\n",
" <td>TTO</td>\n",
" <td>2002</td>\n",
" <td>532.721634</td>\n",
" <td>Shifting agriculture</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.304736</td>\n",
" <td>8.790567e+02</td>\n",
" <td>5</td>\n",
" <td>COL</td>\n",
" <td>2011</td>\n",
" <td>881.735477</td>\n",
" <td>Urbanization</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" perc_diff area_loss_hadoop tsc_driver iso year_loss area_loss_zstats \\\n",
"0 15.608657 1.007609e+06 4 RUS 2010 850335.107925 \n",
"1 0.316813 8.136732e+02 1 NZL 2015 816.251010 \n",
"2 0.268384 1.129610e+03 3 NGA 2015 1132.641657 \n",
"3 0.457284 5.302967e+02 2 TTO 2002 532.721634 \n",
"4 0.304736 8.790567e+02 5 COL 2011 881.735477 \n",
"\n",
" driver_text \n",
"0 Wildfire \n",
"1 Commodity driven deforestation \n",
"2 Forestry \n",
"3 Shifting agriculture \n",
"4 Urbanization "
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show TSC Driver with the highest % difference in Countries with >100ha of Loss (according to Hadoop output)\n",
"over100ha = joined[joined.area_loss_hadoop > 100].sort_values(by=['area_loss_hadoop'], ascending=False)\n",
"\n",
"max_area_loss_hadoop = over100ha.groupby(['tsc_driver'], sort=False)['perc_diff'].max()\n",
"max_area_loss_hadoop = pd.DataFrame(max_area_loss_hadoop)\n",
"\n",
"pd.merge(max_area_loss_hadoop, joined, on='perc_diff')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment