Created
February 27, 2017 14:31
-
-
Save mappingvermont/d0c2ece1f90b766369956d69f4bd1b33 to your computer and use it in GitHub Desktop.
Cumulative-sum (by canopy-density threshold) tree-cover-loss data for Brazil biomes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import csv | |
import pandas as pd | |
def main():
    """Build cumulative tree-cover-loss and emissions tables for Brazil biomes.

    Reads bra_biomes_int_gadm28.csv (biome/ISO/admin/year/threshold rows with
    raw loss area in m2 and raw emissions), densifies it so every
    biome/iso/adm1/adm2 x threshold x year combination has a row, cumulatively
    sums values across descending canopy-density thresholds, and writes one
    pivoted sheet per threshold to BRA_biomes.xlsx.

    Side effects: reads the CSV from the working directory, prints progress,
    writes BRA_biomes.xlsx. Returns None.
    """
    header_list = ['biome', 'iso', 'adm1', 'adm2', 'year', 'thresh',
                   'area_raw', 'emissions_raw']

    print('Reading df')
    df = pd.read_csv('bra_biomes_int_gadm28.csv', names=header_list)

    # Convert only the known-numeric columns; biome/iso stay as text.
    # (Replaces the deprecated errors='ignore' whole-frame conversion.)
    numeric_cols = ['adm1', 'adm2', 'year', 'thresh', 'area_raw', 'emissions_raw']
    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric)

    # select only brazil biomes
    df = df[df.iso == 'BRA']

    # years in this CSV are stored as 1 - 14, should be 2001 - 2014
    df['year'] = df['year'] + 2000

    # convert area in m2 to area in ha
    df['area_raw'] = df['area_raw'] / 10000

    # ID full universe of biome/ISO/ADM1/ADM2 combinations
    # (drop_duplicates replaces groupby().size() + deleting the count column)
    all_combo_df = df[['biome', 'iso', 'adm1', 'adm2']].drop_duplicates()

    # Create a record for every combination of threshold and year, in
    # addition to the above biome/iso/adm1/adm2. Collect the frames and
    # concat once -- DataFrame.append was removed in pandas 2.0 and was
    # quadratic when called in a loop.
    frames = []
    for dummy_thresh in [0, 10, 15, 20, 25, 30, 50, 75]:
        for dummy_year in range(2001, 2016):
            combo = all_combo_df.copy()
            combo['thresh'] = dummy_thresh
            combo['year'] = dummy_year
            frames.append(combo)
    dummy_df = pd.concat(frames, ignore_index=True)

    # outer join our dummy_df to df, so that we get proper
    # NaN values where we don't have data
    print('joining to dummy data')
    joined_df = pd.merge(dummy_df, df, how='left',
                         on=['biome', 'iso', 'adm1', 'adm2', 'thresh', 'year'])

    # update all NaN values to be 0, so that they will be included
    # in the sum when we cumsum (assignment avoids chained-inplace fillna,
    # which no longer works reliably under copy-on-write)
    joined_df['area_raw'] = joined_df['area_raw'].fillna(0)
    joined_df['emissions_raw'] = joined_df['emissions_raw'].fillna(0)

    print('grouping by year/iso/thresh')
    # Column selection must be a list -- tuple indexing after groupby was removed.
    grouped_df = (joined_df
                  .groupby(['year', 'biome', 'thresh'])[['area_raw', 'emissions_raw']]
                  .sum()
                  .reset_index())

    print('Tabulating cum sum for thresh')
    # First sort the DF by threshold DESC, then cumsum, grouping by biome and year
    grouped_df = grouped_df.sort_values('thresh', ascending=False)
    grouped_df['area'] = grouped_df.groupby(['biome', 'year'])['area_raw'].cumsum()
    grouped_df['emissions'] = grouped_df.groupby(['biome', 'year'])['emissions_raw'].cumsum()

    # Drop the raw columns -- these shouldn't go in the output
    grouped_df = grouped_df.drop(columns=['area_raw', 'emissions_raw'])

    # Context manager guarantees the workbook is finalized even on error;
    # ExcelWriter.save() was removed in favor of close().
    with pd.ExcelWriter('BRA_biomes.xlsx') as writer:
        print(grouped_df)
        for thresh in [10, 15, 20, 25, 30, 50, 75]:
            df_subset = grouped_df[grouped_df.thresh == thresh]
            print(df_subset)
            df_pivot = df_subset.pivot(index='biome', columns='year', values='area')
            df_pivot.to_excel(writer, sheet_name='thresh{0}'.format(thresh))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment