Skip to content

Instantly share code, notes, and snippets.

@mappingvermont
Created February 27, 2017 14:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mappingvermont/d0c2ece1f90b766369956d69f4bd1b33 to your computer and use it in GitHub Desktop.
Save mappingvermont/d0c2ece1f90b766369956d69f4bd1b33 to your computer and use it in GitHub Desktop.
cumsum loss data for brazil biomes
import os
import csv
import pandas as pd
def main():
header_list = ['biome', 'iso', 'adm1', 'adm2', 'year', 'thresh', 'area_raw', 'emissions_raw']
print 'Reading df'
df = pd.read_csv('bra_biomes_int_gadm28.csv', names=header_list)
df = df.apply(lambda x: pd.to_numeric(x, errors='ignore'))
# select only brazil biomes
df = df[df.iso == 'BRA']
# years in this CSV are stored as 1 - 14, should be 2001 - 2014
df['year'] = df['year'] + 2000
# convert area in m2 to area in ha
df['area_raw'] = df['area_raw'] / 10000
# ID full universe of biome/ISO/ADM1/ADM2 combinations
all_combo_df = df.groupby(['biome', 'iso', 'adm1', 'adm2']).size().reset_index()
# delete the count column that was added
del all_combo_df[0]
# create dummy df and store all possible combinations
dummy_df = pd.DataFrame()
# create a record for every combination of threshold
# and year, in addition to the above iso/adm1/adm2
for dummy_thresh in [0, 10, 15, 20, 25, 30, 50, 75]:
for dummy_year in range(2001, 2016):
all_combo_df['thresh'] = dummy_thresh
all_combo_df['year'] = dummy_year
dummy_df = dummy_df.append(all_combo_df)
# outer join our dummy_df to df, so that we get proper
# Nan values where we don't have data
print 'joining to dummy data'
joined_df = pd.merge(dummy_df, df, how='left', on=['biome', 'iso', 'adm1', 'adm2', 'thresh', 'year'])
# update all Nan values to be 0, so that they will be included
# in the sum when we cumsum
joined_df['area_raw'].fillna(0, inplace=True)
joined_df['emissions_raw'].fillna(0, inplace=True)
print 'grouping by year/iso/thresh'
grouped_df = joined_df.groupby(['year', 'biome', 'thresh'])['area_raw', 'emissions_raw'].sum().reset_index()
print 'Tabluating cum sum for thresh'
# First sort the DF by threshold DESC, then cumsum, grouping by iso and year
grouped_df = grouped_df.sort_values('thresh', ascending=False)
grouped_df['area'] = grouped_df.groupby(['biome', 'year'])['area_raw'].cumsum()
grouped_df['emissions'] = grouped_df.groupby(['biome', 'year'])['emissions_raw'].cumsum()
# Delete the area_raw column-- this shouldn't go in the database
del grouped_df['area_raw']
del grouped_df['emissions_raw']
writer = pd.ExcelWriter('BRA_biomes.xlsx')
print grouped_df
for thresh in [10, 15, 20, 25, 30, 50, 75]:
df_subset = grouped_df[grouped_df.thresh == thresh]
print df_subset
df_pivot = df_subset.pivot(index='biome', columns='year', values='area')
df_pivot.to_excel(writer, 'thresh{0}'.format(thresh))
writer.save()
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment