mappingvermont/results_to_table_brazil_biome.py

## results_to_table_brazil_biome.py
import os
import csv
import pandas as pd


def main():

    header_list = ['biome', 'iso', 'adm1', 'adm2', 'year', 'thresh', 'area_raw', 'emissions_raw']

    print 'Reading df'
    df = pd.read_csv('bra_biomes_int_gadm28.csv', names=header_list)
    df = df.apply(lambda x: pd.to_numeric(x, errors='ignore'))

    # select only brazil biomes
    df = df[df.iso == 'BRA']

    # years in this CSV are stored as 1 - 14, should be 2001 - 2014
    df['year'] = df['year'] + 2000

    # convert area in m2 to area in ha
    df['area_raw'] = df['area_raw'] / 10000

    # ID full universe of biome/ISO/ADM1/ADM2 combinations
    all_combo_df = df.groupby(['biome', 'iso', 'adm1', 'adm2']).size().reset_index()

    # delete the count column that was added
    del all_combo_df[0]

    # create dummy df and store all possible combinations
    dummy_df = pd.DataFrame()

    # create a record for every combination of threshold
    # and year, in addition to the above iso/adm1/adm2
    for dummy_thresh in [0, 10, 15, 20, 25, 30, 50, 75]:
        for dummy_year in range(2001, 2016):
            all_combo_df['thresh'] = dummy_thresh
            all_combo_df['year'] = dummy_year
            dummy_df = dummy_df.append(all_combo_df)

    # outer join our dummy_df to df, so that we get proper
    # Nan values where we don't have data
    print 'joining to dummy data'
    joined_df = pd.merge(dummy_df, df, how='left', on=['biome', 'iso', 'adm1', 'adm2', 'thresh', 'year'])

    # update all Nan values to be 0, so that they will be included
    # in the sum when we cumsum
    joined_df['area_raw'].fillna(0, inplace=True)
    joined_df['emissions_raw'].fillna(0, inplace=True)

    print 'grouping by year/iso/thresh'
    grouped_df = joined_df.groupby(['year', 'biome', 'thresh'])['area_raw', 'emissions_raw'].sum().reset_index()

    print 'Tabluating cum sum for thresh'
    # First sort the DF by threshold DESC, then cumsum, grouping by iso and year
    grouped_df = grouped_df.sort_values('thresh', ascending=False)
    grouped_df['area'] = grouped_df.groupby(['biome', 'year'])['area_raw'].cumsum()
    grouped_df['emissions'] = grouped_df.groupby(['biome', 'year'])['emissions_raw'].cumsum()

    # Delete the area_raw column-- this shouldn't go in the database
    del grouped_df['area_raw']
    del grouped_df['emissions_raw']

    writer = pd.ExcelWriter('BRA_biomes.xlsx')

    print grouped_df

    for thresh in [10, 15, 20, 25, 30, 50, 75]:
        df_subset = grouped_df[grouped_df.thresh == thresh]
        print df_subset

        df_pivot = df_subset.pivot(index='biome', columns='year', values='area')
        df_pivot.to_excel(writer, 'thresh{0}'.format(thresh))

    writer.save()


if __name__ == '__main__':
    main()
	import os
	import csv
	import pandas as pd


	def main():

	header_list = ['biome', 'iso', 'adm1', 'adm2', 'year', 'thresh', 'area_raw', 'emissions_raw']

	print 'Reading df'
	df = pd.read_csv('bra_biomes_int_gadm28.csv', names=header_list)
	df = df.apply(lambda x: pd.to_numeric(x, errors='ignore'))

	# select only brazil biomes
	df = df[df.iso == 'BRA']

	# years in this CSV are stored as 1 - 14, should be 2001 - 2014
	df['year'] = df['year'] + 2000

	# convert area in m2 to area in ha
	df['area_raw'] = df['area_raw'] / 10000

	# ID full universe of biome/ISO/ADM1/ADM2 combinations
	all_combo_df = df.groupby(['biome', 'iso', 'adm1', 'adm2']).size().reset_index()

	# delete the count column that was added
	del all_combo_df[0]

	# create dummy df and store all possible combinations
	dummy_df = pd.DataFrame()

	# create a record for every combination of threshold
	# and year, in addition to the above iso/adm1/adm2
	for dummy_thresh in [0, 10, 15, 20, 25, 30, 50, 75]:
	for dummy_year in range(2001, 2016):
	all_combo_df['thresh'] = dummy_thresh
	all_combo_df['year'] = dummy_year
	dummy_df = dummy_df.append(all_combo_df)

	# outer join our dummy_df to df, so that we get proper
	# Nan values where we don't have data
	print 'joining to dummy data'
	joined_df = pd.merge(dummy_df, df, how='left', on=['biome', 'iso', 'adm1', 'adm2', 'thresh', 'year'])

	# update all Nan values to be 0, so that they will be included
	# in the sum when we cumsum
	joined_df['area_raw'].fillna(0, inplace=True)
	joined_df['emissions_raw'].fillna(0, inplace=True)

	print 'grouping by year/iso/thresh'
	grouped_df = joined_df.groupby(['year', 'biome', 'thresh'])['area_raw', 'emissions_raw'].sum().reset_index()

	print 'Tabluating cum sum for thresh'
	# First sort the DF by threshold DESC, then cumsum, grouping by iso and year
	grouped_df = grouped_df.sort_values('thresh', ascending=False)
	grouped_df['area'] = grouped_df.groupby(['biome', 'year'])['area_raw'].cumsum()
	grouped_df['emissions'] = grouped_df.groupby(['biome', 'year'])['emissions_raw'].cumsum()

	# Delete the area_raw column-- this shouldn't go in the database
	del grouped_df['area_raw']
	del grouped_df['emissions_raw']

	writer = pd.ExcelWriter('BRA_biomes.xlsx')

	print grouped_df

	for thresh in [10, 15, 20, 25, 30, 50, 75]:
	df_subset = grouped_df[grouped_df.thresh == thresh]
	print df_subset

	df_pivot = df_subset.pivot(index='biome', columns='year', values='area')
	df_pivot.to_excel(writer, 'thresh{0}'.format(thresh))

	writer.save()


	if __name__ == '__main__':
	main()