Skip to content

Instantly share code, notes, and snippets.

@kjam
Created January 8, 2016 11:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kjam/d246a89563b4039d447c to your computer and use it in GitHub Desktop.
Save kjam/d246a89563b4039d447c to your computer and use it in GitHub Desktop.
import agate
import xlrd
from xlrd.sheet import ctype_text
text_type = agate.Text()
number_type = agate.Number()
boolean_type = agate.Boolean()
date_type = agate.Date()
def remove_bad_chars(val):
if val == '-':
return None
return val
def get_types(example_row):
types = []
for v in example_row:
value_type = ctype_text[v.ctype]
if value_type == 'text':
types.append(text_type)
elif value_type == 'number':
types.append(number_type)
elif value_type == 'xldate':
types.append(date_type)
else:
types.append(text_type)
return types
workbook = xlrd.open_workbook('unicef_oct_2014.xlsx')
sheet = workbook.sheets()[0]
title_rows = zip(sheet.row_values(4), sheet.row_values(5))
titles = [t[0] + ' ' + t[1] for t in title_rows]
titles = [t.strip() for t in titles]
country_rows = [sheet.row_values(r) for r in range(6, 114)]
cleaned_rows = []
for row in country_rows:
cleaned_row = [remove_bad_chars(rv) for rv in row]
cleaned_rows.append(cleaned_row)
example_row = sheet.row(6)
types = get_types(example_row)
table = agate.Table(cleaned_rows, titles, types)
ranked = table.compute([('Total Child Labor Rank',
agate.Rank('Total (%)', reverse=True)), ])
cpi_workbook = xlrd.open_workbook(
'corruption_perception_index.xlsx')
cpi_sheet = cpi_workbook.sheets()[0]
cpi_title_rows = zip(cpi_sheet.row_values(1), cpi_sheet.row_values(2))
cpi_titles = [t[0] + ' ' + t[1] for t in cpi_title_rows]
cpi_titles = [t.strip() for t in cpi_titles]
cpi_rows = [cpi_sheet.row_values(r) for r in range(3, cpi_sheet.nrows)]
def get_table(new_arr, types, titles):
try:
table = agate.Table(new_arr, titles, types)
return table
except Exception as e:
print e
cpi_types = get_types(cpi_sheet.row(3))
cpi_titles[0] = cpi_titles[0] + ' Duplicate'
cpi_table = get_table(cpi_rows, cpi_types, cpi_titles)
cpi_and_cl = cpi_table.join(ranked, 'Country / Territory',
'Countries and areas', inner=True)
cpi_and_cl.print_table()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment