Skip to content

Instantly share code, notes, and snippets.

@timwis
Last active December 2, 2016 21:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timwis/d33d2ac4f00e54d320ba81693fec4a3b to your computer and use it in GitHub Desktop.
Save timwis/d33d2ac4f00e54d320ba81693fec4a3b to your computer and use it in GitHub Desktop.
Voter turnout processing
import click
from registry import process_registry
from turnout import process_turnout
@click.command()
@click.option(
    '--registry', '-r', 'registry_file',
    type=click.Path(),
    required=True,
    help='Qualified voter registry file',
)
@click.option(
    '--turnout', '-t', 'turnout_file',
    type=click.Path(),
    required=True,
    help='Voter turnout file',
)
def process(registry_file, turnout_file):
    """Cleans and combines registry and turnout files"""
    # The docstring above doubles as the command's --help text, so the
    # implementation notes live in comments instead.
    #
    # registry_file: path handed to process_registry()
    # turnout_file:  path handed to process_turnout()
    registry_table = process_registry(registry_file)
    turnout_table = process_turnout(turnout_file)

    # Left join on (ward, party): every registry row is kept, with turnout
    # columns attached where a matching ward/party pair exists.
    joined = registry_table.leftjoin(turnout_table, key=['ward', 'party'])

    # No destination is passed to tocsv(); petl is expected to write the CSV
    # to stdout in that case -- NOTE(review): confirm against the petl docs.
    joined.tocsv()


if __name__ == '__main__':
    process()
from collections import OrderedDict
import petl as etl
# Aggregation spec passed to petl's aggregate() in process_registry():
# each output field name maps to a (source field, reducer) pair.
# An OrderedDict keeps the output columns in this declared order.
aggregation = OrderedDict([
    ('sum_democratic', ('democratic', sum)),
    ('sum_republican', ('republican', sum)),
    ('sum_total', ('total', sum)),
])
def rowgenerator(row):
    """Split one per-ward aggregate row into one row per party.

    ``row`` must support lookup by field name for ``ward``, ``sum_total``,
    ``sum_democratic`` and ``sum_republican``.

    Yields two lists, democratic first and then republican, each shaped as
    ``[ward, party, sum_total, <that party's sum>]``.
    """
    party_columns = (
        ('democratic', 'sum_democratic'),
        ('republican', 'sum_republican'),
    )
    for party, party_sum_field in party_columns:
        yield [row['ward'], party, row['sum_total'], row[party_sum_field]]
def process_registry(filepath):
    """Load the voter registry CSV and reshape it to one row per ward/party.

    Steps, in order:
      1. read the CSV at ``filepath``;
      2. rename the source columns to lowercase names and keep only those;
      3. strip the ward value and convert the count columns to ``int``;
      4. aggregate by ward using the module-level ``aggregation`` spec;
      5. expand each ward row into per-party rows via ``rowgenerator``.

    Returns a petl table with the header
    ``ward, party, registered_total, registered_party``.
    """
    column_names = {
        'Ward': 'ward',
        'Division': 'division',
        'Dem': 'democratic',
        'Rep': 'republican',
        'Total': 'total',
    }
    # 'strip' is given by name rather than as a callable; the remaining
    # columns are parsed as integers.
    converters = {
        'ward': 'strip',
        'division': int,
        'democratic': int,
        'republican': int,
        'total': int,
    }
    output_header = ['ward', 'party', 'registered_total', 'registered_party']

    raw = etl.fromcsv(filepath)
    selected = raw.rename(column_names).cut(
        'ward', 'division', 'democratic', 'republican', 'total')
    typed = selected.convert(converters)
    per_ward = typed.aggregate('ward', aggregation)
    return per_ward.rowmapmany(rowgenerator, header=output_header)
import petl as etl
def create_ward(row):
    """Return the ward part of a precinct code, as a string.

    The ward is the first two characters of ``row['ward_div']`` with any
    leading zeros removed (``'0105'`` -> ``'1'``). The value is deliberately
    left as a string; no integer conversion happens here.
    """
    ward_digits = row['ward_div'][:2]
    return ward_digits.lstrip('0')
def create_division(row):
    """Return the division part of a precinct code, as an int.

    The division is everything after the first two characters of
    ``row['ward_div']`` (``'0105'`` -> ``5``).

    ``int()`` already ignores leading zeros, so the digits are parsed
    directly. Stripping the zeros first would reduce an all-zero division
    such as ``'00'`` to an empty string and make ``int()`` raise
    ``ValueError``.

    Raises:
        ValueError: if the division part is empty or not numeric.
    """
    return int(row['ward_div'][2:])
def process_turnout(filepath):
    """Load the turnout CSV and summarise voters per ward and party.

    The precinct code column is split into ``ward`` and ``division`` fields
    (via ``create_ward`` / ``create_division``), party names are lowercased
    and voter counts are parsed as ``int``.

    Two aggregates are then built from that table:
      * ``turnout_total`` -- voters summed per ward across *all* parties;
      * ``turnout_party`` -- voters summed per (ward, party), restricted to
        the "democratic" and "republican" rows.

    Returns the per-party table left-joined to the per-ward totals on
    ``ward``.
    """
    source_columns = {
        'Precinct Code': 'ward_div',
        'Political Party': 'party',
        'Voter Count': 'voters',
    }
    precincts = (
        etl.fromcsv(filepath)
        .rename(source_columns)
        .cut('ward_div', 'party', 'voters')
        .convert({'voters': int, 'party': 'lower'})
        .addfield('ward', create_ward, index=0)
        .addfield('division', create_division, index=1)
        # The combined code is no longer needed once it has been split.
        .cutout('ward_div')
    )

    # Totals are taken before the party filter so they cover every party.
    ward_totals = precincts.aggregate('ward', aggregation=sum, value='voters')
    ward_totals = ward_totals.rename('value', 'turnout_total')

    major_parties = precincts.select(
        '{party} == "democratic" or {party} == "republican"')
    party_totals = major_parties.aggregate(
        key=('ward', 'party'), aggregation=sum, value='voters')
    party_totals = party_totals.rename('value', 'turnout_party')

    return party_totals.leftjoin(ward_totals, key='ward')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment