Created
December 7, 2017 16:50
-
-
Save migurski/63cb2574bc6fa752c1d666bb4c5ad406 to your computer and use it in GitHub Desktop.
Convert WI ward-level result CSVs from OpenElections to tabular format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' Convert ward-level result CSVs from OpenElections to tabular format.
'''
import gzip, csv, collections, sys, itertools | |
def key1(row):
    ''' Build the outer grouping key for a row: its (county, ward) pair.
    '''
    county = row['county']
    ward = row['ward']
    return (county, ward)
def key2(row):
    ''' Inner key is the office and party, but not the candidate.

        Returns an (office, party) tuple, so votes for different candidates
        of the same party running for the same office share one key.
    '''
    return row['office'], row['party']
def key3(key2):
    ''' Turn an inner key (a sequence of strings such as office and party)
        into a single column name, with the parts separated by " - ".
    '''
    separator = ' - '
    return separator.join(key2)
def value(row):
    ''' Value is the simple vote count.

        Returns row['votes'] converted to an int. Anything that cannot be
        converted counts as zero votes: a blank or non-numeric string, or
        None, which csv.DictReader supplies for fields missing from a
        short row.
    '''
    try:
        return int(row['votes'])
    except (ValueError, TypeError):
        # ValueError: blank or non-numeric text; TypeError: None.
        return 0
# Exactly two command-line arguments are expected: the gzipped input CSV
# path followed by the output path. Any other count raises ValueError here.
filename1, filename2 = sys.argv[1:]

# Results go into a two-dimensional dictionary keyed on location and office.
# Inner entries default to None so that a missing office/party combination
# can be told apart from one with a recorded total of zero.
results = collections.defaultdict(lambda: collections.defaultdict(lambda: None))

# Add OpenElections CSV votes into results dictionary.
# newline='' leaves line-ending handling to the csv module, as its
# documentation requires for files passed to a reader.
with gzip.open(filename1, 'rt', newline='') as file:
    reader = csv.DictReader(file)
    for row in reader:
        curr_value = results[key1(row)][key2(row)] or 0
        results[key1(row)][key2(row)] = curr_value + value(row)

# Report the grand total as a sanity check. Only summed integers have been
# stored at this point, so no None placeholders are included.
print(sum(sum(votes.values()) for votes in results.values()), 'Votes')

# Output CSV has a column for each office.
fieldnames = ['County', 'Ward']
key2s = set(itertools.chain.from_iterable(results.values()))
fieldnames.extend(sorted(map(key3, key2s)))

# Write output CSV with a cell for every column in every row. Combinations
# never seen for a ward read back as None from the defaultdict and are
# written as empty cells; recorded zero totals are written as 0.
# newline='' stops the platform from translating the dialect's '\r\n' line
# terminator a second time (which would produce '\r\r\n' on Windows).
with open(filename2, 'w', newline='') as file:
    out = csv.DictWriter(file, fieldnames, dialect='excel-tab')
    out.writeheader()
    for ((county, ward), result) in sorted(results.items()):
        row = {key3(k2): result[k2] for k2 in key2s}
        row.update(County=county, Ward=ward)
        out.writerow(row)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment