Last active
December 7, 2017 16:50
-
-
Save migurski/ecdba9e77fa30c3a62f154820d695672 to your computer and use it in GitHub Desktop.
Convert NC precinct-level result CSVs from OpenElections to tabular format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' Convert precinct-level result CSVs from OpenElections to tabular format.
'''
import gzip, csv, collections, sys, itertools | |
def key1(row):
    ''' Build the outer grouping key for one input row.

        Returns a (parent_jurisdiction, jurisdiction) tuple, which the
        rest of the script treats as (county, precinct).
    '''
    county = row['parent_jurisdiction']
    precinct = row['jurisdiction']
    return county, precinct
def key2(row):
    ''' Inner key is the office and party, but not the candidate.

        Returns an (office, party) tuple. The candidate name is left out
        on purpose, so the key identifies a party within a contest rather
        than an individual candidate.
    '''
    return row['office'], row['party']
def key3(key2):
    ''' Turn an inner key into a single output column name.

        The parts of the key are joined into one string with " - "
        between them.
    '''
    separator = ' - '
    return separator.join(key2)
def value(row):
    ''' Value is the simple vote count.

        Returns row['votes'] converted to int, or 0 when the field cannot
        be converted. A row with no 'votes' key at all still raises
        KeyError.
    '''
    try:
        return int(row['votes'])
    except (TypeError, ValueError):
        # ValueError: blank or non-numeric text in the votes column.
        # TypeError: None, which csv.DictReader substitutes for fields
        # missing from a row shorter than the header.
        return 0
# Expect exactly two arguments: the gzipped input CSV and the output path.
if len(sys.argv) != 3:
    sys.exit('Usage: {} <input.csv.gz> <output.tsv>'.format(sys.argv[0]))

filename1, filename2 = sys.argv[1:]

# Results go into a two-dimensional dictionary keyed on location and office.
# A missing inner entry reads as None (not 0), so a contest that never
# appears for a precinct can be told apart from one with zero votes.
results = collections.defaultdict(lambda: collections.defaultdict(lambda: None))

# Add OpenElections CSV votes into results dictionary.
# newline='' lets the csv module do its own line-ending handling.
with gzip.open(filename1, 'rt', newline='') as file:
    for row in csv.DictReader(file):
        outer, inner = key1(row), key2(row)
        # "or 0" turns the defaultdict's None placeholder into a number.
        results[outer][inner] = (results[outer][inner] or 0) + value(row)

print(sum(sum(votes.values()) for votes in results.values()), 'Votes')

# Output CSV has a column for each office.
fieldnames = ['County', 'Precinct']
key2s = set(itertools.chain.from_iterable(results.values()))
fieldnames.extend(sorted(map(key3, key2s)))

# Write output CSV with a cell for every column in every row. Zero totals
# are written as 0; combinations never seen for a precinct come back from
# the defaultdict as None, which the csv writer emits as an empty cell.
# newline='' keeps the dialect's line terminator from being translated
# a second time by the text layer.
with open(filename2, 'w', newline='') as file:
    out = csv.DictWriter(file, fieldnames, dialect='excel-tab')
    out.writeheader()

    for ((county, precinct), result) in sorted(results.items()):
        row = {key3(k2): result[k2] for k2 in key2s}
        row.update(County=county, Precinct=precinct)
        out.writerow(row)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment