# migurski/openelections-table.py

## openelections-table.py
''' Convert precinct-level result CSVs from OpenElections to tabular format.
'''
import gzip, csv, collections, sys, itertools

def key1(row):
    ''' Build the outer grouping key for a result row.

        Returns a (county, precinct) tuple read from the row's
        'parent_jurisdiction' and 'jurisdiction' fields.
    '''
    county = row['parent_jurisdiction']
    precinct = row['jurisdiction']
    return (county, precinct)

def key2(row):
    ''' Inner key is the office and party, but not the candidate.

        Returns an (office, party) tuple read from the row's 'office' and
        'party' fields, so votes for all candidates of one party in one
        contest are grouped under the same key.
    '''
    return row['office'], row['party']

def key3(key2):
    ''' Turn an inner key into an output column name.

        The parts of the key (office and party) are joined into a single
        string with ' - ' between them.
    '''
    separator = ' - '
    return separator.join(key2)

def value(row):
    ''' Value is the simple vote count.

        Returns the row's 'votes' field converted to an int, or 0 when the
        field cannot be parsed as an integer (for example an empty string).
        A missing value of None, which csv.DictReader supplies for rows with
        too few fields, also counts as 0 instead of raising TypeError.
    '''
    try:
        return int(row['votes'])
    except (TypeError, ValueError):
        return 0

# Exactly two command-line arguments are expected: the gzipped OpenElections
# CSV to read and the tab-delimited file to write. Any other number of
# arguments makes this unpacking raise ValueError.
filename1, filename2 = sys.argv[1:]

# Results go into a two-dimensional dictionary keyed on location and office.
# Missing entries default to None rather than 0, so a contest that was never
# seen for a precinct can be told apart from one with zero votes.
results = collections.defaultdict(lambda: collections.defaultdict(lambda: None))

# Add OpenElections CSV votes into results dictionary.
# newline='' leaves newline handling to the csv module, as its docs require.
with gzip.open(filename1, 'rt', newline='') as file:
    for row in csv.DictReader(file):
        # Compute each key once per row instead of twice.
        location, contest = key1(row), key2(row)
        curr_value = results[location][contest] or 0
        results[location][contest] = curr_value + value(row)

print(sum(sum(votes.values()) for votes in results.values()), 'Votes')

# Output CSV has a column for each office.
fieldnames = ['County', 'Precinct']
key2s = set(itertools.chain.from_iterable(results.values()))
fieldnames.extend(sorted(map(key3, key2s)))

# Write output CSV with a cell for every column in every row: contests seen
# for a precinct get their total (even if zero); contests never seen there
# hold None, which the csv module writes as an empty cell.
# newline='' stops the text layer from translating the dialect's '\r\n' line
# terminator a second time (which would produce '\r\r\n' on Windows).
with open(filename2, 'w', newline='') as file:
    out = csv.DictWriter(file, fieldnames, dialect='excel-tab')
    out.writeheader()
    for ((county, precinct), result) in sorted(results.items()):
        # .get() reads absent contests as None without inserting them.
        row = {key3(k2): result.get(k2) for k2 in key2s}
        row.update(County=county, Precinct=precinct)
        out.writerow(row)
	''' Convert precinct-level result CSVs from OpenElections to tabular format.
	'''
	import gzip, csv, collections, sys, itertools

	def key1(row):
		''' Outer key is the county and precinct.

		Returns a tuple of the row's 'parent_jurisdiction' and
		'jurisdiction' fields.
		'''
		return row['parent_jurisdiction'], row['jurisdiction']

	def key2(row):
		''' Inner key is the office and party, but not the candidate.

		Returns a tuple of the row's 'office' and 'party' fields, so votes
		for all candidates of one party in one contest share the same key.
		'''
		return row['office'], row['party']

	def key3(key2):
		''' Field name joins the office and party into a string.

		The parts of the key are joined with ' - ' between them.
		'''
		return ' - '.join(key2)

	def value(row):
		''' Value is the simple vote count.

		Returns the row's 'votes' field converted to an int, or 0 when the
		field cannot be parsed as an integer (for example an empty string).
		A missing value of None, which csv.DictReader supplies for rows with
		too few fields, also counts as 0 instead of raising TypeError.
		'''
		try:
			return int(row['votes'])
		except (TypeError, ValueError):
			return 0

	# Exactly two command-line arguments are expected: the gzipped OpenElections
	# CSV to read and the tab-delimited file to write. Any other number of
	# arguments makes this unpacking raise ValueError.
	filename1, filename2 = sys.argv[1:]

	# Results go into a two-dimensional dictionary keyed on location and office.
	# Missing entries default to None rather than 0, so a contest that was never
	# seen for a precinct can be told apart from one with zero votes.
	results = collections.defaultdict(lambda: collections.defaultdict(lambda: None))

	# Add OpenElections CSV votes into results dictionary.
	# newline='' leaves newline handling to the csv module, as its docs require.
	with gzip.open(filename1, 'rt', newline='') as file:
		for row in csv.DictReader(file):
			# Compute each key once per row instead of twice.
			location, contest = key1(row), key2(row)
			curr_value = results[location][contest] or 0
			results[location][contest] = curr_value + value(row)

	print(sum(sum(votes.values()) for votes in results.values()), 'Votes')

	# Output CSV has a column for each office.
	fieldnames = ['County', 'Precinct']
	key2s = set(itertools.chain.from_iterable(results.values()))
	fieldnames.extend(sorted(map(key3, key2s)))

	# Write output CSV with a cell for every column in every row: contests seen
	# for a precinct get their total (even if zero); contests never seen there
	# hold None, which the csv module writes as an empty cell.
	# newline='' stops the text layer from translating the dialect's '\r\n' line
	# terminator a second time (which would produce '\r\r\n' on Windows).
	with open(filename2, 'w', newline='') as file:
		out = csv.DictWriter(file, fieldnames, dialect='excel-tab')
		out.writeheader()
		for ((county, precinct), result) in sorted(results.items()):
			# .get() reads absent contests as None without inserting them.
			row = {key3(k2): result.get(k2) for k2 in key2s}
			row.update(County=county, Precinct=precinct)
			out.writerow(row)