# zstumgoren/election_results.py

## election_results.py
import csv
import sys
import urllib
from collections import defaultdict
from operator import itemgetter
from os.path import dirname, join

# urlretrieve lives in urllib.request on Python 3 and in urllib on Python 2.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

# Source data: a Google Docs spreadsheet published as CSV.
url = "https://docs.google.com/spreadsheet/pub?key=0AhhC0IWaObRqdGFkUW1kUmp2ZlZjUjdTYV9lNFJ5RHc&output=csv"
# The download is stored in the parent of the directory that holds this script.
filename = join(dirname(dirname(__file__)), 'fake_va_elec_results.csv')
# Use the version-neutral urlretrieve imported at the top of the file;
# urllib.urlretrieve only exists on Python 2.
urlretrieve(url, filename)

# Read the downloaded CSV and group its rows by race, then by candidate.
#
# The csv module wants a binary-mode file on Python 2 but a text-mode file
# opened with newline='' on Python 3.
# NOTE(review): on Python 3 the file is decoded with the platform default
# encoding; pass encoding= explicitly if the sheet holds non-ASCII names.
if sys.version_info[0] >= 3:
    infile = open(filename, 'r', newline='')
else:
    infile = open(filename, 'rb')

# results maps race key -> candidate key -> list of row dicts.
# defaultdict(dict) creates the per-race dict the first time a race is seen.
results = defaultdict(dict)
with infile:  # guarantees the handle is closed once all rows are consumed
    reader = csv.DictReader(infile)
    for row in reader:
        # Candidate names are "Last, First". partition() splits on the first
        # comma only, so a name with extra commas (or none at all) no longer
        # raises ValueError: everything after the first comma becomes
        # first_name, and a comma-less name yields an empty first_name.
        last_name, _, first_name = row['candidate'].partition(',')
        row['last_name'] = last_name.strip()
        row['first_name'] = first_name.strip()
        # Votes arrive as text; convert so they can be summed later.
        row['votes'] = int(row['votes'])
        # Race key is the office, plus "-<district>" when a district is given.
        race_key = row['office']
        if row['district']:
            race_key += "-%s" % row['district']
        # Candidate key combines party and raw name, in case multiple
        # candidates have the same name.
        cand_key = "-".join((row['party'], row['candidate']))
        # setdefault creates the candidate's row list on first sight.
        results[race_key].setdefault(cand_key, []).append(row)

# Tally votes for races and candidates and assign winners.
# summary maps race key -> dict with race metadata, the race-wide vote total
# and the list of candidate dicts sorted from most to fewest votes.
summary = defaultdict(dict)
for race_key, cand_results in results.items():
    all_votes = 0
    cands = []
    # The per-candidate rows are bound to `county_rows`, not `results`, so the
    # dict being iterated by the outer loop is never shadowed.
    for cand_key, county_rows in cand_results.items():
        cand_total_votes = sum(county_row['votes'] for county_row in county_rows)
        # Name and party are identical on every row stored under one
        # candidate key, so the first row is enough.
        first_row = county_rows[0]
        cands.append({
            'first_name': first_row['first_name'],
            'last_name': first_row['last_name'],
            'party': first_row['party'],
            'winner': '',
            'votes': cand_total_votes,
        })
        # Add the candidate's total to the race-wide vote count.
        all_votes += cand_total_votes

    # Sort candidates from highest to lowest vote count.
    sorted_cands = sorted(cands, key=itemgetter('votes'), reverse=True)
    # A sole candidate wins outright (previously this raised IndexError);
    # otherwise the leader wins unless tied with the runner-up, in which case
    # nobody is flagged.
    first = sorted_cands[0]
    if len(sorted_cands) == 1 or first['votes'] != sorted_cands[1]['votes']:
        first['winner'] = 'X'
    # Take race metadata from one row of the race. next(iter(...)) picks the
    # same element as values()[0] but also works with Python 3 dict views.
    result = next(iter(cand_results.values()))[0]
    # Add results to output.
    summary[race_key] = {
        'date': result['date'],
        'office': result['office'],
        'district': result['district'],
        'all_votes': all_votes,
        'candidates': sorted_cands,
    }

# Write one CSV row per candidate, each carrying its race's metadata.
outfile = join(dirname(__file__), 'summary_results.csv')
# The csv module wants a binary-mode file on Python 2 but a text-mode file
# opened with newline='' on Python 3.
if sys.version_info[0] >= 3:
    fh = open(outfile, 'w', newline='')
else:
    fh = open(outfile, 'wb')
with fh:
    # We'll limit the output to cleanly parsed, standardized values.
    fieldnames = [
        'date', 'office', 'district', 'last_name',
        'first_name', 'party', 'all_votes', 'votes', 'winner',
    ]
    # extrasaction='ignore' drops any key that is not listed in fieldnames.
    writer = csv.DictWriter(fh, fieldnames, extrasaction='ignore', quoting=csv.QUOTE_MINIMAL)
    writer.writeheader()
    for race_summary in summary.values():
        for cand in race_summary['candidates']:
            # Merge race-level and candidate-level fields into a fresh dict so
            # the entries of `summary` are left untouched. The leftover
            # 'candidates' key is ignored by the writer.
            row = dict(race_summary, **cand)
            writer.writerow(row)
	import csv, urllib
	from operator import itemgetter
	from collections import defaultdict
	from os.path import dirname, join

	# NOTE(review): from here on the file repeats the script above with its
	# nesting flattened to one tab; it looks like a paste/extraction duplicate.
	# Confirm and remove. Until then the block structure is restored below so
	# the statements at least nest correctly.

	# Source data: a Google Docs spreadsheet published as CSV.
	url = "https://docs.google.com/spreadsheet/pub?key=0AhhC0IWaObRqdGFkUW1kUmp2ZlZjUjdTYV9lNFJ5RHc&output=csv"
	# The download is stored in the parent of the directory that holds this script.
	filename = join(dirname(dirname(__file__)), 'fake_va_elec_results.csv')
	# Version-neutral urlretrieve; urllib.urlretrieve only exists on Python 2.
	urlretrieve(url, filename)

	# The csv module wants a binary-mode file on Python 2 but a text-mode file
	# opened with newline='' on Python 3.
	if sys.version_info[0] >= 3:
	    infile = open(filename, 'r', newline='')
	else:
	    infile = open(filename, 'rb')

	# results maps race key -> candidate key -> list of row dicts.
	results = defaultdict(dict)
	with infile:  # guarantees the handle is closed once all rows are consumed
	    reader = csv.DictReader(infile)
	    for row in reader:
	        # "Last, First" -> last/first; partition() splits on the first
	        # comma only, so odd names no longer raise ValueError.
	        last_name, _, first_name = row['candidate'].partition(',')
	        row['last_name'] = last_name.strip()
	        row['first_name'] = first_name.strip()
	        # Votes arrive as text; convert so they can be summed later.
	        row['votes'] = int(row['votes'])
	        # Race key is the office, plus "-<district>" when a district is given.
	        race_key = row['office']
	        if row['district']:
	            race_key += "-%s" % row['district']
	        # Candidate key combines party and raw name, in case multiple
	        # candidates have the same name.
	        cand_key = "-".join((row['party'], row['candidate']))
	        results[race_key].setdefault(cand_key, []).append(row)

	# Tally votes for races and candidates and assign winners.
	summary = defaultdict(dict)
	for race_key, cand_results in results.items():
	    all_votes = 0
	    cands = []
	    # Bound to `county_rows`, not `results`, so the dict being iterated by
	    # the outer loop is never shadowed.
	    for cand_key, county_rows in cand_results.items():
	        cand_total_votes = sum(county_row['votes'] for county_row in county_rows)
	        first_row = county_rows[0]
	        cands.append({
	            'first_name': first_row['first_name'],
	            'last_name': first_row['last_name'],
	            'party': first_row['party'],
	            'winner': '',
	            'votes': cand_total_votes,
	        })
	        all_votes += cand_total_votes

	    # Sort candidates from highest to lowest vote count.
	    sorted_cands = sorted(cands, key=itemgetter('votes'), reverse=True)
	    # A sole candidate wins outright; otherwise the leader wins unless tied
	    # with the runner-up.
	    first = sorted_cands[0]
	    if len(sorted_cands) == 1 or first['votes'] != sorted_cands[1]['votes']:
	        first['winner'] = 'X'
	    # Race metadata from one row; next(iter(...)) also works on Python 3.
	    result = next(iter(cand_results.values()))[0]
	    summary[race_key] = {
	        'date': result['date'],
	        'office': result['office'],
	        'district': result['district'],
	        'all_votes': all_votes,
	        'candidates': sorted_cands,
	    }

	# Write one CSV row per candidate, each carrying its race's metadata.
	outfile = join(dirname(__file__), 'summary_results.csv')
	if sys.version_info[0] >= 3:
	    fh = open(outfile, 'w', newline='')
	else:
	    fh = open(outfile, 'wb')
	with fh:
	    # We'll limit the output to cleanly parsed, standardized values.
	    fieldnames = [
	        'date', 'office', 'district', 'last_name',
	        'first_name', 'party', 'all_votes', 'votes', 'winner',
	    ]
	    # extrasaction='ignore' drops any key that is not listed in fieldnames.
	    writer = csv.DictWriter(fh, fieldnames, extrasaction='ignore', quoting=csv.QUOTE_MINIMAL)
	    writer.writeheader()
	    for race_summary in summary.values():
	        for cand in race_summary['candidates']:
	            # Fresh dict per row so `summary` is left untouched.
	            row = dict(race_summary, **cand)
	            writer.writerow(row)