Skip to content

Instantly share code, notes, and snippets.

@quandyfactory
Created September 11, 2010 02:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save quandyfactory/574718 to your computer and use it in GitHub Desktop.
Save quandyfactory/574718 to your computer and use it in GitHub Desktop.
Turns the City of #HamOnt's election page blog into SQL insert queries.
#!/usr/bin/env python
def prepare(blob):
"""
Cleans up the City of Hamilton's blob of text from its candidates page and turns it into a list of dictionaries.
Here's the URL: http://www.hamilton.ca/CityDepartments/CorporateServices/Clerks/MunicipalElection/Nominated+Candidates.htm
We grabbed the source code for the candidates, minus the rest of the HTML boilerplate from the page, and added an octothorpe # before each ward heading to make the ward sections easier to parse.
"""
blobs = blob.split('\n# ')
wards = []
for blob in blobs:
title = blob.split('\n')[0]
wards.append([title, '\n'.join(blob.split('\n')[1:])])
wards[0][0] = 'Mayor' # fix hash in first title
for ward in wards:
thisward = ward[1]
candidates = thisward.split('\n\n')
ward[1] = candidates
final = []
for ward in wards:
thisward = ward[0]
candidates = ward[1]
for candidate in candidates:
if candidate.strip() != '':
d = { 'ward': thisward, 'name': '', 'address': '', 'email': '', 'home_phone': '', 'bus_phone': '', 'fax_number': '' }
while ' ' in candidate:
candidate = candidate.replace(' ', ' ')
candidate = candidate.strip().replace("'", "'") # remove surrounding whitespace, replace sinqle quote with html entity
candidate = candidate.split('\n')
firstline = candidate[0].split(' ')
d['name'] = firstline[0]
d['address'] = ''
for row in candidate:
thisline = row.split(' ')
if thisline[0] != d['name']: d['address'] += ' %s' % (thisline[0])
if len(thisline) > 1:
if thisline[1][:8] == 'E-mail: ':
d['email'] = thisline[1][8:]
elif thisline[1][:12] == 'Home Phone: ':
d['home_phone'] = thisline[1][12:]
elif thisline[1][:16] == 'Business Phone: ':
d['bus_phone'] = thisline[1][16:]
elif thisline[1][:12] == 'Fax Number: ':
d['fax_number'] = thisline[1][12:]
d['address'] = d['address'].strip()
final.append(d)
return final
def make_insert(final, tablename):
"""
Turns the list of candidate dictionaries into an SQL insert statement
"""
fields = 'ward name address email home_phone bus_phone fax_number'.split(' ')
sql_cols = "(%s)" % (", ".join(fields))
sql_inserts = ', '.join(["('%s')" % ("', '".join([l[f] for f in fields])) for l in final])
return 'insert into %s %s values %s' % (tablename, sql_cols, sql_inserts)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment