Skip to content

Instantly share code, notes, and snippets.

@eoglethorpe
Created March 10, 2016 10:42
Show Gist options
  • Save eoglethorpe/c8e1105f906f3fc925a5 to your computer and use it in GitHub Desktop.
Save eoglethorpe/c8e1105f906f3fc925a5 to your computer and use it in GitHub Desktop.
__author__ = 'eoglethorpe'
"""take election info from
http://projects.fivethirtyeight.com/election-2016/delegate-targets/democrats/
and convert it to JSON
"""
import json
import requests
from lxml import html
# Page that get_data() downloads and parses.
URL = 'http://projects.fivethirtyeight.com/election-2016/delegate-targets/democrats/'
def get_data(url=None, timeout=30):
    """Download the delegate-targets page and return its table rows as text.

    Args:
        url: Page to fetch. Defaults to the module-level ``URL``.
        timeout: Seconds handed to ``requests.get`` so a stalled server
            cannot hang the script forever.

    Returns:
        A list with one inner list per table row. Each inner list holds the
        text of that row's cells: the first three cells contribute their own
        text, later (result) cells contribute the text of their first child
        element. A cell without text yields ``None``.

    Raises:
        requests.HTTPError: The server answered with an error status.
        IndexError: The expected table body is not present in the page.
    """
    if url is None:
        url = URL

    page = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    page.raise_for_status()
    tree = html.fromstring(page.content)

    # NOTE(review): lxml does not insert a <tbody> the way browsers do, so
    # this XPath relies on the served HTML containing one -- confirm against
    # the live page if the lookup below starts failing.
    bodies = tree.xpath('//*[@id="state-list"]/table[1]/tbody')
    if not bodies:
        raise IndexError('delegate table not found in page: %s' % url)

    states = []
    for row in bodies[0]:
        cur_state = []
        for i, cell in enumerate(row):
            # The result columns keep their value in a child element; fall
            # back to the cell's own text when no child element exists.
            if i < 3 or len(cell) == 0:
                cur_state.append(cell.text)
            else:
                cur_state.append(cell[0].text)
        states.append(cur_state)
    return states
def clean_data(states=None):
    """Fill in missing dates and strip predictions from the result columns.

    Each row has the layout
    ``date | state_nm | num_del_tot | del_won_hil | del_won_bern``.

    Args:
        states: Rows to clean, in the layout above. When omitted, the rows
            are fetched with ``get_data()``. The rows are modified in place.

    Returns:
        The same list of rows, where
        * a row without a date inherits the date of the closest preceding
          row that has one, and
        * each result cell, formatted as ``result/needed``, is reduced to
          its ``result`` part, or to ``None`` when no result is present.
    """
    if states is None:
        states = get_data()

    # An empty table has no first row to seed the running date from.
    cur_date = states[0][0] if states else None

    for r in states:
        # dates: blank cells belong to the date shown on an earlier row
        if not r[0]:
            r[0] = cur_date
        else:
            cur_date = r[0]

        # results: the cell reads "result/needed"; only the result is kept.
        # A missing cell or one starting with '/' means no result yet.
        won_hil, won_bern = r[3], r[4]
        if not won_hil or won_hil.startswith('/'):
            r[3] = None
            r[4] = None
        else:
            r[3] = won_hil.split('/')[0]
            r[4] = won_bern.split('/')[0] if won_bern else None
    return states
def jsonify(rows=None):
    """Serialize cleaned delegate rows to a JSON array of objects.

    Args:
        rows: Rows in the layout
            ``date | state | num_del_tot | del_won_hil | del_won_bern``.
            When omitted, the rows are obtained from ``clean_data()``.

    Returns:
        A JSON string holding one object per row with the keys ``date``,
        ``state``, ``num_del_tot``, ``del_won_hill`` and ``del_won_bern``.

    Raises:
        IndexError: A row has fewer than five cells.
    """
    if rows is None:
        rows = clean_data()

    conv = [
        {
            'date': r[0],
            'state': r[1],
            'num_del_tot': r[2],
            'del_won_hill': r[3],
            'del_won_bern': r[4],
        }
        for r in rows
    ]
    return json.dumps(conv)
# Parenthesized so the line is valid on both Python 2 and Python 3.
print(jsonify())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment