Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import ast
import csv
import json
from io import StringIO

import requests
from lxml import etree
# Scrape the contribution-disclosure page, download every donor list it links
# to, and save the combined records as OFA_donors.json and OFA_donors.csv.
resp = requests.get('http://www.barackobama.com/contribution-disclosure/')
# Fail loudly on an HTTP error instead of trying to parse an error page.
resp.raise_for_status()
parsed = etree.parse(StringIO(resp.text), parser=etree.HTMLParser())

# The fourth <script> element directly under <body> is expected to contain a
# "letters = {...}" assignment. NOTE(review): this positional XPath is brittle
# and will break if the page layout changes -- confirm against the live page.
data_url_dict = parsed.xpath('/html/body/script[4]')[0]

# SECURITY: this text comes from a remote server. The original code passed it
# to eval(), which would execute arbitrary code; ast.literal_eval only accepts
# plain literals (dicts, lists, strings, numbers, ...) and raises otherwise.
letters = ast.literal_eval(
    data_url_dict.text.strip().replace("letters = ", "")
)

data = []
for letter in letters:
    # Each value in `letters` is a URL whose response wraps a literal in
    # "drawNames(...);" -- strip the wrapper and parse what is left.
    letter_resp = requests.get(letters[letter])
    letter_resp.raise_for_status()
    payload = letter_resp.text.replace("drawNames(", "").replace(");", "")
    new = ast.literal_eval(payload.strip())
    data.extend(new)

# Context managers guarantee the files are flushed and closed.
with open('OFA_donors.json', 'w', encoding='utf-8') as json_file:
    json.dump(data, json_file)

# Records may not all share the same keys, so the CSV header is the union of
# every key seen across all records.
keyset = set()
for d in data:
    keyset.update(d.keys())

# newline='' is required by the csv module to avoid blank rows on Windows;
# sorting the field names makes the column order reproducible between runs.
with open('OFA_donors.csv', 'w', newline='', encoding='utf-8') as csv_file:
    dw = csv.DictWriter(csv_file, sorted(keyset))
    dw.writeheader()
    dw.writerows(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.