Skip to content
Create a gist now

Instantly share code, notes, and snippets.

Embed URL


Subversion checkout URL

You can clone with
Download ZIP
Web Scraping NICAR Python class
import requests
import csv
from pyquery import PyQuery as pq
# Scrape the bill listing table and, for every bill row, download the
# document that the row links to.
#
# NOTE(review): the listing URL is an empty string in this copy of the
# script; supply the real page address before running.
# NOTE(review): `wr` is created but no row is written with it, and `billhtml`
# is parsed but not used afterwards -- the script appears to be cut short
# here; confirm against the original before relying on the CSV output.
with open('strike_all_bills.csv', 'w') as f:  # closed even if a request fails
    wr = csv.DictWriter(f, ['billno'])

    req = requests.get('')
    html = pq(req.text)

    # The rows of interest are the siblings of the table's header row.
    rows = html('tr.TableHeaderBackground').siblings()

    for row in rows:
        cells = pq(row).children()

        datarow = {}
        # Text of the first cell is stored as the bill number.
        datarow['billno'] = cells[0].text.strip()
        # href of the link found inside the row's cells.
        datarow['docurl'] = cells.children('a').attr('href')

        # Fetch and parse the linked bill document.
        billreq = requests.get(datarow['docurl'])
        billhtml = pq(billreq.text)
import requests
import csv
from pyquery import PyQuery as pq
# Scrape the committee summary table from the response page and save one CSV
# row per committee to obamafec.csv.
#
# NOTE(review): this copy of the script lost the form fields that belong in
# `postvars`, the request URL, and the name of the call that sends them.
# `requests.post` is assumed from the variable name `postvars` -- restore the
# real fields and URL, and confirm the HTTP method, before running.
fields = ['committee_name', 'receipts', 'disb', 'cash', 'debt', 'through']
postvars = {}

with open('obamafec.csv', 'w') as f:  # closed even if the request fails
    wr = csv.DictWriter(f, fields)

    req = requests.post('', postvars)
    html = pq(req.text)

    # The committee figures are read from the second <table> on the page.
    committee_table = pq(html('table')[1])
    rows = committee_table('tr')

    for row in rows:
        row = pq(row)
        cells = row.children()
        if len(cells) != len(fields):
            # Not a six-cell value row (e.g. a title row): skip it.
            continue

        # Cells appear in the same order as the CSV columns.
        datarow = dict(
            (name, pq(cell).text()) for name, cell in zip(fields, cells)
        )
        # The original built `datarow` and dropped it; write it out.
        wr.writerow(datarow)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.