Skip to content

Instantly share code, notes, and snippets.

@markng markng/
Created Feb 25, 2012

What would you like to do?
Web Scraping NICAR Python class
import requests
import csv
from pyquery import PyQuery as pq
# Scrape a table of bills: for every row that follows the header row, record
# the bill number and the linked document URL, then fetch the linked page.
#
# NOTE(review): this excerpt looks incomplete. The listing URL passed to
# requests.get() is an empty string, and nothing is ever written through
# ``wr``. Both must be filled in before the script does useful work. If rows
# are to be written, note that ``wr`` declares only the 'billno' column while
# each ``datarow`` also carries 'docurl'.
with open('strike_all_bills.csv', 'w') as f:
    wr = csv.DictWriter(f, ['billno'])
    req = requests.get('')
    html = pq(req.text)
    # The data rows are the siblings of the header row.
    rows = html('tr.TableHeaderBackground').siblings()
    for row in rows:
        cells = pq(row).children()
        if not cells:
            # A row without cells has no bill number to read.
            continue
        datarow = {}
        # An element's .text is None when it has no leading text, so fall
        # back to '' instead of calling .strip() on None.
        datarow['billno'] = (cells[0].text or '').strip()
        datarow['docurl'] = cells.children('a').attr('href')
        if not datarow['docurl']:
            # No link in this row, so there is nothing to fetch.
            continue
        billreq = requests.get(datarow['docurl'])
        billhtml = pq(billreq.text)
import requests
import csv
from pyquery import PyQuery as pq
# Post a form, then turn each six-cell row of the second table on the
# response page into a dict keyed by the CSV column names.
#
# NOTE(review): this excerpt is damaged and has been reconstructed minimally:
#   * the form fields originally listed in ``postvars`` are missing from the
#     source; the dict is left empty and must be filled in;
#   * the request call had lost its function name and URL. ``requests.post``
#     is assumed from the (url, data) argument shape -- confirm, and supply
#     the URL;
#   * nothing is written through ``wr`` in the visible code; the write step
#     appears to be missing.
postvars = {}
columns = ['committee_name', 'receipts', 'disb', 'cash', 'debt', 'through']
with open('obamafec.csv', 'w') as f:
    wr = csv.DictWriter(f, columns)
    req = requests.post('', postvars)
    html = pq(req.text)
    committee_table = pq(html('table')[1])
    rows = committee_table('tr')
    for row in rows:
        row = pq(row)
        cells = row.children()
        if len(cells) != 6:
            continue
        # we have a row with values, not a title
        # Cells map positionally onto the CSV columns.
        datarow = {}
        for column, cell in zip(columns, cells):
            datarow[column] = pq(cell).text()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.