# markng/part1.py

## part1.py
import requests
import csv
from pyquery import PyQuery as pq

# Fetch the StrikeEverything listing and write one CSV row per table row:
# the text of the row's first cell ('billno') and the href of a link found
# inside the row's cells ('docurl').
#
# The column list has to name every key stored in ``datarow``: DictWriter
# raises ValueError for keys missing from its fieldnames, and 'docurl' was
# not listed before, so the first writerow() call failed.
# The ``with`` block guarantees the file is flushed and closed, even when a
# request or a parse step raises part-way through; newline='' is what the
# csv module asks for on files it writes.
with open('strike_all_bills.csv', 'w', newline='') as f:
    wr = csv.DictWriter(f, ['billno', 'docurl'])

    req = requests.get('http://www.azleg.gov/StrikeEverything.asp')
    html = pq(req.text)
    # Every sibling of the header row is treated as a data row.
    rows = html('tr.TableHeaderBackground').siblings()
    for row in rows:
        # Look the cells up once and reuse them for both columns.
        cells = pq(row).children()
        datarow = {}
        datarow['billno'] = cells[0].text.strip()
        datarow['docurl'] = cells.children('a').attr('href')
        # The linked document is downloaded and parsed here, but nothing in
        # this script reads ``billhtml`` yet.
        billreq = requests.get(datarow['docurl'])
        billhtml = pq(billreq.text)
        wr.writerow(datarow)

## part2.py
import requests
import csv
from pyquery import PyQuery as pq

# Form fields posted to the cancomsrs search endpoint.
postvars = {
    'dbyear': 12,
    'cancom': 2,
    'name': "OBAMA",
}

# Post the search and save every six-cell row of the second table in the
# response to obamafec.csv. The ``with`` block guarantees the file is flushed
# and closed even if the request or the parsing raises; newline='' is what
# the csv module asks for on files it writes.
with open('obamafec.csv', 'w', newline='') as f:
    wr = csv.DictWriter(f, ['committee_name', 'receipts', 'disb', 'cash', 'debt', 'through'])

    req = requests.post('http://query.nictusa.com/cgi-bin/cancomsrs/', postvars)
    html = pq(req.text)
    committee_table = pq(html('table')[1])
    rows = committee_table('tr')
    for row in rows:
        row = pq(row)
        # Look the cells up once instead of re-querying them per column.
        cells = row.children()
        if len(cells) == 6:
            # we have a row with values, not a title
            # The six cells line up, in order, with the six CSV columns.
            datarow = dict(zip(wr.fieldnames, (pq(cell).text() for cell in cells)))
            wr.writerow(datarow)

# NOTE(review): the section below repeats the part1.py script found earlier
# in this file. It was tab-indented with the loop body flattened to the same
# level as the ``for`` line, which is not valid Python; it is restored here as
# runnable top-level code. Confirm whether the repeat is wanted at all.
import requests
import csv
from pyquery import PyQuery as pq

# Fetch the StrikeEverything listing and write one CSV row per table row:
# the text of the row's first cell ('billno') and the href of a link found
# inside the row's cells ('docurl'). Both keys must be listed as columns,
# because DictWriter raises ValueError for keys missing from its fieldnames.
with open('strike_all_bills.csv', 'w', newline='') as f:
    wr = csv.DictWriter(f, ['billno', 'docurl'])

    req = requests.get('http://www.azleg.gov/StrikeEverything.asp')
    html = pq(req.text)
    # Every sibling of the header row is treated as a data row.
    rows = html('tr.TableHeaderBackground').siblings()
    for row in rows:
        cells = pq(row).children()
        datarow = {}
        datarow['billno'] = cells[0].text.strip()
        datarow['docurl'] = cells.children('a').attr('href')
        # The linked document is downloaded and parsed here, but nothing in
        # this script reads ``billhtml`` yet.
        billreq = requests.get(datarow['docurl'])
        billhtml = pq(billreq.text)
        wr.writerow(datarow)