python-legi-write-bio
Python scraper to pull Wisconsin state senator and state representative district contact information and biographies into a text file or csv.
import itertools

import lxml
import requests
from django.utils.encoding import smart_str, smart_unicode
from lxml import html
# Fetch the biography page of each Wisconsin Assembly district, numbered 1
# through `endpoint`, echo the text of every matching <span> to stdout and
# write the same bytes to output.txt.
endpoint = 99  # highest district number requested (inclusive)

# The with-block closes output.txt even when a request or parse raises
# part-way through the run. The handle is deliberately not called `file`,
# so the builtin of that name is no longer shadowed.
with open('output.txt', 'w') as output:
    # range() replaces the hand-maintained counter, so the district number
    # can no longer be advanced at the wrong nesting level.
    for district in range(1, endpoint + 1):
        # request the bio page for this district
        r = requests.get('http://legis.wisconsin.gov/w3asp/contact/legislatorpages.aspx?house=Assembly&district=' + str(district) + '&display=bio')
        # parse the raw response body into an element tree
        tree = lxml.html.fromstring(r.content)
        # select every <span> nested under a <div class="indent">
        elements = tree.cssselect("div.indent span")
        for el in elements:
            # whitespace-stripped text of the element, encoded as UTF-8
            data = el.text_content().strip().encode('utf-8')
            # single-argument print(...) behaves exactly like the Python 2
            # print statement and is also valid Python 3 syntax
            print(data)
            # NOTE(review): entries are written back-to-back with no
            # separator between them, exactly as before -- confirm whether
            # a newline or delimiter is wanted in the output file.
            output.write(data)