Skip to content

Instantly share code, notes, and snippets.

@oxguy3
Created June 26, 2015 02:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oxguy3/62caceb5767996bd4703 to your computer and use it in GitHub Desktop.
Save oxguy3/62caceb5767996bd4703 to your computer and use it in GitHub Desktop.
Scrape property tax PINs from the Cook County, IL website.
from lxml import html
import requests
import sqlite3
import sys
# Scraper for the Cook County property-info address search.
#
# For every combination of city query, house-number query and street query it
# fetches the address-results page, parses each result link's text into PIN and
# address fields, and stores them in the local SQLite database "cook_county.db".
# The `properties` table is emptied at the start of every run.

DB_PATH = "cook_county.db"

CREATE_TABLE_SQL = (
    "CREATE TABLE IF NOT EXISTS properties(pin TEXT(18) UNIQUE, "
    "number TEXT(63), street TEXT(255), unit TEXT(255), city TEXT(255), "
    "zip TEXT(5))"
)

# `pin` is UNIQUE, so OR IGNORE silently drops a listing that was already
# stored by an earlier query.
INSERT_SQL = (
    "INSERT OR IGNORE INTO properties "
    "VALUES (:pin, :hnum, :street, :unit, :city, :zipCode)"
)

# The literal "%20" after the street query is part of the original request
# and is kept as-is.
SEARCH_URL = (
    "http://www.cookcountypropertyinfo.com/Pages/Address-Results.aspx"
    "?hnum={num}&sname={street}%20&city={city}&zip=&unit=&dir="
)

RESULTS_XPATH = '//*[@id="ctl00_PlaceHolderMain_ctl00_resultsPanel"]/a/text()'

# NOTE(review): presumably the site treats these short values as partial
# matches so that together they cover every address - confirm against the site.
CITY_QUERIES = ("e", "u", "p", "d", "l", "r", "chicago")
HOUSE_NUMBER_QUERIES = "0123456789"
STREET_QUERIES = "abcdefghijklmnopqrstuvwxyz"

# Seconds to wait on the server before giving up; without a timeout a stalled
# connection would hang the crawl indefinitely.
REQUEST_TIMEOUT = 30


def parse_listing(raw):
    """Split one search-result link text into its database fields.

    The text is expected to look like
    ``<number> <street>[ Unit <unit>] (<city>, <zip>) - <pin>``.
    The *last* "(" starts the city/zip group, so parentheses inside the
    address itself are kept as part of the street.

    Args:
        raw: Text of one result link.

    Returns:
        A dict with the keys ``pin``, ``hnum``, ``street``, ``unit``,
        ``city`` and ``zipCode`` (``unit`` is ``None`` when the address has
        no "Unit" part), or ``None`` if the text does not have the expected
        shape.
    """
    address, paren, remainder = raw.rpartition('(')
    if not paren:
        return None

    hnum, space, street_and_unit = address.strip().partition(' ')
    if not space:
        return None

    locality, close_paren, trailer = remainder.partition(')')
    if not close_paren:
        return None

    city, comma, zip_code = locality.partition(', ')
    if not comma:
        return None

    # Only the first "-" separates the PIN from the address; the PIN itself
    # may contain further dashes.
    _, dash, pin = trailer.partition('-')
    if not dash:
        return None

    street, unit_marker, unit = street_and_unit.partition("Unit")

    return {
        "pin": pin.strip(),
        "hnum": hnum.strip(),
        "street": street.strip(),
        "unit": unit.strip() if unit_marker else None,
        "city": city.strip(),
        "zipCode": zip_code.strip(),
    }


def main():
    """Run the full crawl and rebuild the `properties` table.

    Progress is written to stdout (one line per query); listings that cannot
    be parsed are reported on stderr and skipped instead of aborting the run.
    Network errors raised by `requests` (including timeouts) propagate.
    """
    con = sqlite3.connect(DB_PATH)
    try:
        con.execute(CREATE_TABLE_SQL)
        con.execute("DELETE FROM properties")
        with con:
            cur = con.cursor()
            for city in CITY_QUERIES:
                for num in HOUSE_NUMBER_QUERIES:
                    for street in STREET_QUERIES:
                        sys.stdout.write(
                            "Starting {0} {1} {2} . . .".format(city, num, street)
                        )
                        # Flush so the progress text is visible while the
                        # request is still in flight.
                        sys.stdout.flush()

                        url = SEARCH_URL.format(num=num, street=street, city=city)
                        page = requests.get(url, timeout=REQUEST_TIMEOUT)
                        dom = html.fromstring(page.text)

                        stored = 0
                        skipped = []
                        for raw in dom.xpath(RESULTS_XPATH):
                            record = parse_listing(raw)
                            if record is None:
                                skipped.append(raw)
                                continue
                            cur.execute(INSERT_SQL, record)
                            stored += 1
                        # Commit per query so an interrupted run keeps
                        # everything scraped so far.
                        con.commit()

                        sys.stdout.write(
                            "   {0} \trecords parsed!\n".format(stored)
                        )
                        for raw in skipped:
                            sys.stderr.write(
                                "Skipped unparseable listing: {0!r}\n".format(raw)
                            )
        sys.stdout.write("\nSCRAPING COMPLETE!\n")
    finally:
        # `with con` only commits or rolls back; it does not close the handle.
        con.close()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment