Skip to content

Instantly share code, notes, and snippets.

@tristanwietsma
Created October 21, 2013 04:12
Show Gist options
  • Save tristanwietsma/7078569 to your computer and use it in GitHub Desktop.
Save tristanwietsma/7078569 to your computer and use it in GitHub Desktop.
Real estate tax web scraper
import sys, re
from string import join
import requests
# Treasurer page that is queried with a ``parcelnumber`` query parameter.
BASE_URL = "http://www.co.kane.il.us/TaxAssessment/Treasurer.aspx"

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Regular expressions applied to the raw HTML of the Treasurer page.  They are
# raw strings so that sequences such as ``\d`` reach the regex engine untouched.
# NOTE: inside the ADDRESS character class, ``,-.`` is a range from ',' to '.',
# which covers exactly the three characters ',', '-' and '.'.
ADDRESS = r"""<span id="lblPropertyAddress[0-9]" style="display:inline-block;">([A-Za-z0-9 ,-.]*)</span><br />"""
TAXES = r"""TaxYear=(\d\d\d\d)">(\d\d\d\d|Current Year)</a></td><td>([0-9.]*)</td>"""
RATE = r"""<span id="lblTaxRate" style="display:inline-block;"><font face="Arial">([0-9.]*)</font></span><br />"""
CODE = r"""<span id="lblTaxCode" style="display:inline-block;"><font face="Arial">([A-Za-z0-9-]*)</font></span>"""


def _first_group(pattern, text):
    """Return group 1 of the first match of *pattern* in *text*, or None."""
    match = re.search(pattern, text)
    return match.group(1) if match else None


def _clean_address_line(line):
    """Normalise one address line.

    The line is split on whitespace, spaces and dashes are stripped from the
    ends of every token, and tokens that end up empty (e.g. a lone "-") are
    dropped so they do not leave doubled or trailing spaces behind.
    """
    tokens = (token.strip(' -') for token in line.split())
    return ' '.join(token for token in tokens if token)


def parse_tax_page(text):
    """Extract the tax details from the HTML of a Treasurer page.

    Args:
        text: HTML source of the page as a string.

    Returns:
        A dict with the keys:
          * ``'address'`` - all matched address lines, cleaned and joined
            with ``', '`` (empty string when none match);
          * ``'taxes'``   - list of ``(tax_year, amount)`` string tuples,
            sorted ascending (empty list when none match);
          * ``'rate'``    - first matched tax rate string, or None;
          * ``'code'``    - first matched tax code string, or None.
    """
    address_lines = re.findall(ADDRESS, text)
    # Each TAXES match is (year from the link, link label, amount); the label
    # ("Current Year" or a repeat of the year) is not needed in the result.
    tax_rows = re.findall(TAXES, text)
    return {
        'address': ', '.join(_clean_address_line(line) for line in address_lines),
        'taxes': sorted((year, amount) for year, _label, amount in tax_rows),
        'rate': _first_group(RATE, text),
        'code': _first_group(CODE, text),
    }


def main():
    """Fetch the page for the parcel number given on the command line and print the parsed data."""
    if len(sys.argv) < 2:
        # sys.exit with a string writes it to stderr and exits with status 1.
        sys.exit("usage: %s PARCEL_NUMBER" % sys.argv[0])

    # Passing the parcel number through ``params`` lets requests URL-encode it
    # rather than splicing raw command-line text into the URL.
    response = requests.get(
        BASE_URL,
        params={'parcelnumber': sys.argv[1]},
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    print(parse_tax_page(response.text))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment