Skip to content

Instantly share code, notes, and snippets.

@garrettdashnelson
Created September 8, 2017 14:42
Show Gist options
  • Save garrettdashnelson/14bc668ea7cb1d2c01a482eb02402c3d to your computer and use it in GitHub Desktop.
Save garrettdashnelson/14bc668ea7cb1d2c01a482eb02402c3d to your computer and use it in GitHub Desktop.
Scraper for VGSI-style property tax database
# Scraper for a VGSI-style property tax database (gis.vgsi.com/hanovernh).
#
# Walks the A-Z street index, follows each street's listing page to its
# parcel pages, and appends one tab-separated row per property (location,
# owner, then the current-assessment table's first data row) to results.tsv.
#
# NOTE: written for Python 2 (urllib2); requires network access to run.
from bs4 import BeautifulSoup
import urllib2
import re
from string import ascii_uppercase

BASE_URL = 'http://gis.vgsi.com/hanovernh/'

# Compile link patterns once — they are reused for every page fetched.
# '.' and '?' are escaped so the patterns match the literal URL text
# (the originals left '.' unescaped, matching any character).
STREET_LINK = re.compile(r'Streets\.aspx\?Name=')
PARCEL_LINK = re.compile(r'Parcel\.aspx')

with open('results.tsv', 'w') as outfile:
    for letter in ascii_uppercase:
        # Index page listing every street that starts with this letter.
        street_listing_page = urllib2.urlopen(
            BASE_URL + 'Streets.aspx?Letter={}'.format(letter))
        parsed_street_listing_page = BeautifulSoup(street_listing_page, 'html.parser')

        for street_url in parsed_street_listing_page.find_all(href=STREET_LINK):
            # Street names can contain spaces; percent-encode them so the
            # request URL is valid.  Uses the str.replace method rather
            # than the deprecated string.replace module function.
            req_url = (BASE_URL + street_url['href']).replace(' ', '%20')
            single_street_page = urllib2.urlopen(req_url)
            parsed_single_street_page = BeautifulSoup(single_street_page, 'html.parser')

            for property_url in parsed_single_street_page.find_all(href=PARCEL_LINK):
                single_property_page = urllib2.urlopen(BASE_URL + property_url['href'])
                parsed_single_property_page = BeautifulSoup(single_property_page, 'html.parser')

                # Property address and owner come from labelled spans on the
                # parcel page; .contents[0] is the span's text node.
                outfile.write(parsed_single_property_page.find(id="MainContent_lblLocation").contents[0] + "\t ")
                outfile.write(parsed_single_property_page.find(id="MainContent_lblGenOwner").contents[0] + "\t ")

                # Only the first data row (class "RowStyle") of the
                # current-value assessment table is recorded.
                table = parsed_single_property_page.find(id="MainContent_grdCurrentValueAsmt")
                for cell in table.find(class_="RowStyle").find_all('td'):
                    outfile.write(cell.contents[0] + "\t ")
                outfile.write('\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment