Skip to content

Instantly share code, notes, and snippets.

@erikgregorywebb
Created November 10, 2018 22:21
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erikgregorywebb/614a707043c2ff92df53ee2e3e864f48 to your computer and use it in GitHub Desktop.
Save erikgregorywebb/614a707043c2ff92df53ee2e3e864f48 to your computer and use it in GitHub Desktop.
# import libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import datetime
# prepare the driver
link = 'https://api01.scarsdale.com/PropertyInquiry/PiSearchByStreet.aspx'
d = webdriver.Chrome(executable_path="/Users/erikgregorywebb/Downloads/chromedriver 2")
# get list of roads
d.get(link)
raw = Select(WebDriverWait(d, 10).until(EC.presence_of_element_located((By.ID, "lbStreets"))))
roads = [o.text for o in raw.options]
d.close()
# extract the information
lines = []
errors = []
for road in roads:
try:
d = webdriver.Chrome(executable_path="/Users/erikgregorywebb/Downloads/chromedriver 2")
d.get(link)
# loop over road selection
raw = Select(WebDriverWait(d, 10).until(EC.presence_of_element_located((By.ID, "lbStreets"))))
raw.select_by_visible_text(road)
# get list of units for road
raw = Select(WebDriverWait(d, 10).until(EC.presence_of_element_located((By.ID, "lbAddresses"))))
units = [o.text for o in raw.options]
# loop over units selection
for unit in units:
raw = Select(WebDriverWait(d, 10).until(EC.presence_of_element_located((By.ID, "lbAddresses"))))
raw.select_by_visible_text(unit)
line = [road, unit]
table = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.ID, "divParcel")))
for row in table.find_elements_by_tag_name('tr'):
for column in row.find_elements_by_tag_name('td'):
line.append(column.text)
lines.append(line)
d.back()
d.back()
d.close()
except:
errors.append(road)
d.close()
# clean data
fields = ["Property Number:", "Property Class", "Current Owner:", "Site No:", "Address:", "Neighborhood Code:", "School District:", "Lot Area:", "Wetlands:", "Zoning:", "County:", "Village:", "School:", "Land:", "Total:", "Full Market Value\nBased on Equalization Rate:", "Year Built:", "Living Area:", "Bldg Style:", "No. Stories:", "Bathrooms:", "Half-Bathrooms:", "Bedrooms:", "Bath Qual:", "Fireplaces:", "Overall Cond:", "Central Air:", "Basement Type:", "Grade:", "Effective Year Built:"]
rows = []
for line in lines:
try:
row = [line[0], line[1]]
for field in fields:
try:
val = line[line.index(field)+1]
except:
val = ""
row.append(val)
rows.append(row)
except:
print(line[0], line[1])
column_names = ["Street", "House"] + fields
scarsdale_properties = pd.DataFrame(rows, columns = column_names)
# export
t = datetime.datetime.now()
scarsdale_properties.to_csv("/Users/erikgregorywebb/Documents/Python/scarsdale/scarsdale_properties_{}.csv".format(t))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment