# import libraries | |
from selenium import webdriver | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.select import Select | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
import pandas as pd | |
import datetime | |
# prepare the driver | |
link = 'https://api01.scarsdale.com/PropertyInquiry/PiSearchByStreet.aspx' | |
d = webdriver.Chrome(executable_path="/Users/erikgregorywebb/Downloads/chromedriver 2") | |
# get list of roads | |
d.get(link) | |
raw = Select(WebDriverWait(d, 10).until(EC.presence_of_element_located((By.ID, "lbStreets")))) | |
roads = [o.text for o in raw.options] | |
d.close() | |
# extract the information | |
lines = [] | |
errors = [] | |
for road in roads: | |
try: | |
d = webdriver.Chrome(executable_path="/Users/erikgregorywebb/Downloads/chromedriver 2") | |
d.get(link) | |
# loop over road selection | |
raw = Select(WebDriverWait(d, 10).until(EC.presence_of_element_located((By.ID, "lbStreets")))) | |
raw.select_by_visible_text(road) | |
# get list of units for road | |
raw = Select(WebDriverWait(d, 10).until(EC.presence_of_element_located((By.ID, "lbAddresses")))) | |
units = [o.text for o in raw.options] | |
# loop over units selection | |
for unit in units: | |
raw = Select(WebDriverWait(d, 10).until(EC.presence_of_element_located((By.ID, "lbAddresses")))) | |
raw.select_by_visible_text(unit) | |
line = [road, unit] | |
table = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.ID, "divParcel"))) | |
for row in table.find_elements_by_tag_name('tr'): | |
for column in row.find_elements_by_tag_name('td'): | |
line.append(column.text) | |
lines.append(line) | |
d.back() | |
d.back() | |
d.close() | |
except: | |
errors.append(road) | |
d.close() | |
# clean data | |
fields = ["Property Number:", "Property Class", "Current Owner:", "Site No:", "Address:", "Neighborhood Code:", "School District:", "Lot Area:", "Wetlands:", "Zoning:", "County:", "Village:", "School:", "Land:", "Total:", "Full Market Value\nBased on Equalization Rate:", "Year Built:", "Living Area:", "Bldg Style:", "No. Stories:", "Bathrooms:", "Half-Bathrooms:", "Bedrooms:", "Bath Qual:", "Fireplaces:", "Overall Cond:", "Central Air:", "Basement Type:", "Grade:", "Effective Year Built:"] | |
rows = [] | |
for line in lines: | |
try: | |
row = [line[0], line[1]] | |
for field in fields: | |
try: | |
val = line[line.index(field)+1] | |
except: | |
val = "" | |
row.append(val) | |
rows.append(row) | |
except: | |
print(line[0], line[1]) | |
column_names = ["Street", "House"] + fields | |
scarsdale_properties = pd.DataFrame(rows, columns = column_names) | |
# export | |
t = datetime.datetime.now() | |
scarsdale_properties.to_csv("/Users/erikgregorywebb/Documents/Python/scarsdale/scarsdale_properties_{}.csv".format(t)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment