# Source: GitHub gist by @athena15 (athena15/93fff3a816c4f1248481266de1e83e57),
# last active May 6, 2018 06:29.
# Script for scraping pre-foreclosure real estate.
import time

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
# Input CSV, the column holding the parcel numbers, and the King County
# property-tax search page that each parcel is looked up on.
PARCEL_CSV = 'ParcelReport2.csv'
PARCEL_COLUMN = 'Parcel number'
SEARCH_URL = 'http://info.kingcounty.gov/finance/treasury/propertytax/'

# Absolute XPath of the <strong> element whose innerHTML is reported as the
# amount owed. The three per-year lookups differ only in the index of one
# <table>, so that index is the single template parameter.
# NOTE(review): an absolute XPath breaks on any change to the page layout.
OWED_XPATH_TEMPLATE = (
    '/html/body/form/table/tbody/tr/td[2]/table/tbody/tr[2]/td/div/div[5]'
    '/div[1]/table[{table}]/tbody/tr/td[1]/table/tbody/tr[11]/td[2]/div/strong'
)
# Tax year -> index of the results table that is read for that year.
TAX_YEAR_TABLES = {2017: 2, 2016: 3, 2015: 4}


def parse_parcel_numbers(parcels):
    """Return every entry of *parcels* that can be converted to an int.

    Args:
        parcels: Iterable of raw parcel values, e.g. the 'Parcel number'
            column of the CSV as a pandas Series.

    Returns:
        A list of ints in input order. Entries that ``int()`` rejects
        (non-numeric text, NaN, None, infinities) are skipped.
    """
    parcel_numbers = []
    # Iterate the values directly so the last row is included and the result
    # does not depend on the Series index labels.
    for raw in parcels:
        try:
            # NOTE(review): int() drops any leading zeros; confirm the search
            # form accepts unpadded account numbers.
            parcel_numbers.append(int(raw))
        except (TypeError, ValueError, OverflowError):
            continue
    return parcel_numbers


def fetch_taxes_owed(browser, parcel):
    """Search the property-tax page for *parcel* and read the owed amounts.

    Args:
        browser: A Selenium WebDriver instance.
        parcel: Parcel/account number typed into the search box.

    Returns:
        A dict mapping each year in ``TAX_YEAR_TABLES`` to the innerHTML
        string of that year's amount element.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the search
            controls or one of the amount elements is not on the page.
    """
    browser.get(SEARCH_URL)
    search_box = browser.find_element(By.ID, 'cphContent_RealAccountNumber')
    submit_button = browser.find_element(By.ID, 'cphContent_RealSearch')
    # Type the parcel being looked up, not a fixed account number.
    search_box.send_keys(str(parcel))
    time.sleep(1)
    submit_button.click()
    # Fixed pause to let the results page load before it is read.
    time.sleep(1)
    # NOTE: a lookup of the street address via element id
    # 'cphContent_MailingAddress' was previously disabled here.
    owed = {}
    for year, table in TAX_YEAR_TABLES.items():
        element = browser.find_element(
            By.XPATH, OWED_XPATH_TEMPLATE.format(table=table))
        # get_attribute already returns the text; it must not be called again
        # on the resulting string.
        owed[year] = element.get_attribute('innerHTML')
    return owed


def main():
    """Read the parcel CSV and print the taxes owed for every parcel."""
    # Read the data from the CSV file and keep the usable parcel numbers.
    df = pd.read_csv(PARCEL_CSV)
    print(df[PARCEL_COLUMN])
    parcel_list = parse_parcel_numbers(df[PARCEL_COLUMN])
    for number in parcel_list:
        print(number)
    print(parcel_list)

    # Selenium: one Firefox session is reused for all lookups and is always
    # closed, even if a lookup fails part-way through.
    browser = webdriver.Firefox()
    try:
        for parcel in parcel_list:
            owed = fetch_taxes_owed(browser, parcel)
            print(f'Taxes owed for plot #{parcel}: 2017 = {owed[2017]}, 2016 = {owed[2016]}, 2015 = {owed[2015]}')
    finally:
        browser.quit()


if __name__ == '__main__':
    main()
# (GitHub page footer: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")