Skip to content

Instantly share code, notes, and snippets.

@erikgregorywebb
Created February 20, 2023 04:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erikgregorywebb/043f612df454058b901194372c2c9fdb to your computer and use it in GitHub Desktop.
Save erikgregorywebb/043f612df454058b901194372c2c9fdb to your computer and use it in GitHub Desktop.
library(tidyverse)
library(rvest)
# Scrape the parcel-details page on app1.pinal.gov for every parcel number in
# the Gila Buttes parcel-search CSV, collect one row per parcel, and export
# the combined table as a CSV in ~/Downloads.

# read in parcel list ----
parcel_list_url <- "https://gist.githubusercontent.com/erikgregorywebb/741b472a8ae1bb97d617eda3d6aaea3d/raw/aef7b5083670659788a009185e0e055d0ac1b350/gila-buttes-parcel-search.csv"
parcels_df <- read_csv(parcel_list_url)
# The details page is queried with the parcel number stripped of dashes.
parcels <- parcels_df %>%
  pull(`PARCEL NUMBER`) %>%
  str_replace_all("-", "")

# scrape details ----
details_base_url <- "https://app1.pinal.gov/Search/Parcel-Details.aspx?parcel_ID="

# Output column name -> CSS selector of the element holding that value on the
# details page. Order here is the column order of the exported CSV.
field_selectors <- c(
  parcel_number = "#lblParcelNum",
  s_t_r = "#lblSTR",
  property_description = "#lblPropDesc",
  subdivision = "#lblSubdivision",
  unit = "#lblUnit",
  block = "#lblBlock",
  lot = "#lblLot",
  phase = "#lblPhase",
  cabinet = "#lblCabinet",
  slide = "#lblSlide",
  owner_1 = "#lblOwner1",
  owner_2 = "#lblOwner2",
  in_care_of = "#lblCareof",
  mailing_address = "#lblMailingAddress",
  property_address = "#lblPropAddress",
  recording_date = "#lblRecDate",
  sale_amount = "#lblSaleAmt",
  exemption_widow = "#lblWidow",
  exemption_widower = "#lblWidower",
  exemption_disabled = "#lblDisabled",
  exemption_senior_freeze = "#lblSrFreeze"
)

# Download one parcel-details page and return a one-row tibble with one
# character column per entry in `field_selectors`. A selector that matches
# nothing yields NA for that column. Returns NULL (with a warning) when the
# page cannot be fetched or parsed, so one bad request does not abort the run.
scrape_parcel_page <- function(page_url) {
  page <- tryCatch(
    read_html(page_url),
    error = function(e) {
      warning(
        "Skipping ", page_url, ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
  if (is.null(page)) {
    return(NULL)
  }
  values <- map_chr(field_selectors, function(selector) {
    page %>%
      html_element(selector) %>%
      html_text2()
  })
  as_tibble(as.list(values))
}

# Preallocate one slot per parcel; slots of failed requests stay NULL and are
# dropped by bind_rows() below.
datalist <- vector("list", length(parcels))
# seq_along() iterates zero times for an empty parcel list (1:length() would
# iterate over c(1, 0)).
for (i in seq_along(parcels)) {
  # Pause between requests to avoid hammering the server.
  Sys.sleep(1)
  page_url <- paste0(details_base_url, parcels[i])
  message(page_url)
  message(i, " of ", length(parcels))
  parcel_row <- scrape_parcel_page(page_url)
  # Guard the assignment: `datalist[[i]] <- NULL` would delete the slot and
  # shift every later index.
  if (!is.null(parcel_row)) {
    datalist[[i]] <- parcel_row
  }
}
raw <- bind_rows(datalist)

# clean, export ----
# Write to an explicit path instead of changing the session's working
# directory with setwd().
output_path <- file.path(
  path.expand("~/Downloads"),
  "gila-buttes-parcel-details.csv"
)
write_csv(raw, output_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment