Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
## R script to check TAD building age information against Zillow API
library(foreign)
library(xml2)
library(dplyr)
# Load the building footprints table; as.is = TRUE keeps text fields as
# character vectors instead of converting them to factors.
dbf <- "fw_footprints_mb.dbf"
dat <- read.dbf(dbf, as.is = TRUE)
# Fixed seed so the same sample is drawn on every run
set.seed(1983)
# Get sample of addresses from the buildings DBF.
# Only distinct, non-missing, non-empty addresses are candidates, so no API
# call is spent on a blank or repeated address, and the sample size is capped
# at the number of candidates so sample() cannot fail on a small table.
addresses <- unique(dat$ADDRESS[!is.na(dat$ADDRESS) & nzchar(dat$ADDRESS)])
samp <- sample(addresses, min(1000L, length(addresses)), replace = FALSE)
# You can get your own API key from Zillow.
# Set the ZILLOW_API_KEY environment variable to supply it without editing
# this script; "XXX" stays as the placeholder when the variable is not set.
api_key <- Sys.getenv("ZILLOW_API_KEY", unset = "XXX")
# Format the address for the API call (spaces become "+")
formatted <- gsub(pattern = " ", replacement = "+", samp, fixed = TRUE)
# Function to pull data from the Zillow API
#
# Sends one GetDeepSearchResults request and returns the text of every
# <yearBuilt> node found in the XML response.
#
# Arguments:
#   address       Street address with spaces already replaced by "+".
#   key           Zillow web-service id; defaults to the script-level api_key.
#   citystatezip  Already-encoded value for the citystatezip query parameter.
#
# Returns a character vector with one element per <yearBuilt> node. The result
# is character(0) when the response has no such node, or when the request or
# XML parsing fails (a warning is raised in that case so the remaining
# addresses in a batch are still processed).
get_year <- function(address, key = api_key,
                     citystatezip = "Fort+Worth%2C+TX") {
  # Percent-encode characters such as "#" or "&" that would otherwise corrupt
  # the query string, then restore "+" so it keeps acting as the space
  # separator produced by the caller.
  safe_address <- gsub(
    "%2B", "+",
    utils::URLencode(address, reserved = TRUE),
    fixed = TRUE
  )
  api_string <- paste0(
    "http://www.zillow.com/webservice/GetDeepSearchResults.htm?zws-id=",
    key, "&address=", safe_address, "&citystatezip=", citystatezip
  )
  tryCatch(
    {
      api_call <- read_xml(api_string)
      xml_text(xml_find_all(api_call, ".//yearBuilt"))
    },
    error = function(e) {
      warning(
        "Zillow lookup failed for '", address, "': ", conditionMessage(e),
        call. = FALSE
      )
      character(0)
    }
  )
}
# Call the function over the vector of addresses.
# lapply() always returns a list, so the shape of the result does not depend
# on how many yearBuilt values each individual request returns.
yrs <- lapply(formatted, function(x) get_year(x))
# Clean up: one row per returned yearBuilt value, labelled with the address
# that was queried. The label comes from the query vector itself rather than
# from the suffixed names unlist() generates, so addresses that legitimately
# end in digits are kept intact.
df <- data.frame(
  address = rep(formatted, times = lengths(yrs)),
  yearBuilt = as.character(unlist(yrs, use.names = FALSE)),
  row.names = NULL,
  stringsAsFactors = FALSE
)
# Undo the "+" formatting so the address matches the DBF again
df$address <- gsub("+", " ", df$address, fixed = TRUE)
# Keep only the first value returned for each address
df2 <- df[!duplicated(df$address), ]
# Check for matches
check <- inner_join(dat, df2, by = c("ADDRESS" = "address"))
check_unique <- check[!duplicated(check$ADDRESS), ]
# NOTE(review): this comparison reads `year_label` while the difference below
# reads `year_built`; both columns come from the DBF -- confirm that the two
# are meant to differ.
matches <- check_unique$year_label == check_unique$yearBuilt
# See where the match is TRUE (NA means the comparison had a missing value)
summary(matches)
# Check to see how many unmatched addresses were within 10 years.
# which() drops NA comparisons; indexing with NA would add all-NA rows.
unmatched <- check_unique[which(!matches), ]
unmatched$diff <- as.numeric(unmatched$yearBuilt) - unmatched$year_built
# Use the absolute difference so a large gap in either direction counts as
# "not within 10 years"
summary(abs(unmatched$diff) < 10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment