Last active
November 7, 2015 14:41
-
-
Save walkerke/54c98fef0d15cc6bcd1e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## R script to check TAD building age information against Zillow API
library(foreign)
library(xml2)
library(dplyr)

# Read the building-footprint attribute table. as.is = TRUE keeps text
# columns as character vectors instead of converting them to factors.
dbf <- "fw_footprints_mb.dbf"
dat <- read.dbf(dbf, as.is = TRUE)

# Fixed seed so the same addresses are drawn on every run.
set.seed(1983)

# Get sample of addresses from the buildings DBF.
# The sample size is capped at the number of available rows so a small table
# does not make sample() fail with "cannot take a sample larger than the
# population".
samp <- sample(dat$ADDRESS, min(1000, length(dat$ADDRESS)), replace = FALSE)

# Missing or blank addresses cannot be looked up; drop them after sampling so
# the draw itself is unchanged for the remaining addresses.
samp <- samp[!is.na(samp) & nzchar(samp)]

# You can get your own API key from Zillow.
# The key is taken from the ZILLOW_API_KEY environment variable when it is
# set, so a real key never has to be written into the script; otherwise the
# original placeholder is used.
api_key <- Sys.getenv("ZILLOW_API_KEY", unset = "XXX")

# Format the address for the API call: spaces become "+" in the query string.
formatted <- gsub(" ", "+", samp, fixed = TRUE)
# Function to pull data from the Zillow API.
#
# Looks up one street address with the GetDeepSearchResults web service and
# returns the year(s) built that the response contains.
#
# Args:
#   address: A single street address prepared for a URL query string (the
#     script replaces spaces with "+" before calling).
#
# Returns:
#   A character vector holding the text of every <yearBuilt> node in the
#   response, or character(0) when the response has no such node or the
#   request fails (a warning is raised in that case).
#
# Reads the global `api_key`. The city/state part of the query is fixed to
# "Fort Worth, TX".
get_year <- function(address) {
  # Percent-encode characters that would otherwise corrupt the query string
  # (e.g. "#" starts a URL fragment, "&" starts a new parameter). "+" is
  # restored afterwards because the caller uses it as the space separator.
  # URLencode() leaves strings that already contain %XX escapes untouched.
  safe_address <- gsub(
    "%2B", "+",
    utils::URLencode(address, reserved = TRUE),
    fixed = TRUE
  )

  api_string <- paste0(
    "http://www.zillow.com/webservice/GetDeepSearchResults.htm?zws-id=",
    api_key,
    "&address=", safe_address,
    "&citystatezip=Fort+Worth%2C+TX"
  )

  # One failed request (network error, HTTP error, unparseable XML) should not
  # abort a whole batch of lookups: report it and return "no result".
  tryCatch(
    xml_text(xml_find_all(read_xml(api_string), ".//yearBuilt")),
    error = function(e) {
      warning(
        "Zillow lookup failed for '", address, "': ", conditionMessage(e),
        call. = FALSE
      )
      character(0)
    }
  )
}
# Call the function over the vector of addresses.
# lapply() always returns a list with one element per address; sapply() could
# simplify to a vector or a matrix depending on how many years each lookup
# returned, which made the name handling below unreliable.
yrs <- lapply(formatted, get_year)

# Clean up: one row per returned year, labelled with the address it was
# requested for. The labels are taken from the sampled addresses directly
# instead of from the names unlist() generates, because those names get an
# index appended ("addr1", "addr2") that cannot be stripped again without
# also damaging addresses that really end in a digit.
df <- data.frame(
  address = rep(samp, times = lengths(yrs)),
  yearBuilt = as.character(unlist(yrs, use.names = FALSE)),
  stringsAsFactors = FALSE
)

# Keep only the first year returned for each address.
df2 <- df[!duplicated(df$address), ]

# Check for matches
check <- inner_join(dat, df2, by = c("ADDRESS" = "address"))
check_unique <- check[!duplicated(check$ADDRESS), ]

# NOTE(review): the equality test uses `year_label` while the difference
# below uses `year_built`; both are taken from the DBF as in the original
# script. Confirm that both columns exist and that this split is intended.
matches <- check_unique$year_label == check_unique$yearBuilt

# See where the match is TRUE
summary(matches)

# Check to see how many unmatched addresses were within 10 years.
# which() skips comparisons that are NA (a year missing on either side);
# indexing with the raw logical vector would insert all-NA rows for them.
unmatched <- check_unique[which(!matches), ]
unmatched$diff <- as.numeric(as.character(unmatched$yearBuilt)) -
  unmatched$year_built

# Use the absolute difference: without abs() a record that is, say, 50 years
# off in the negative direction would be counted as "within 10 years".
summary(abs(unmatched$diff) < 10)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment