Skip to content

Instantly share code, notes, and snippets.

@erikgregorywebb
Created November 20, 2020 22:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erikgregorywebb/11594200e891e95fe0c9c24c61400996 to your computer and use it in GitHub Desktop.
Save erikgregorywebb/11594200e891e95fe0c9c24c61400996 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(zipcode)
library(lubridate)
# import "market hotness" data from realtor.com
url = 'https://econdata.s3-us-west-2.amazonaws.com/Reports/Hotness/RDC_Inventory_Hotness_Metrics_Zip_History.csv'
download.file(url, 'realtor-hotness-zip.csv')
raw = read_csv('realtor-hotness-zip.csv')
glimpse(raw)
# clean the dataframe
rltr = raw %>%
mutate(month_date_yyyymm = lubridate::ymd(paste(month_date_yyyymm, '01', sep = ''))) %>%
separate(zip_name, into = c('city', 'state'), sep = ', ', remove = F)
# visualize how the hotness, demand, and supply scores are connected
rltr %>%
filter(month_date_yyyymm == '2020-10-01') %>%
ggplot(., aes(x = demand_score, y = supply_score, col = hotness_score)) +
geom_point()
# download zipcode to msa (metropolitan statistical area) mapping file
# source: https://planiverse.wordpress.com/2019/01/18/mapping-zip-codes-to-msas-and-cbsas/
url = 'https://s3.amazonaws.com/waterjet/Geography_MSA_ZIP_2018.zip'
download.file(url, 'geography-msa-zip-2018.zip')
# unzip
zipfile = "//naeast.ad.jpmorganchase.com/amerxbus$/xbus/nacarhome035/E842066/Files/6 - Personal/Projects/dallas/geography-msa-zip-2018.zip"
unzip(zipfile)
msa_zip_map = read_csv('Geography_MSA_ZIP_2018.csv')
# clean
msa_zip_map = msa_zip_map %>%
mutate(zip = as.numeric(zip)) %>%
rename(cbsa_zip_name = zip_name) %>%
select(zip, cbsa_name, cbsa_zip_name)
# remove duplicates (a single zip can only belong to a singel msa)
msa_zip_map = msa_zip_map %>% distinct(zip, .keep_all = T)
msa_zip_map %>% group_by(cbsa_name) %>% count(sort = T)
# merge realtor and msa mapping files
rltr_msa = left_join(x = rltr, y = msa_zip_map, by = c('postal_code' = 'zip'))
nrow(rltr) == nrow(rltr_msa)
# export file for Tableau
write_csv(rltr_msa, 'rltr_market_hotness_msa.csv')
rltr_msa %>%
mutate(net_hot = demand_score - supply_score)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment