Created
November 20, 2020 22:03
-
-
Save erikgregorywebb/11594200e891e95fe0c9c24c61400996 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(zipcode) | |
library(lubridate) | |
# import "market hotness" data from realtor.com | |
url = 'https://econdata.s3-us-west-2.amazonaws.com/Reports/Hotness/RDC_Inventory_Hotness_Metrics_Zip_History.csv' | |
download.file(url, 'realtor-hotness-zip.csv') | |
raw = read_csv('realtor-hotness-zip.csv') | |
glimpse(raw) | |
# clean the dataframe | |
rltr = raw %>% | |
mutate(month_date_yyyymm = lubridate::ymd(paste(month_date_yyyymm, '01', sep = ''))) %>% | |
separate(zip_name, into = c('city', 'state'), sep = ', ', remove = F) | |
# visualize how the hotness, demand, and supply scores are connected | |
rltr %>% | |
filter(month_date_yyyymm == '2020-10-01') %>% | |
ggplot(., aes(x = demand_score, y = supply_score, col = hotness_score)) + | |
geom_point() | |
# download zipcode to msa (metropolitan statistical area) mapping file | |
# source: https://planiverse.wordpress.com/2019/01/18/mapping-zip-codes-to-msas-and-cbsas/ | |
url = 'https://s3.amazonaws.com/waterjet/Geography_MSA_ZIP_2018.zip' | |
download.file(url, 'geography-msa-zip-2018.zip') | |
# unzip | |
zipfile = "//naeast.ad.jpmorganchase.com/amerxbus$/xbus/nacarhome035/E842066/Files/6 - Personal/Projects/dallas/geography-msa-zip-2018.zip" | |
unzip(zipfile) | |
msa_zip_map = read_csv('Geography_MSA_ZIP_2018.csv') | |
# clean | |
msa_zip_map = msa_zip_map %>% | |
mutate(zip = as.numeric(zip)) %>% | |
rename(cbsa_zip_name = zip_name) %>% | |
select(zip, cbsa_name, cbsa_zip_name) | |
# remove duplicates (a single zip can only belong to a singel msa) | |
msa_zip_map = msa_zip_map %>% distinct(zip, .keep_all = T) | |
msa_zip_map %>% group_by(cbsa_name) %>% count(sort = T) | |
# merge realtor and msa mapping files | |
rltr_msa = left_join(x = rltr, y = msa_zip_map, by = c('postal_code' = 'zip')) | |
nrow(rltr) == nrow(rltr_msa) | |
# export file for Tableau | |
write_csv(rltr_msa, 'rltr_market_hotness_msa.csv') | |
rltr_msa %>% | |
mutate(net_hot = demand_score - supply_score) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment