Skip to content

Instantly share code, notes, and snippets.

@erikgregorywebb
Created May 12, 2021 00:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erikgregorywebb/95ae1a049a78ccea34520069e4effbf0 to your computer and use it in GitHub Desktop.
Save erikgregorywebb/95ae1a049a78ccea34520069e4effbf0 to your computer and use it in GitHub Desktop.
# set working directory
# NOTE(review): the relative paths used by the download and export steps below
# resolve against this directory, and setwd() fails if it does not exist on the
# machine running the script -- confirm the path before running elsewhere.
setwd("~/Projects/zillow")
# import libraries
library(tidyverse)
library(lubridate)
library(scales)
# define urls for import
# The five source files differ only in the number after `bdrmcnt_`, so the URLs
# are generated from one template instead of five copy-pasted literals.
zhvi_url_template <- paste0(
  "https://files.zillowstatic.com/research/public_v2/zhvi/",
  "Zip_zhvi_bdrmcnt_%d_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv",
  "?t=1620762095"
)
# urls[i] points at the i-bedroom file; the import step below relies on that
# ordering when it labels rows with their bedroom count.
urls <- sprintf(zhvi_url_template, 1:5)
# loop over urls, combine
# Every file is downloaded to its own temporary path instead of a shared
# 'temp.csv' in the working directory: no download overwrites a file that an
# earlier iteration read from, and nothing is left behind in the project folder.
zhvi_files <- tempfile(rep("zhvi_", length(urls)), fileext = ".csv")
# seq_along() (rather than 1:length()) keeps this a no-op when `urls` is empty.
datalist <- lapply(seq_along(urls), function(i) {
  download.file(urls[i], zhvi_files[i])
  # RegionName is matched against ZIP codes further down, so read it as text:
  # it is then never guessed as a number (which would drop leading zeros) and
  # has the same type in every file.
  read_csv(zhvi_files[i], col_types = cols(RegionName = col_character())) %>%
    # urls[i] is the i-bedroom file, so the position doubles as the count.
    mutate(Bedrooms = i)
})
# bind_rows() matches columns by name and fills gaps with NA, so the files do
# not need to have exactly the same set of columns.
raw <- bind_rows(datalist)
# Remove the downloads only after the combined table has been built.
unlink(zhvi_files)
glimpse(raw)
# reshape, format date
# Columns that identify a row; every other column is treated as one month of
# values and melted into (Month, zhvi) pairs.
id_cols <- c(
  "Bedrooms", "RegionID", "SizeRank", "RegionName", "RegionType",
  "StateName", "State", "City", "Metro", "CountyName"
)
# pivot_longer() replaces the superseded gather(). Column order is unchanged,
# but rows now come out grouped by input row (all months of one region together)
# rather than by month.
zlw <- raw %>%
  pivot_longer(
    cols = -all_of(id_cols),
    names_to = "Month",
    values_to = "zhvi"
  ) %>%
  mutate(
    # Month holds the former column names; ymd() parses them into Date values.
    Date = ymd(Month),
    Bedrooms = factor(Bedrooms, levels = 1:5)
  )
print(paste('Number of Rows:', scales::comma(nrow(zlw))))
glimpse(zlw)
# import zipcodes
# Download the Phoenix-metro zipcode list into the working directory under the
# same file name it has in the gist, then read that local copy.
url <- paste0(
  "https://gist.githubusercontent.com/erikgregorywebb/",
  "ece26b7b749693ac84430b56f9999253/raw/",
  "a55b262ed2e84f5cc5f37dedb6f8d5008fac8aed/",
  "phoenix-metro-zipcodes.csv"
)
zip_csv <- basename(url)
download.file(url, destfile = zip_csv)
zip <- read_csv(zip_csv)
# filter for phoenix metro area
# Keep only the rows whose RegionName appears in the imported zipcode list.
phoenix_zips <- pull(zip, zipcode)
zlw_pho <- filter(zlw, RegionName %in% phoenix_zips)
glimpse(zlw_pho)
# chart 1: general trends
# The ten cities with the most rows in the Phoenix subset.
top_cities <- zlw_pho %>%
  count(City, sort = TRUE) %>%
  head(10) %>%
  pull(City)
# Median ZHVI per month, city and bedroom count (1-4 bedrooms only), drawn as
# one panel per city with one line per bedroom count.
zlw_pho %>%
  filter(City %in% top_cities, Bedrooms %in% c(1, 2, 3, 4)) %>%
  group_by(Date, City, Bedrooms) %>%
  # .groups = "drop" returns an ungrouped table and silences the regrouping
  # message summarise() would otherwise print.
  summarise(med_zhvi = median(zhvi, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = Date, y = med_zhvi, col = Bedrooms)) +
  geom_line(size = 1) +
  labs(
    x = '', y = 'Zillow Home Value Index',
    title = 'ZHVI for Large Cities in Phoenix Metro Area',
    subtitle = 'Jan 1996 to April 2021'
  ) +
  facet_wrap(~City, nrow = 2, ncol = 5) +
  scale_y_continuous(labels = dollar) +
  theme(legend.position = 'top')
# save copy of file
# glimpse() hands its input back invisibly, so the preview and the export share
# one pipeline; missing values are written as empty fields.
# NOTE(review): the file name looks misspelled ("phoeonix", "011") -- kept as is
# because other tooling may expect this exact name; confirm before renaming.
zlw_pho %>%
  glimpse() %>%
  write_csv('zillow_phoeonix_2021_05_011.csv', na = '')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment