Instantly share code, notes, and snippets.

Embed
What would you like to do?
A script automating the analysis of Commercial Bank Expansion Feasibility
library(blscrapeR)
library(tidyverse)
library(albersusa)
library(ggalt)
library(wesanderson)
library(cowplot)
library(tidycensus)
# The original blscrapeR::qcew_api() is not really suited to take multiple years
# or niac codes at once. Here is a function that grabs it all!
get_industry_qcew <- function(years, niac_codes, qtr = "a"){
# We want the years to repeat for each industry we're looking for
year_rep <- rep(years, length(niac_codes))
# We also want each code to be repeated for each year of interest
niac_each <- rep(niac_codes, each = length(years))
# Utilize purrr::map2() and purrr::possibly() to either grab our data
# or carry on when it hits an error
industry_dat <- map2(year_rep, niac_each,
possibly(
~ qcew_api(year = .x, qtr = qtr,
slice = "industry", sliceCode = .y),
otherwise = NULL
)
) %>%
compact() %>% # remove any NULL values from our list
map_df(bind_rows) # create one large data set
return(industry_dat)
}
qcew_dat <- get_industry_qcew(2013:2016, c(5221, 52213, 52231))
qcew_dat_clean <- qcew_dat %>%
# Reorder columns and drop unwanted variables
select(area_fips, year, qtr,
industry_code, everything(),
-own_code, -agglvl_code, -size_code, -disclosure_code,
-lq_disclosure_code, -oty_disclosure_code)
idaho_dat <- qcew_dat_clean %>%
left_join(area_titles, by = "area_fips") %>%
separate(area_title, c("county", "state"), sep = ",") %>%
mutate_at(vars(state, industry_code), trimws) %>%
left_join(niacs, by = "industry_code") %>%
mutate_at(vars(industry_title), as.character) %>%
select(area_fips, state, county,
year, qtr, industry_code, industry_title, everything()) %>%
filter(state %in% "Idaho")
# This will give me all of the lat/long geometry's for each county in Idaho
us <- counties_composite()
us_map <- broom::tidy(us, region = "fips") %>%
filter(id %in% idaho_dat$area_fips)
com_banking <- idaho_dat %>%
filter(industry_code %in% "5221")
ggplot() +
geom_map(data = us_map, map = us_map,
aes(long, lat, map_id = id),
color = "black", fill = NA) +
geom_map(data = com_banking, map = us_map,
aes(fill = lq_annual_avg_estabs, map_id = area_fips)) +
scale_fill_gradientn(colors = wes_palettes$Zissou1) +
coord_map() +
facet_grid(.~ year) +
labs(title = glue::glue("{tools::toTitleCase(com_banking$industry_title)}"),
fill = "LQ", x = NULL, y = NULL) +
theme_minimal() +
theme(strip.text = element_text(face = "bold"),
axis.title = element_blank(),
axis.text = element_blank(),
legend.title = element_text(face = "bold"),
plot.title = element_text(family = "Helvetica Bold Oblique"),
plot.margin=grid::unit(c(0,0,0,0), "mm"))
# Not the most pretty function
# This is something I'm still learning on function development.
# My gut feeling is that I don't want to assume scope when writing a function
# So I don't want to call variables from my Global Environment.
# Therefore I'm going to give this function, everything I have create thus far.
plot_industry_map <- function(dat, map_dat, ind_code, fill_var, fill_pall = "Zissou1"){
# We want to plot a specific industry at a time
ind_dat <- dat %>%
filter(industry_code %in% ind_code)
# Here is where we use tidyeval to enquo the fill variable.
fill_var_enc <- enquo(fill_var)
# Here is the plot in question
ggplot() +
geom_map(data = map_dat, map = map_dat,
aes(long, lat, map_id = id),
color = "black", fill = NA) +
geom_map(data = ind_dat, map = map_dat,
aes(fill = !!fill_var_enc, map_id = area_fips)) +
scale_fill_gradientn(colors = wes_palette(fill_pall)) +
coord_map() +
facet_grid(.~ year) +
labs(title = glue::glue("{tools::toTitleCase(ind_dat$industry_title)}"),
fill = "LQ") +
theme_minimal() +
theme(strip.text = element_text(face = "bold"),
axis.title = element_blank(),
axis.text = element_blank(),
legend.title = element_text(face = "bold"),
plot.title = element_text(family = "Helvetica Bold Oblique"))
}
# Iterate across all industries and make a heatmap plot
plts <- map(c("5221","52213","52231"), plot_industry_map,
dat = idaho_dat,
map_dat = us_map,
fill_var = lq_annual_avg_estabs
)
# Arrange all the plots into a grid
cowplot::plot_grid(plotlist = plts, ncol = 1) %>%
add_sub(glue::glue("Source: Bureau of Labor Statistics QCEW Database",
"\n*Data not available for years with non-filled counties"),
y = .5, hjust = -.8, size = 8, fontface = "italic") %>%
ggdraw()
# Same idea as above but we are going to look at the annual employment level percent change
plts2 <- map(c("5221","52213","52231"), plot_industry_map,
dat = idaho_dat,
map_dat = us_map,
fill_var = oty_annual_avg_emplvl_pct_chg/100,
fill_pall = "Moonrise3")
cowplot::plot_grid(plotlist = plts2, ncol = 1) %>%
add_sub("Source: Bureau of Labor Statistics QCEW Database\n*Data not available for years with non-filled counties",
y = .5, hjust = -.8, size = 8, fontface = "italic") %>%
ggdraw()
# Gathering data from FDIC website, getting rid of corrupted data point
idaho_banks <- read_csv(here::here("Data/ID_2017_Data.csv")) %>%
filter(SIMS_LONGITUDE < -1)
# Gathering data from US Census
idaho_median_income <- get_acs(geography = "county",
variables = c(mediancome = "B19013_001"),
state = "ID", geometry = TRUE)
# Heat map plot including bank locations and median income
ggplot() +
geom_sf(data = idaho_median_income, aes(fill = estimate)) +
geom_point(data = idaho_banks,
aes(y = SIMS_LATITUDE, x = SIMS_LONGITUDE),
color = "grey95", size = .8) +
coord_sf(datum = NA) +
scale_fill_gradientn(colors = wes_palettes$Rushmore1,
labels = scales::dollar) +
labs(title = "Median Income Across Counties in Idaho",
subtitle = "Each white dot represents a physical branch\nlocation of a bank registered by the FDIC, updated June 2017",
fill = "Estimate") +
theme(axis.title = element_blank(),
plot.subtitle = element_text(face = "italic"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment