Skip to content

Instantly share code, notes, and snippets.

@statwonk
Last active December 25, 2020 21:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save statwonk/c92b73b39ace5899586ad144abf16327 to your computer and use it in GitHub Desktop.
Save statwonk/c92b73b39ace5899586ad144abf16327 to your computer and use it in GitHub Desktop.
Some rough beliefs about market capitalization for a new entrant from a variety of sectors, sub-sectors and states.
library(tidyverse)
library(rvest)
library(gamlss)
library(brms)
library(tidybayes)
select <- dplyr::select
####################################################################################
# Model the market capitalizations of members of the S&P 500.
####################################################################################
####################################################################################
# Download S&P500 members from Wikipedia
####################################################################################
rvest::html("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies") %>%
html_nodes("table") %>%
html_table(fill = TRUE) %>%
{ .[[1]] } %>%
as_tibble() %>%
janitor::clean_names() %>%
# download capitalizations using the fmpapi library
# https://github.com/iainwo/fmpclient
mutate(data = map(symbol, ~ safely(fmpapi::fmp_market_cap)(.x))) -> d
saveRDS(d, "mktcaps.rds") # best to save to avoid re-fetching and taxing the api.
readRDS("mktcaps.rds") -> d
d %>%
mutate(data = map(data, ~ .x$result %>% select(-symbol))) %>%
unnest(cols = c(data)) %>%
arrange(desc(market_cap)) %>%
# filter(!symbol %in% c("GOOGL", "BRK.B")) %>%
mutate(symbol = factor(symbol, levels = symbol)) -> d
posix_or_na <- function(x) {
tryCatch({ as.POSIXct(x, origin = "1970-01-01") },
error = function(e) return(NA))
}
d %>%
mutate(year_added = map_dbl(date_first_added, posix_or_na),
year_added = posix_or_na(year_added),
year_added = as.integer(format(year_added, "%Y"))) %>%
mutate_if(is.character, factor) -> d
rand_year_added <- function() {
sample(d$year_added[!is.na(d$year_added)], 1)
}
d %>%
mutate(year_added = map_int(year_added, ~ ifelse(is.na(.x), rand_year_added(), .x))) -> d
state_sub_search <- function(x) {
state.name[map_lgl(state.name, ~ grepl(.x, x))] -> x
if(length(x) != 1) {
return("Not certain")
} else {
return(x)
}
}
d %>%
mutate(state = map_chr(headquarters_location, ~ state_sub_search(.x))) %>%
mutate(state = factor(state)) %>%
mutate(l_year_added = log(year_added)) %>%
arrange(market_cap) %>%
# three lowest outliers are missing three digits, this fixes that.
mutate(market_cap = case_when(1:n() < 4 ~ market_cap * 1e3, TRUE ~ market_cap)) %>%
filter(symbol != "GOOGL") -> d
# Fit lognormal model.
# also see fitdistrplus::fitdist(x, "lnorm")
gamlss(market_cap ~ l_year_added +
re(random = ~ 1 | gics_sector/gics_sub_industry/state),
sigma.formula = ~ l_year_added,
data = d, family = LOGNO) -> lfit
plot(lfit) # check residuals
brm(bf(
market_cap ~ l_year_added +
(1|gics_sector / gics_sub_industry / state),
sigma ~ l_year_added
),
data = d,
iter = 4e3,
control = list(adapt_delta = 0.99),
family = "lognormal",
cores = 4, chains = 4) -> blfit
print(blfit)
d %>%
distinct(gics_sector, gics_sub_industry) %>%
mutate(state = "a generic state",
l_year_added = log(2020)) %>%
add_fitted_draws(blfit, allow_new_levels = TRUE) -> fits
# a view about middle expectations for a hypothetical new
# member from a hypothetical new state.
fits %>%
group_by(gics_sector, gics_sub_industry) %>%
summarise(.lower = quantile(.value/1e9, 0.25),
.upper = quantile(.value/1e9, 0.75)) %>%
ungroup() %>%
arrange(gics_sector, gics_sub_industry, .lower) %>%
mutate(gics_sector = factor(gics_sector, levels = unique(gics_sector))) %>%
arrange(gics_sector, .lower) %>%
mutate(gics_sub_industry = factor(gics_sub_industry, levels = unique(gics_sub_industry))) %>%
ggplot(aes(x = gics_sub_industry, color = gics_sector)) +
geom_errorbar(aes(ymin = .lower, ymax = .upper)) +
scale_y_continuous(labels = scales::dollar, breaks = seq(0, 150, 50)) +
ylab("Market Cap (Billions of Dollars $)") +
xlab("") +
scale_color_discrete(name = "Sector", guide = guide_legend(override.aes = list(size = 8))) +
coord_flip() +
theme_bw() +
expand_limits(y = 0) +
theme(axis.text.y = element_text(size = 7),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank()) +
ggtitle("S&P 500 Market Capitalization by sector and sub-sector")
fits %>%
group_by(gics_sector, gics_sub_industry) %>%
summarise(.lower = quantile(.value/1e9, 0.25),
.upper = quantile(.value/1e9, 0.75)) %>%
ungroup() %>%
arrange(gics_sector, gics_sub_industry, .lower) %>%
mutate(gics_sector = factor(gics_sector, levels = unique(gics_sector))) %>%
arrange(gics_sector, .lower) %>%
mutate(gics_sub_industry = factor(gics_sub_industry, levels = unique(gics_sub_industry))) %>%
ggplot(aes(x = gics_sub_industry)) +
geom_errorbar(aes(ymin = .lower, ymax = .upper)) +
scale_y_continuous(labels = scales::dollar, breaks = seq(0, 150, 50)) +
ylab("Market Cap (Billions of Dollars $)") +
xlab("") +
scale_color_discrete(name = "Sector", guide = guide_legend(override.aes = list(size = 8))) +
coord_flip() +
theme_bw() +
facet_wrap(~ gics_sector, scales = "free_y") +
expand_limits(y = 0) +
theme(axis.text.y = element_text(size = 7),
panel.grid.minor.x = element_blank(),
panel.grid.major.x = element_line(color = "black", size = 0.1),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank()) +
ggtitle("S&P 500 Market Capitalization by sector and sub-sector (inter-quartile ranges, middle 50% of beliefs about expectations for market cap)")
d %>%
distinct(state) %>%
mutate(gics_sector = "Communication Services",
gics_sub_industry = "Interactive Media & Services",
l_year_added = log(2020)) %>%
add_fitted_draws(blfit, allow_new_levels = TRUE) -> fits
fits %>%
group_by(state) %>%
summarise(.lower = quantile(.value/1e9, 0.25),
.upper = quantile(.value/1e9, 0.75)) %>%
ungroup() %>%
arrange(state, .lower) %>%
mutate(state = factor(state, levels = unique(state))) %>%
ggplot(aes(x = state)) +
geom_errorbar(aes(ymin = .lower, ymax = .upper)) +
scale_y_continuous(labels = scales::dollar, breaks = seq(0, 150, 50)) +
ylab("Market Cap (Billions of Dollars $)") +
xlab("") +
scale_color_discrete(name = "State", guide = guide_legend(override.aes = list(size = 8))) +
coord_flip() +
theme_bw() +
expand_limits(y = 0) +
theme(axis.text.y = element_text(size = 7),
panel.grid.minor.x = element_blank(),
panel.grid.major.x = element_line(color = "black", size = 0.1),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank()) +
ggtitle("S&P 500 Market Capitalization by sector and sub-sector (inter-quartile ranges, middle 50% of beliefs about expectations for market cap)")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment