|
library(tidyverse) |
|
library(extrafont) |
|
library(rstan) |
|
|
|
options(mc.cores = 4) |
|
|
|
# Color scheme (flatuicolors.com) |
|
flat_colors <- c( |
|
gray = "#7f8c8d", |
|
purple = "#8e44ad" |
|
) |
|
|
|
custom_theme <- |
|
cowplot::theme_cowplot() + |
|
theme(strip.background = element_rect(fill = "transparent"), |
|
strip.text = element_text(color = "black", family = "Open Sans"), |
|
axis.line = element_line(color = flat_colors['gray']), |
|
axis.ticks = element_line(color = flat_colors['gray']), |
|
axis.text = element_text(color = flat_colors['gray'], family = "Open Sans"), |
|
axis.title = element_text(color = flat_colors['gray']), |
|
legend.position = "bottom") |
|
|
|
# Create a numeric index for each candidate |
|
candidates <- tibble(candidate = c( |
|
"Bernard Sanders", |
|
"Beto O'Rourke", |
|
"Elizabeth Warren", |
|
"Joseph R. Biden Jr.", |
|
"Kamala D. Harris", |
|
"Pete Buttigieg" |
|
)) %>% mutate(c = 1:n()) |
|
|
|
data_url <- |
|
"https://projects.fivethirtyeight.com/polls-page/president_primary_polls.csv" |
|
|
|
# Read poll data from FiveThirtyEight |
|
polls <- read_csv(data_url, guess_max = 10000) %>% |
|
rename(candidate = candidate_name) %>% |
|
|
|
# Filter for national polls of 2020 Democraticprimary |
|
filter(cycle == "2020", party == "DEM", is.na(state)) %>% |
|
|
|
# Filter for subset of candidates |
|
filter(candidate %in% candidates$candidate) %>% |
|
|
|
# Parse poll start time into date |
|
mutate(start_date = lubridate::mdy(start_date)) %>% |
|
|
|
# Join in candidate index |
|
left_join(candidates, by = c(candidate = "candidate")) %>% |
|
|
|
# Create time point index |
|
mutate(t = as.numeric(start_date - min(start_date)) + 1) |
|
|
|
debates <- tribble( |
|
~date, |
|
"6/26/19", |
|
"7/30/19" |
|
) %>% |
|
mutate(date = lubridate::mdy(date), |
|
t = as.numeric(date - min(polls$start_date)) + 1) |
|
|
|
ggplot(polls, aes(x = start_date, y = pct)) + |
|
geom_point(size = 0.5) + |
|
scale_x_date() + |
|
geom_vline(data = debates, aes(xintercept = date), lty = 2) + |
|
facet_wrap(~candidate) + |
|
labs(x = "Date", y = "Poll result", caption = "Data: FiveThirtyEight", color = "") + |
|
scale_y_continuous(labels = function(x) paste0(x, "%")) + |
|
scale_x_date() + |
|
scale_color_manual(values = c("#8e44ad")) + |
|
scale_fill_manual(values = c("#8e44ad")) + |
|
custom_theme |
|
|
|
stan_data <- list( |
|
C = max(polls$c), |
|
T = max(polls$t), |
|
N = nrow(polls), |
|
y = round((polls$pct / 100) * polls$sample_size), |
|
sample_size = polls$sample_size, |
|
get_t_i = polls$t, |
|
get_c_i = polls$c, |
|
get_p_i = polls$p |
|
) |
|
|
|
fit <- sampling(model, stan_data, iter = 1000, control = list(adapt_delta = 0.8, max_treedepth = 8)) |
|
|
|
delta_post <- tidybayes::spread_draws(fit, delta[c, t]) %>% |
|
left_join(candidates) |
|
|
|
delta_post_summarized <- delta_post %>% |
|
group_by(c, candidate, t) %>% |
|
summarize(posterior_median = quantile(delta, 0.5), |
|
posterior_2.5 = quantile(delta, 0.025), |
|
posterior_25 = quantile(delta, 0.25), |
|
posterior_75 = quantile(delta, 0.75), |
|
posterior_97.5 = quantile(delta, 0.975)) |
|
|
|
|
|
|
|
delta_post_summarized %>% |
|
# Convert time index back into date |
|
mutate(date = min(polls$start_date) + t - 1) %>% |
|
ggplot() + |
|
|
|
# Mark the democratic primary dates |
|
geom_vline(data = debates, aes(xintercept = date, color = "Democratic Debates"), lty = 2) + |
|
|
|
# 75% and 95% credibility intervals |
|
geom_ribbon(aes(x = date, y = posterior_median * 100, ymin = posterior_2.5 * 100, ymax = posterior_97.5 * 100), |
|
alpha = 0.25, |
|
fill = "#3498db") + |
|
geom_ribbon(aes(x = date, y = posterior_median * 100, ymin = posterior_25 * 100, ymax = posterior_75 * 100), |
|
alpha = 0.25, |
|
fill = "#3498db") + |
|
|
|
# Observed data points |
|
geom_point(data = polls, aes(x = start_date, y = pct), color = "#2c3e50", alpha = 1, shape = 19, size = 0.5) + |
|
|
|
# Posterior median |
|
geom_line(aes(x = date, y = posterior_median * 100), color = "#2980b9", size = 1) + |
|
|
|
# Separate panel for each candidate |
|
facet_wrap(~candidate, scales = "free") + |
|
|
|
# Formatting |
|
labs(x = "Date", y = "Poll result", caption = "Data: FiveThirtyEight", color = "") + |
|
scale_y_continuous(labels = function(x) paste0(x, "%")) + |
|
scale_x_date() + |
|
scale_color_manual(values = c("#8e44ad")) + |
|
scale_fill_manual(values = c("#8e44ad")) |