Last active
August 10, 2019 21:15
-
-
Save herbps10/d274d3d9c579e4e9eb5c16a16949c315 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(extrafont) | |
library(rstan) | |
options(mc.cores = 4) | |
# Color scheme (flatuicolors.com) | |
flat_colors <- c( | |
gray = "#7f8c8d", | |
purple = "#8e44ad" | |
) | |
custom_theme <- | |
cowplot::theme_cowplot() + | |
theme(strip.background = element_rect(fill = "transparent"), | |
strip.text = element_text(color = "black", family = "Open Sans"), | |
axis.line = element_line(color = flat_colors['gray']), | |
axis.ticks = element_line(color = flat_colors['gray']), | |
axis.text = element_text(color = flat_colors['gray'], family = "Open Sans"), | |
axis.title = element_text(color = flat_colors['gray']), | |
legend.position = "bottom") | |
# Create a numeric index for each candidate | |
candidates <- tibble(candidate = c( | |
"Bernard Sanders", | |
"Beto O'Rourke", | |
"Elizabeth Warren", | |
"Joseph R. Biden Jr.", | |
"Kamala D. Harris", | |
"Pete Buttigieg" | |
)) %>% mutate(c = 1:n()) | |
data_url <- | |
"https://projects.fivethirtyeight.com/polls-page/president_primary_polls.csv" | |
# Read poll data from FiveThirtyEight | |
polls <- read_csv(data_url, guess_max = 10000) %>% | |
rename(candidate = candidate_name) %>% | |
# Filter for national polls of 2020 Democraticprimary | |
filter(cycle == "2020", party == "DEM", is.na(state)) %>% | |
# Filter for subset of candidates | |
filter(candidate %in% candidates$candidate) %>% | |
# Parse poll start time into date | |
mutate(start_date = lubridate::mdy(start_date)) %>% | |
# Join in candidate index | |
left_join(candidates, by = c(candidate = "candidate")) %>% | |
# Create time point index | |
mutate(t = as.numeric(start_date - min(start_date)) + 1) | |
debates <- tribble( | |
~date, | |
"6/26/19", | |
"7/30/19" | |
) %>% | |
mutate(date = lubridate::mdy(date), | |
t = as.numeric(date - min(polls$start_date)) + 1) | |
ggplot(polls, aes(x = start_date, y = pct)) + | |
geom_point(size = 0.5) + | |
scale_x_date() + | |
geom_vline(data = debates, aes(xintercept = date), lty = 2) + | |
facet_wrap(~candidate) + | |
labs(x = "Date", y = "Poll result", caption = "Data: FiveThirtyEight", color = "") + | |
scale_y_continuous(labels = function(x) paste0(x, "%")) + | |
scale_x_date() + | |
scale_color_manual(values = c("#8e44ad")) + | |
scale_fill_manual(values = c("#8e44ad")) + | |
custom_theme | |
stan_data <- list( | |
C = max(polls$c), | |
T = max(polls$t), | |
N = nrow(polls), | |
y = round((polls$pct / 100) * polls$sample_size), | |
sample_size = polls$sample_size, | |
get_t_i = polls$t, | |
get_c_i = polls$c, | |
get_p_i = polls$p | |
) | |
fit <- sampling(model, stan_data, iter = 1000, control = list(adapt_delta = 0.8, max_treedepth = 8)) | |
delta_post <- tidybayes::spread_draws(fit, delta[c, t]) %>% | |
left_join(candidates) | |
delta_post_summarized <- delta_post %>% | |
group_by(c, candidate, t) %>% | |
summarize(posterior_median = quantile(delta, 0.5), | |
posterior_2.5 = quantile(delta, 0.025), | |
posterior_25 = quantile(delta, 0.25), | |
posterior_75 = quantile(delta, 0.75), | |
posterior_97.5 = quantile(delta, 0.975)) | |
delta_post_summarized %>% | |
# Convert time index back into date | |
mutate(date = min(polls$start_date) + t - 1) %>% | |
ggplot() + | |
# Mark the democratic primary dates | |
geom_vline(data = debates, aes(xintercept = date, color = "Democratic Debates"), lty = 2) + | |
# 75% and 95% credibility intervals | |
geom_ribbon(aes(x = date, y = posterior_median * 100, ymin = posterior_2.5 * 100, ymax = posterior_97.5 * 100), | |
alpha = 0.25, | |
fill = "#3498db") + | |
geom_ribbon(aes(x = date, y = posterior_median * 100, ymin = posterior_25 * 100, ymax = posterior_75 * 100), | |
alpha = 0.25, | |
fill = "#3498db") + | |
# Observed data points | |
geom_point(data = polls, aes(x = start_date, y = pct), color = "#2c3e50", alpha = 1, shape = 19, size = 0.5) + | |
# Posterior median | |
geom_line(aes(x = date, y = posterior_median * 100), color = "#2980b9", size = 1) + | |
# Separate panel for each candidate | |
facet_wrap(~candidate, scales = "free") + | |
# Formatting | |
labs(x = "Date", y = "Poll result", caption = "Data: FiveThirtyEight", color = "") + | |
scale_y_continuous(labels = function(x) paste0(x, "%")) + | |
scale_x_date() + | |
scale_color_manual(values = c("#8e44ad")) + | |
scale_fill_manual(values = c("#8e44ad")) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data { | |
int<lower=0> T; // Number of timepoints | |
int<lower=0> C; // Number of candidates | |
int<lower=0> N; // Number of poll yervations | |
int sample_size[N]; // Sample size of each poll | |
int y[N]; // Number of respondents in poll for candidate (approximate) | |
int<lower=1, upper=T> get_t_i[N]; // timepoint for ith observation | |
int<lower=1, upper=C> get_c_i[N]; // candidate for ith observation | |
} | |
parameters { | |
matrix[C, T] delta_logit; // Percent for candidate c at time t | |
real<lower=0, upper=1> phi[N]; // Percent of participants in poll for candidate | |
real<lower=0> tau; // Random walk variance | |
real<lower=0,upper=0.5> sigma; // Overdispersion of observations | |
} | |
model { | |
// Priors | |
tau ~ normal(0, 0.2); | |
sigma ~ normal(0, 1); | |
// Random walk | |
for(c in 1:C) { | |
delta_logit[c, 2:T] ~ normal(delta_logit[c, 1:(T - 1)], tau); | |
} | |
// Observed data | |
y ~ binomial(sample_size, phi); | |
for(i in 1:N) { | |
// Overdispersion | |
delta_logit[get_c_i[i], get_t_i[i]] ~ normal(logit(phi[i]), sigma); | |
} | |
} | |
generated quantities { | |
matrix[C, T] delta = inv_logit(delta_logit); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment