Skip to content

Instantly share code, notes, and snippets.

@JimGrange
Created November 7, 2021 16:09
Show Gist options
  • Save JimGrange/e6a7db041940be5cf383776087cef7c9 to your computer and use it in GitHub Desktop.
Save JimGrange/e6a7db041940be5cf383776087cef7c9 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(rstan)
rstan_options(auto_write = TRUE)
#--- declare the data
# n_c = total number of people who drink caffeine
# n_nc = total number of people who do not drink caffeine
# p_c = observed proportion of people who drink caffeine that have a favourite mug
# p_nc = observed proportion of people who drink caffeine that DO NOT have a favourite mug
stan_data <- list(
n_c = 1258,
n_nc = 280,
p_c = 966,
p_nc = 168
)
#--- declare the model code in Stan
stan_code <- "
data{
int n_c; // number of people who drink caffeine
int n_nc; // number of people who don't drink caffeine
int p_c; // proportion of caffeine drinkers with favourite mug
int p_nc; // proportion of caffeine drinkers with no favourite mug
}
parameters{
real theta_c; // population proportion of caffeine w/fave mug
real theta_nc; // population proportion of non-caffeine w/fave mug
}
model{ // read the model section from the bottom--up!
// priors
theta_c ~ beta(5, 5); // set the prior for theta_c
theta_nc ~ beta(5, 5); // set the prior for theta_nc
// likelihood
p_c ~ binomial(n_c, theta_c); // our generating model for caffeine
p_nc ~ binomial(n_nc, theta_nc); // our generating model for no caffeine
}
generated quantities {
real theta_delta; // difference in theta values
theta_delta = theta_c - theta_nc;
}
"
#--- fit the model
fit <- stan(model_code = stan_code,
data = stan_data,
iter = 5000,
warmup = 1000,
chains = 4,
cores = 4,
seed = 42)
#--- extract & plot the posterior predictions
posterior <- as.matrix(fit)
posterior <- tibble(
theta_c = posterior[, 1],
theta_nc = posterior[, 2],
theta_delta = posterior[, 3]
) %>%
select(theta_c, theta_nc, theta_delta) %>%
pivot_longer(cols = theta_c:theta_delta,
names_to = "parameter")
main_plot <- posterior %>%
filter(parameter %in% c("theta_c", "theta_nc")) %>%
ggplot(aes(x = value, group = parameter)) +
geom_density(aes(fill = parameter,
colour = parameter),
alpha = 0.6) +
geom_hline(yintercept = 0) +
theme_bw() +
labs(x = "theta", y = "Density",
title = "Estimates of proportion with favourite mug") +
theme(legend.position = "none") +
theme(text = element_text(size = 20)) +
scale_y_continuous(limits = c(0, 40)) +
annotate("text",
label = "Drink Caffeine",
x = 0.7,
y = 35,
size = 6) +
geom_curve(aes(x = 0.7,
y = 33,
xend = 0.74,
yend = 25),
arrow = arrow(length = unit(0.07, "inch")),
size = 0.8,
color = "gray20",
curvature = 0.2) +
annotate("text",
label = "Don't \nDrink Caffeine",
x = 0.55,
y = 22,
size = 6) +
geom_curve(aes(x = 0.55,
y = 20,
xend = 0.58,
yend = 14),
arrow = arrow(length = unit(0.07, "inch")),
size = 0.8,
color = "gray20",
curvature = 0.2)
difference_plot <- posterior %>%
filter(parameter == "theta_delta") %>%
ggplot(aes(x = value)) +
geom_density(alpha = 0.6,
fill = "skyblue",
colour = "skyblue") +
labs(x = "theta",
y = "Density",
title = "Estimate of the difference in theta values") +
theme_bw() +
theme(text = element_text(size = 20))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment