JimGrange/caffeine_and_mugs.R

## caffeine_and_mugs.R
library(tidyverse)
library(rstan)
rstan_options(auto_write = TRUE)

#--- declare the data
# n_c = total number of people who drink caffeine
# n_nc = total number of people who do not drink caffeine
# p_c = observed proportion of people who drink caffeine that have a favourite mug
# p_nc = observed proportion of people who drink caffeine that DO NOT have a favourite mug
stan_data <- list(
  n_c = 1258,
  n_nc = 280,
  p_c = 966,
  p_nc = 168
)


#--- declare the model code in Stan
stan_code <- "
data{
  int n_c;                  // number of people who drink caffeine
  int n_nc;                 // number of people who don't drink caffeine
  int p_c;                  // proportion of caffeine drinkers with favourite mug
  int p_nc;                 // proportion of caffeine drinkers with no favourite mug
}

parameters{
  real theta_c;             // population proportion of caffeine w/fave mug
  real theta_nc;            // population proportion of non-caffeine w/fave mug
}

model{                      // read the model section from the bottom--up!
  // priors
  theta_c ~ beta(5, 5);      // set the prior for theta_c
  theta_nc ~ beta(5, 5);     // set the prior for theta_nc

  // likelihood
  p_c ~ binomial(n_c, theta_c);     // our generating model for caffeine
  p_nc ~ binomial(n_nc, theta_nc);  // our generating model for no caffeine
}

generated quantities {

 real theta_delta;     // difference in theta values

 theta_delta = theta_c - theta_nc;
}
"

#--- fit the model
fit <- stan(model_code = stan_code,
            data = stan_data,
            iter = 5000,
            warmup = 1000,
            chains = 4,
            cores = 4,
            seed = 42)

#--- extract & plot the posterior predictions
posterior <- as.matrix(fit)

posterior <- tibble(
  theta_c = posterior[, 1],
  theta_nc = posterior[, 2],
  theta_delta = posterior[, 3]
) %>%
  select(theta_c, theta_nc, theta_delta) %>%
  pivot_longer(cols = theta_c:theta_delta,
               names_to = "parameter")


main_plot <- posterior %>%
  filter(parameter %in% c("theta_c", "theta_nc")) %>%
  ggplot(aes(x = value, group = parameter)) +
  geom_density(aes(fill = parameter,
                   colour = parameter),
               alpha = 0.6) +
  geom_hline(yintercept = 0) +
  theme_bw() +
  labs(x = "theta", y = "Density",
       title = "Estimates of proportion with favourite mug")  +
  theme(legend.position = "none") +
  theme(text = element_text(size = 20)) +
  scale_y_continuous(limits = c(0, 40)) +
  annotate("text",
           label = "Drink Caffeine",
           x = 0.7,
           y = 35,
           size = 6) +
  geom_curve(aes(x = 0.7,
                 y = 33,
                 xend = 0.74,
                 yend = 25),
             arrow = arrow(length = unit(0.07, "inch")),
             size = 0.8,
             color = "gray20",
             curvature = 0.2) +
  annotate("text",
           label = "Don't \nDrink Caffeine",
           x = 0.55,
           y = 22,
           size = 6) +
  geom_curve(aes(x = 0.55,
                 y = 20,
                 xend = 0.58,
                 yend = 14),
             arrow = arrow(length = unit(0.07, "inch")),
             size = 0.8,
             color = "gray20",
             curvature = 0.2)


difference_plot <- posterior %>%
  filter(parameter == "theta_delta") %>%
  ggplot(aes(x = value)) +
  geom_density(alpha = 0.6,
               fill = "skyblue",
               colour = "skyblue") +
  labs(x = "theta",
       y = "Density",
       title = "Estimate of the difference in theta values") +
  theme_bw() +
  theme(text = element_text(size = 20))
	library(tidyverse)
	library(rstan)
	rstan_options(auto_write = TRUE)

	#--- declare the data
	# n_c = total number of people who drink caffeine
	# n_nc = total number of people who do not drink caffeine
	# p_c = observed proportion of people who drink caffeine that have a favourite mug
	# p_nc = observed proportion of people who drink caffeine that DO NOT have a favourite mug
	stan_data <- list(
	n_c = 1258,
	n_nc = 280,
	p_c = 966,
	p_nc = 168
	)


	#--- declare the model code in Stan
	stan_code <- "
	data{
	int n_c; // number of people who drink caffeine
	int n_nc; // number of people who don't drink caffeine
	int p_c; // proportion of caffeine drinkers with favourite mug
	int p_nc; // proportion of caffeine drinkers with no favourite mug
	}

	parameters{
	real theta_c; // population proportion of caffeine w/fave mug
	real theta_nc; // population proportion of non-caffeine w/fave mug
	}

	model{ // read the model section from the bottom--up!
	// priors
	theta_c ~ beta(5, 5); // set the prior for theta_c
	theta_nc ~ beta(5, 5); // set the prior for theta_nc

	// likelihood
	p_c ~ binomial(n_c, theta_c); // our generating model for caffeine
	p_nc ~ binomial(n_nc, theta_nc); // our generating model for no caffeine
	}

	generated quantities {

	real theta_delta; // difference in theta values

	theta_delta = theta_c - theta_nc;
	}
	"

	#--- fit the model
	fit <- stan(model_code = stan_code,
	data = stan_data,
	iter = 5000,
	warmup = 1000,
	chains = 4,
	cores = 4,
	seed = 42)

	#--- extract & plot the posterior predictions
	posterior <- as.matrix(fit)

	posterior <- tibble(
	theta_c = posterior[, 1],
	theta_nc = posterior[, 2],
	theta_delta = posterior[, 3]
	) %>%
	select(theta_c, theta_nc, theta_delta) %>%
	pivot_longer(cols = theta_c:theta_delta,
	names_to = "parameter")


	main_plot <- posterior %>%
	filter(parameter %in% c("theta_c", "theta_nc")) %>%
	ggplot(aes(x = value, group = parameter)) +
	geom_density(aes(fill = parameter,
	colour = parameter),
	alpha = 0.6) +
	geom_hline(yintercept = 0) +
	theme_bw() +
	labs(x = "theta", y = "Density",
	title = "Estimates of proportion with favourite mug") +
	theme(legend.position = "none") +
	theme(text = element_text(size = 20)) +
	scale_y_continuous(limits = c(0, 40)) +
	annotate("text",
	label = "Drink Caffeine",
	x = 0.7,
	y = 35,
	size = 6) +
	geom_curve(aes(x = 0.7,
	y = 33,
	xend = 0.74,
	yend = 25),
	arrow = arrow(length = unit(0.07, "inch")),
	size = 0.8,
	color = "gray20",
	curvature = 0.2) +
	annotate("text",
	label = "Don't \nDrink Caffeine",
	x = 0.55,
	y = 22,
	size = 6) +
	geom_curve(aes(x = 0.55,
	y = 20,
	xend = 0.58,
	yend = 14),
	arrow = arrow(length = unit(0.07, "inch")),
	size = 0.8,
	color = "gray20",
	curvature = 0.2)


	difference_plot <- posterior %>%
	filter(parameter == "theta_delta") %>%
	ggplot(aes(x = value)) +
	geom_density(alpha = 0.6,
	fill = "skyblue",
	colour = "skyblue") +
	labs(x = "theta",
	y = "Density",
	title = "Estimate of the difference in theta values") +
	theme_bw() +
	theme(text = element_text(size = 20))