rasmusab/the-probability-my-son-will-be-stung-by-a-bumblebee.R

## the-probability-my-son-will-be-stung-by-a-bumblebee.R
library(tidyverse)
library(purrr)
library(rstan)

### Defining the data ###
#########################
bumblebees <- c(1, 0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
                0, 0, 1, 0, 0, 0, 0, 0, 0)
toddler_steps <- c(26, 16, 37, 101, 12, 122, 90, 55, 56, 39, 55, 15, 45, 8)
full_squares <- 174
partial_squares <- 251
squares <- (full_squares + partial_squares) / 2
foot_cm2 <- squares / 4
foot_m2 <- foot_cm2 / 100^2


### Plotting the priors ###
###########################
cauchy_priors <-
  data_frame(parameter = c(    "mu_bees", "mu_steps", "precision_steps"),
             scale     = c( (4 + 4) / 100,     60,          1         ))
cauchy_priors$x <- map(cauchy_priors$scale, ~ seq(0, .x * 4, length.out = 100))
cauchy_priors <- unnest(cauchy_priors)
cauchy_priors$density <- dcauchy(cauchy_priors$x, 0, cauchy_priors$scale)

ggplot(cauchy_priors, aes(x, density, width = scale / 20, fill = parameter)) +
  geom_col() +
  geom_vline(aes(xintercept = scale), linetype = 2) +
  facet_wrap(~ parameter, scales = "free") +
  #theme_minimal() +
  theme(legend.position = "none") +
  xlab("The priors (the dashed line shows the median of the half-Cauchy distribution)")

### Defining and fitting the model ###
######################################
bee_model_code <- "
data {
  int n_bumblebee_observations;
  int bumblebees[n_bumblebee_observations];
  int n_toddler_steps_observations;
  int toddler_steps[n_toddler_steps_observations];
  real foot_m2;
}

parameters {
  real<lower=0> mu_bees;
  real<lower=0> mu_steps;
  real<lower=0> precision_steps;
}

model {
  // Since we have contrained the parameters to be positive we get implicit
  // half-cauchy distributions even if we declare them to be 'full'-cauchy.
  mu_bees ~ cauchy(0, (4.0 + 4.0) / (10 * 10) );
  mu_steps ~ cauchy(0, 60);
  precision_steps ~ cauchy(0, 1);
  bumblebees ~ poisson(mu_bees);
  toddler_steps ~ neg_binomial_2(mu_steps, precision_steps);
}

generated quantities {
  int stings_by_minute[60];
  int stings_by_hour;
  int pred_steps;
  real stepped_area;

  for(minute in 1:60) {
    pred_steps =  neg_binomial_2_rng(mu_steps, precision_steps);
    stepped_area = pred_steps * foot_m2;
    stings_by_minute[minute] = poisson_rng(mu_bees * stepped_area);
  }
  stings_by_hour = sum(stings_by_minute);
}
"

data_list <- list(
  n_bumblebee_observations = length(bumblebees),
  bumblebees = bumblebees,
  n_toddler_steps_observations = length(toddler_steps),
  toddler_steps = toddler_steps,
  foot_m2 = foot_m2
)
fit <- stan(model_code = bee_model_code, data = data_list)

### Looking at the result ###
#############################

stan_hist(fit, c("mu_bees", "mu_steps", "precision_steps"))
print(fit, c("mu_bees", "mu_steps", "precision_steps"))

# Getting the sample representing the prob. distribution over stings/hour .
stings_by_hour <- as.data.frame(fit)$stings_by_hour
# Now actually calculating the prob of 0 stings, 1 stings, 2 stiongs, etc.
strings_probability <- prop.table( table(stings_by_hour) )
# Plotin' n' printin'
barplot(strings_probability, col = "yellow",
        main = "Posterior predictive stings per hour",
        xlab = "Number of stings", ylab = "Probability")
round(strings_probability, 2)
	library(tidyverse)
	library(purrr)
	library(rstan)

	### Defining the data ###
	#########################
	bumblebees <- c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
	0, 0, 1, 0, 0, 0, 0, 0, 0)
	toddler_steps <- c(26, 16, 37, 101, 12, 122, 90, 55, 56, 39, 55, 15, 45, 8)
	full_squares <- 174
	partial_squares <- 251
	squares <- (full_squares + partial_squares) / 2
	foot_cm2 <- squares / 4
	foot_m2 <- foot_cm2 / 100^2


	### Plotting the priors ###
	###########################
	cauchy_priors <-
	data_frame(parameter = c( "mu_bees", "mu_steps", "precision_steps"),
	scale = c( (4 + 4) / 100, 60, 1 ))
	cauchy_priors$x <- map(cauchy_priors$scale, ~ seq(0, .x * 4, length.out = 100))
	cauchy_priors <- unnest(cauchy_priors)
	cauchy_priors$density <- dcauchy(cauchy_priors$x, 0, cauchy_priors$scale)

	ggplot(cauchy_priors, aes(x, density, width = scale / 20, fill = parameter)) +
	geom_col() +
	geom_vline(aes(xintercept = scale), linetype = 2) +
	facet_wrap(~ parameter, scales = "free") +
	#theme_minimal() +
	theme(legend.position = "none") +
	xlab("The priors (the dashed line shows the median of the half-Cauchy distribution)")

	### Defining and fitting the model ###
	######################################
	bee_model_code <- "
	data {
	int n_bumblebee_observations;
	int bumblebees[n_bumblebee_observations];
	int n_toddler_steps_observations;
	int toddler_steps[n_toddler_steps_observations];
	real foot_m2;
	}

	parameters {
	real<lower=0> mu_bees;
	real<lower=0> mu_steps;
	real<lower=0> precision_steps;
	}

	model {
	// Since we have contrained the parameters to be positive we get implicit
	// half-cauchy distributions even if we declare them to be 'full'-cauchy.
	mu_bees ~ cauchy(0, (4.0 + 4.0) / (10 * 10) );
	mu_steps ~ cauchy(0, 60);
	precision_steps ~ cauchy(0, 1);
	bumblebees ~ poisson(mu_bees);
	toddler_steps ~ neg_binomial_2(mu_steps, precision_steps);
	}

	generated quantities {
	int stings_by_minute[60];
	int stings_by_hour;
	int pred_steps;
	real stepped_area;

	for(minute in 1:60) {
	pred_steps = neg_binomial_2_rng(mu_steps, precision_steps);
	stepped_area = pred_steps * foot_m2;
	stings_by_minute[minute] = poisson_rng(mu_bees * stepped_area);
	}
	stings_by_hour = sum(stings_by_minute);
	}
	"

	data_list <- list(
	n_bumblebee_observations = length(bumblebees),
	bumblebees = bumblebees,
	n_toddler_steps_observations = length(toddler_steps),
	toddler_steps = toddler_steps,
	foot_m2 = foot_m2
	)
	fit <- stan(model_code = bee_model_code, data = data_list)

	### Looking at the result ###
	#############################

	stan_hist(fit, c("mu_bees", "mu_steps", "precision_steps"))
	print(fit, c("mu_bees", "mu_steps", "precision_steps"))

	# Getting the sample representing the prob. distribution over stings/hour .
	stings_by_hour <- as.data.frame(fit)$stings_by_hour
	# Now actually calculating the prob of 0 stings, 1 stings, 2 stiongs, etc.
	strings_probability <- prop.table( table(stings_by_hour) )
	# Plotin' n' printin'
	barplot(strings_probability, col = "yellow",
	main = "Posterior predictive stings per hour",
	xlab = "Number of stings", ylab = "Probability")
	round(strings_probability, 2)