R and Stan script calculating the probability that my son will be stung by a bumblebee.
library(tidyverse) | |
library(purrr) | |
library(rstan) | |
### Defining the data ### | |
######################### | |
bumblebees <- c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, | |
0, 0, 1, 0, 0, 0, 0, 0, 0) | |
toddler_steps <- c(26, 16, 37, 101, 12, 122, 90, 55, 56, 39, 55, 15, 45, 8) | |
full_squares <- 174 | |
partial_squares <- 251 | |
squares <- (full_squares + partial_squares) / 2 | |
foot_cm2 <- squares / 4 | |
foot_m2 <- foot_cm2 / 100^2 | |
### Plotting the priors ### | |
########################### | |
cauchy_priors <- | |
data_frame(parameter = c( "mu_bees", "mu_steps", "precision_steps"), | |
scale = c( (4 + 4) / 100, 60, 1 )) | |
cauchy_priors$x <- map(cauchy_priors$scale, ~ seq(0, .x * 4, length.out = 100)) | |
cauchy_priors <- unnest(cauchy_priors) | |
cauchy_priors$density <- dcauchy(cauchy_priors$x, 0, cauchy_priors$scale) | |
ggplot(cauchy_priors, aes(x, density, width = scale / 20, fill = parameter)) + | |
geom_col() + | |
geom_vline(aes(xintercept = scale), linetype = 2) + | |
facet_wrap(~ parameter, scales = "free") + | |
#theme_minimal() + | |
theme(legend.position = "none") + | |
xlab("The priors (the dashed line shows the median of the half-Cauchy distribution)") | |
### Defining and fitting the model ### | |
###################################### | |
bee_model_code <- " | |
data { | |
int n_bumblebee_observations; | |
int bumblebees[n_bumblebee_observations]; | |
int n_toddler_steps_observations; | |
int toddler_steps[n_toddler_steps_observations]; | |
real foot_m2; | |
} | |
parameters { | |
real<lower=0> mu_bees; | |
real<lower=0> mu_steps; | |
real<lower=0> precision_steps; | |
} | |
model { | |
// Since we have contrained the parameters to be positive we get implicit | |
// half-cauchy distributions even if we declare them to be 'full'-cauchy. | |
mu_bees ~ cauchy(0, (4.0 + 4.0) / (10 * 10) ); | |
mu_steps ~ cauchy(0, 60); | |
precision_steps ~ cauchy(0, 1); | |
bumblebees ~ poisson(mu_bees); | |
toddler_steps ~ neg_binomial_2(mu_steps, precision_steps); | |
} | |
generated quantities { | |
int stings_by_minute[60]; | |
int stings_by_hour; | |
int pred_steps; | |
real stepped_area; | |
for(minute in 1:60) { | |
pred_steps = neg_binomial_2_rng(mu_steps, precision_steps); | |
stepped_area = pred_steps * foot_m2; | |
stings_by_minute[minute] = poisson_rng(mu_bees * stepped_area); | |
} | |
stings_by_hour = sum(stings_by_minute); | |
} | |
" | |
data_list <- list( | |
n_bumblebee_observations = length(bumblebees), | |
bumblebees = bumblebees, | |
n_toddler_steps_observations = length(toddler_steps), | |
toddler_steps = toddler_steps, | |
foot_m2 = foot_m2 | |
) | |
fit <- stan(model_code = bee_model_code, data = data_list) | |
### Looking at the result ### | |
############################# | |
stan_hist(fit, c("mu_bees", "mu_steps", "precision_steps")) | |
print(fit, c("mu_bees", "mu_steps", "precision_steps")) | |
# Getting the sample representing the prob. distribution over stings/hour . | |
stings_by_hour <- as.data.frame(fit)$stings_by_hour | |
# Now actually calculating the prob of 0 stings, 1 stings, 2 stiongs, etc. | |
strings_probability <- prop.table( table(stings_by_hour) ) | |
# Plotin' n' printin' | |
barplot(strings_probability, col = "yellow", | |
main = "Posterior predictive stings per hour", | |
xlab = "Number of stings", ylab = "Probability") | |
round(strings_probability, 2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment