Skip to content

Instantly share code, notes, and snippets.

@mike-lawrence
Last active November 27, 2020 15:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mike-lawrence/d5844af99dc32e6cb671b4012e16ceb0 to your computer and use it in GitHub Desktop.
Save mike-lawrence/d5844af99dc32e6cb671b4012e16ceb0 to your computer and use it in GitHub Desktop.
code to generate and plot posterior distributions for binomial outcomes
# load packages and define helper functions ----
library(cmdstanr) #for bayes goodness
library(posterior) #for posterior::as_draws_df()
library(progress) # for progress bars
library(viridis) #for better color scales
library(tidyverse) #for all that is good and holy
# helper function to create a progess bar in the global env
create_global_pb = function(x){
pb <<- progress::progress_bar$new(
total = nrow(x)
, format = ":elapsed [:bar] :percent eta: :eta"
)
return(x)
}
# express and compile the model ----
mod_code = '
data{
int N ;
int<lower=0,upper=N> num_fail ;
}
parameters{
real<lower=0,upper=1> true_fail_prob ;
}
model{
num_fail ~ binomial(N,true_fail_prob) ;
}
'
mod =
(
mod_code
%>% cmdstanr::write_stan_file()
%>% cmdstanr::cmdstan_model()
)
# run simulations ---
out =
(
#define the set of N's to explore
tibble::tibble(N=2^(1:6))
#split into list for purrr (must be better way to do this)
%>% dplyr::group_split(N)
#apply anonymous function to generate all possible outcomes
%>% purrr::map_dfr(
.f = function(x){tibble(N=x$N,num_fail = c(0:x$N))}
)
#create the global progress bar for use later
%>% create_global_pb()
#split again (seriously, surely I'm just using purrr wrong)
%>% dplyr::group_split(N,num_fail)
#apply anonymous function to each combo
%>% purrr::map_dfr(
.f = function(x){
pb$tick() #update the progress bar
sink('/dev/null')#silencing cmdstan on unix systems
post = mod$sample(
data = tibble::lst(N=x$N,num_fail=x$num_fail)
, refresh = 0
, chains = parallel::detectCores()/2 - 1
, parallel_chains = parallel::detectCores()/2 - 1
)
sink(NULL) #reinstating output
to_return =
(
post$draws(variables='true_fail_prob')
%>% posterior::as_draws_df()
%>% tibble::as_tibble()
%>% dplyr::select(true_fail_prob)
%>% dplyr::mutate(N=x$N,num_fail=x$num_fail)
)
return(to_return)
}
)
#re-group
%>% dplyr::group_by(N,num_fail)
# compute some quantities in the posterior
%>% dplyr::mutate(
p_fail = num_fail/N
, perc_rank = percent_rank(true_fail_prob)
, true_fail_prob_col = abs(.5-perc_rank)*2
)
)
# visualize ----
p =
(
out
#ensure arranged by perc_rank for gradient-bar hack
%>% dplyr::arrange(perc_rank)
#limit to 95% central quantile interval
%>% dplyr::filter(
perc_rank > (1-.95)/2
, perc_rank < 1-(1-.95)/2
)
#start plottin'
%>% ggplot()
+ facet_wrap(~N, ncol = 3)
+ geom_line(
mapping = aes(
x = p_fail
, y = true_fail_prob
, colour = true_fail_prob_col
, group = p_fail
)
, size = 1
)
+ labs(
y = 'true_fail_rate given obs_fail_rate'
, x = 'obs_fail_rate'
, colour = 'Central\nQuantile\nInterval'
)
+ scale_y_continuous(
breaks = seq(.1,.9,by=.2)
, limits = c(0,1)
, expand = c(0,0)
)
+ scale_x_continuous(
breaks = c(.25,.75)
)
+ viridis::scale_colour_viridis(
direction=-1
, breaks = c(.1,.5,.9)
)
+ theme(
aspect.ratio = 1
, panel.background = element_rect(fill='white',colour = 'grey80')
, panel.grid = element_line(colour='grey95')
)
)
#show the plot
print(p)
#save the plot
ggsave(
plot = p
, filename='cqi.png'
, width = 12/2 , height = 7/2 #recommended twitter aspect ratio
, dpi = 1000
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment