mike-lawrence/binom_sim.r

## binom_sim.r
# load packages and define helper functions ----
library(cmdstanr) #for bayes goodness
library(posterior) #for posterior::as_draws_df()
library(progress) # for progress bars
library(viridis) #for better color scales
library(tidyverse) #for all that is good and holy

# helper function to create a progess bar in the global env
create_global_pb = function(x){
	pb <<- progress::progress_bar$new(
		total = nrow(x)
		, format = ":elapsed [:bar] :percent  eta: :eta"
	)
	return(x)
}

# express and compile the model ----
mod_code = '
data{
	int N ;
	int<lower=0,upper=N> num_fail ;
}
parameters{
	real<lower=0,upper=1> true_fail_prob ;
}
model{
	num_fail ~ binomial(N,true_fail_prob) ;
}
'
mod =
	(
		mod_code
		%>% cmdstanr::write_stan_file()
		%>% cmdstanr::cmdstan_model()
	)


# run simulations ---
out =
	(
		#define the set of N's to explore
		tibble::tibble(N=2^(1:6))
		#split into list for purrr (must be better way to do this)
		%>% dplyr::group_split(N)
		#apply anonymous function to generate all possible outcomes
		%>% purrr::map_dfr(
			.f = function(x){tibble(N=x$N,num_fail = c(0:x$N))}
		)
		#create the global progress bar for use later
		%>% create_global_pb()
		#split again (seriously, surely I'm just using purrr wrong)
		%>% dplyr::group_split(N,num_fail)
		#apply anonymous function to each combo
		%>% purrr::map_dfr(
			.f = function(x){
				pb$tick() #update the progress bar
				sink('/dev/null')#silencing cmdstan on unix systems
				post = mod$sample(
					data = tibble::lst(N=x$N,num_fail=x$num_fail)
					, refresh = 0
					, chains = parallel::detectCores()/2 - 1
					, parallel_chains = parallel::detectCores()/2 - 1
				)
				sink(NULL) #reinstating output
				to_return =
					(
						post$draws(variables='true_fail_prob')
						%>% posterior::as_draws_df()
						%>% tibble::as_tibble()
						%>% dplyr::select(true_fail_prob)
						%>% dplyr::mutate(N=x$N,num_fail=x$num_fail)
					)
				return(to_return)
			}
		)
		#re-group
		%>% dplyr::group_by(N,num_fail)
		# compute some quantities in the posterior
		%>% dplyr::mutate(
			p_fail = num_fail/N
			, perc_rank = percent_rank(true_fail_prob)
			, true_fail_prob_col = abs(.5-perc_rank)*2
		)

	)

# visualize ----
p =
	(
		out
		#ensure arranged by perc_rank for gradient-bar hack
		%>% dplyr::arrange(perc_rank)
		#limit to 95% central quantile interval
		%>% dplyr::filter(
			perc_rank > (1-.95)/2
			, perc_rank < 1-(1-.95)/2
		)
		#start plottin'
		%>% ggplot()
		+ facet_wrap(~N, ncol = 3)
		+ geom_line(
			mapping = aes(
				x = p_fail
				, y = true_fail_prob
				, colour = true_fail_prob_col
				, group = p_fail
			)
			, size = 1
		)
		+ labs(
			y = 'true_fail_rate  given  obs_fail_rate'
			, x = 'obs_fail_rate'
			, colour = 'Central\nQuantile\nInterval'
		)
		+ scale_y_continuous(
			breaks = seq(.1,.9,by=.2)
			, limits = c(0,1)
			, expand = c(0,0)
		)
		+ scale_x_continuous(
			breaks = c(.25,.75)
		)
		+ viridis::scale_colour_viridis(
			direction=-1
			, breaks = c(.1,.5,.9)
		)
		+ theme(
			aspect.ratio = 1
			, panel.background = element_rect(fill='white',colour = 'grey80')
			, panel.grid = element_line(colour='grey95')
		)
	)

#show the plot
print(p)

#save the plot
ggsave(
	plot = p
	, filename='cqi.png'
	, width = 12/2 , height = 7/2 #recommended twitter aspect ratio
	, dpi = 1000
)
	# load packages and define helper functions ----
	library(cmdstanr) #for bayes goodness
	library(posterior) #for posterior::as_draws_df()
	library(progress) # for progress bars
	library(viridis) #for better color scales
	library(tidyverse) #for all that is good and holy

	# helper function to create a progess bar in the global env
	create_global_pb = function(x){
	pb <<- progress::progress_bar$new(
	total = nrow(x)
	, format = ":elapsed [:bar] :percent eta: :eta"
	)
	return(x)
	}

	# express and compile the model ----
	mod_code = '
	data{
	int N ;
	int<lower=0,upper=N> num_fail ;
	}
	parameters{
	real<lower=0,upper=1> true_fail_prob ;
	}
	model{
	num_fail ~ binomial(N,true_fail_prob) ;
	}
	'
	mod =
	(
	mod_code
	%>% cmdstanr::write_stan_file()
	%>% cmdstanr::cmdstan_model()
	)


	# run simulations ---
	out =
	(
	#define the set of N's to explore
	tibble::tibble(N=2^(1:6))
	#split into list for purrr (must be better way to do this)
	%>% dplyr::group_split(N)
	#apply anonymous function to generate all possible outcomes
	%>% purrr::map_dfr(
	.f = function(x){tibble(N=x$N,num_fail = c(0:x$N))}
	)
	#create the global progress bar for use later
	%>% create_global_pb()
	#split again (seriously, surely I'm just using purrr wrong)
	%>% dplyr::group_split(N,num_fail)
	#apply anonymous function to each combo
	%>% purrr::map_dfr(
	.f = function(x){
	pb$tick() #update the progress bar
	sink('/dev/null')#silencing cmdstan on unix systems
	post = mod$sample(
	data = tibble::lst(N=x$N,num_fail=x$num_fail)
	, refresh = 0
	, chains = parallel::detectCores()/2 - 1
	, parallel_chains = parallel::detectCores()/2 - 1
	)
	sink(NULL) #reinstating output
	to_return =
	(
	post$draws(variables='true_fail_prob')
	%>% posterior::as_draws_df()
	%>% tibble::as_tibble()
	%>% dplyr::select(true_fail_prob)
	%>% dplyr::mutate(N=x$N,num_fail=x$num_fail)
	)
	return(to_return)
	}
	)
	#re-group
	%>% dplyr::group_by(N,num_fail)
	# compute some quantities in the posterior
	%>% dplyr::mutate(
	p_fail = num_fail/N
	, perc_rank = percent_rank(true_fail_prob)
	, true_fail_prob_col = abs(.5-perc_rank)*2
	)

	)

	# visualize ----
	p =
	(
	out
	#ensure arranged by perc_rank for gradient-bar hack
	%>% dplyr::arrange(perc_rank)
	#limit to 95% central quantile interval
	%>% dplyr::filter(
	perc_rank > (1-.95)/2
	, perc_rank < 1-(1-.95)/2
	)
	#start plottin'
	%>% ggplot()
	+ facet_wrap(~N, ncol = 3)
	+ geom_line(
	mapping = aes(
	x = p_fail
	, y = true_fail_prob
	, colour = true_fail_prob_col
	, group = p_fail
	)
	, size = 1
	)
	+ labs(
	y = 'true_fail_rate given obs_fail_rate'
	, x = 'obs_fail_rate'
	, colour = 'Central\nQuantile\nInterval'
	)
	+ scale_y_continuous(
	breaks = seq(.1,.9,by=.2)
	, limits = c(0,1)
	, expand = c(0,0)
	)
	+ scale_x_continuous(
	breaks = c(.25,.75)
	)
	+ viridis::scale_colour_viridis(
	direction=-1
	, breaks = c(.1,.5,.9)
	)
	+ theme(
	aspect.ratio = 1
	, panel.background = element_rect(fill='white',colour = 'grey80')
	, panel.grid = element_line(colour='grey95')
	)
	)

	#show the plot
	print(p)

	#save the plot
	ggsave(
	plot = p
	, filename='cqi.png'
	, width = 12/2 , height = 7/2 #recommended twitter aspect ratio
	, dpi = 1000
	)