Skip to content

Instantly share code, notes, and snippets.

@jwdink
Last active September 2, 2016 14:09
Show Gist options
  • Save jwdink/8da4eff886dfbf8fb998451a4debd9b9 to your computer and use it in GitHub Desktop.
Save jwdink/8da4eff886dfbf8fb998451a4debd9b9 to your computer and use it in GitHub Desktop.
Should we run pilots?
library(dplyr)
library(purrr)
library(broom)
# A Single Study: ----
a_study <- function(effect_size_distr_function, main_n = 100, pilot_n = 0, pilot_effect_size_threshold = NULL) {
this_study_effect_size <- effect_size_distr_function()
pilot <- list(result_vec = rnorm(n = pilot_n, mean = this_study_effect_size, sd = 1),
occurred = pilot_n > 0)
pilot$observed_effect_size <- mean(pilot$result_vec) / sd(pilot$result_vec)
main <- list(result_vec = rnorm(n = main_n, mean = this_study_effect_size, sd = 1),
occurred = ( (pilot_n == 0) || ((pilot_n > 0) & (pilot$observed_effect_size > pilot_effect_size_threshold)) )
)
main$observed_effect_size <- mean(main$result_vec) / sd(main$result_vec)
list(pilot_exp = pilot,
main_exp = main,
true_effect_size = this_study_effect_size,
total_n_used = ifelse(pilot$occurred, pilot_n, 0) + ifelse(main$occurred, main_n, 0) )
}
# How the effect sizes are distributed in the types of phenomena these labs pursues: ----
effect_size_distr_function <- function() sample(c(0,.5), size = 1)
# A single principle-investigator: ----
an_experimenter <- function(subject_pool_size, pilot_n, pilot_effect_size_threshold) {
out <- list()
iter <- 0
while (subject_pool_size > 0) {
iter <- iter + 1
this_study <- a_study(effect_size_distr_function, pilot_n = pilot_n, pilot_effect_size_threshold = pilot_effect_size_threshold)
subject_pool_size <- subject_pool_size - this_study$total_n_used
if (this_study$main_exp$occurred)
out[[iter]] <- data.frame(significant = t.test(this_study$main_exp$result_vec)$p.value < .05,
true_es = this_study$true_effect_size)
}
bind_rows(out)
}
# Two types of principle-investigators ----
pi_without_pilots <- function(subject_pool_size) an_experimenter(subject_pool_size, pilot_n = 0)
pi_with_pilots <- function(subject_pool_size) an_experimenter(subject_pool_size, pilot_n = 16, pilot_effect_size_threshold = .10)
# The simulation ----
set.seed(1988)
num_sims <- 5000
sim_results <- bind_rows(
map_df(.x = 1:num_sims, .f = ~ mutate(pi_without_pilots(2500), simulation_number = .x)) %>% mutate(pi_type='no_pilots'),
map_df(.x = 1:num_sims, .f = ~ mutate(pi_with_pilots(2500), simulation_number = .x)) %>% mutate(pi_type='uses_pilots')
)
# The results ---
sim_results %>%
group_by(pi_type) %>%
mutate(avg_num_studies_per_pi = n()/num_sims) %>%
group_by(effect_exists = true_es > 0,significant, pi_type, avg_num_studies_per_pi) %>%
tally() %>%
group_by(avg_num_studies = n/num_sims, avg_num_studies_per_pi, add = TRUE) %>%
do(select(tidy(binom.test(.$avg_num_studies*num_sims, .$avg_num_studies_per_pi*num_sims)), prop = estimate, conf.low, conf.high))
# effect_exists significant pi_type avg_num_studies avg_num_studies_per_pi prop conf.low conf.high
# <lgl> <lgl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 FALSE FALSE no_pilots 11.9006 25.0000 0.4760240000 0.4732519640 0.4787971484
# 2 FALSE FALSE uses_pilots 5.1924 20.4922 0.2533842145 0.2507231233 0.2560592721
# 3 FALSE TRUE no_pilots 0.6216 25.0000 0.0248640000 0.0240078361 0.0257423016
# 4 FALSE TRUE uses_pilots 0.2796 20.4922 0.0136442158 0.0129427990 0.0143733590
# 5 TRUE FALSE no_pilots 0.0140 25.0000 0.0005600000 0.0004365727 0.0007074741
# 6 TRUE FALSE uses_pilots 0.0204 20.4922 0.0009955007 0.0008117825 0.0012083413
# 7 TRUE TRUE no_pilots 12.4638 25.0000 0.4985520000 0.4957762645 0.5013278027
# 8 TRUE TRUE uses_pilots 14.9998 20.4922 0.7319760689 0.7292525586 0.7346864426
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment