Skip to content

Instantly share code, notes, and snippets.

@dalejbarr
Last active July 10, 2017 13:26
Show Gist options
  • Save dalejbarr/8f6adabdbf2ab9b3458cf51c7bfee484 to your computer and use it in GitHub Desktop.
Save dalejbarr/8f6adabdbf2ab9b3458cf51c7bfee484 to your computer and use it in GitHub Desktop.
Monte Carlo simulation showing how large samples can increase false positive rates when a source of variation is neglected
library("simgen") # devtools::install_github("dalejbarr/simgen")
library("tibble")
library("ggplot2")
library("parallel")
## Proportion of simulation runs whose `p1` value falls below .05.
##
## Arguments:
##   ns    - forwarded to the data-generating function as `nsubj`
##   ni    - forwarded to the data-generating function as `nitem`
##   pops  - forwarded to mcRun() as `mcr.varying`
##   clust - forwarded to mcRun() as `mcr.cluster`
##
## Returns the count of rows in the mcRun() result with p1 < .05 divided by
## the total number of rows in that result.
get_typeI <- function(ns, ni, pops, clust) {
  fit_args <- list(wsbi = FALSE)
  gen_args <- list(wsbi = FALSE, nsubj = ns, nitem = ni)

  sim_out <- mcRun(
    fitanova,
    mcr.fnArgs = fit_args,
    mcr.cluster = clust,
    mcr.datFn = mkDf,
    mcr.datArgs = gen_args,
    mcr.varying = pops
  )

  n_below <- sum(sim_out$p1 < .05)
  n_runs <- nrow(sim_out)
  n_below / n_runs
}
## TODO: if you have a computing cluster, replace detectCores with hostnames
## e.g.:
## cl <- makeCluster(rep(c("localhost", "server2", "server3"), c(5, 6, 6)))
## detectCores() can return NA when the core count is unknown; fall back to
## a single worker in that case instead of failing inside makeCluster().
n_workers <- detectCores()
if (is.na(n_workers)) {
  n_workers <- 1L
}
cl <- makeCluster(n_workers)

## Everything that uses the cluster runs inside tryCatch() so the worker
## processes are shut down even when one of the simulation steps errors.
## NOTE(review): no RNG seed is set in this script, so results will differ
## between runs; consider set.seed() / clusterSetRNGStream() if
## reproducibility matters.
tryCatch({
  ## attach simgen on every worker
  invisible(clusterEvalQ(cl, library("simgen")))

  ## use defaults from simgen; see ?genParamRanges
  pranges <- genParamRanges()
  pranges$eff <- 0 # H0: true
  popdata <- randParams(pranges, 1000)

  ## subject counts to evaluate: 20, 80, ..., 500
  subj_ns <- seq(20, 500, 60)

  ## one rate per subject count, for 20 and for 40 items; vapply() enforces
  ## that each call yields a single numeric value
  t1_20i <- vapply(subj_ns, get_typeI, FUN.VALUE = numeric(1),
                   ni = 20, pops = popdata, clust = cl)
  t1_40i <- vapply(subj_ns, get_typeI, FUN.VALUE = numeric(1),
                   ni = 40, pops = popdata, clust = cl)

  ## long-format table: one row per (nitems, nsubj) combination
  t1dat <- tibble(
    nitems = as.character(rep(c(20, 40), each = length(subj_ns))),
    nsubj = rep(subj_ns, times = 2),
    type_I = c(t1_20i, t1_40i)
  )
  saveRDS(t1dat, "type_I_error_data.rds")
}, finally = stopCluster(cl))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment