Last active
July 10, 2017 13:26
-
-
Save dalejbarr/8f6adabdbf2ab9b3458cf51c7bfee484 to your computer and use it in GitHub Desktop.
Monte Carlo simulation showing how large samples can increase false positive rates when a source of variation is neglected
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("simgen") # devtools::install_github("dalejbarr/simgen")
library("tibble")
library("ggplot2")
library("parallel")
## Estimate the false positive rate for one design by Monte Carlo simulation.
##
## Calls mcRun() with fitanova as the analysis function and mkDf as the data
## generator, then reports how often the `p1` column of the result is below
## the .05 threshold.
##
## Arguments:
##   ns    - number of subjects; forwarded to mkDf as `nsubj`
##   ni    - number of items; forwarded to mkDf as `nitem`
##   pops  - population parameters; forwarded to mcRun as `mcr.varying`
##   clust - cluster object; forwarded to mcRun as `mcr.cluster`
##
## Returns: the proportion of simulation runs with p1 < .05.
get_typeI <- function(ns, ni, pops, clust) {
  res <- mcRun(
    fitanova,
    mcr.fnArgs = list(wsbi = FALSE),
    mcr.cluster = clust,
    mcr.datFn = mkDf,
    mcr.datArgs = list(wsbi = FALSE, nsubj = ns, nitem = ni),
    mcr.varying = pops
  )
  ## NOTE(review): p1 is presumably the by-subjects p-value -- confirm
  ## against the simgen documentation for fitanova.
  ## The mean of a logical vector is the proportion of TRUE values.
  mean(res$p1 < .05)
}
## TODO: if you have a computing cluster, replace detectCores with hostnames
## e.g.:
## cl <- makeCluster(rep(c("localhost", "server2", "server3"), c(5, 6, 6)))
cl <- makeCluster(detectCores())

## Everything that uses the cluster runs inside tryCatch() so that the worker
## processes are always shut down, even if a simulation step fails.
tryCatch({
  ## attach simgen on every worker
  clusterEvalQ(cl, library("simgen"))

  ## use defaults from simgen; see ?genParamRanges
  pranges <- genParamRanges()
  pranges$eff <- 0 # H0: true
  popdata <- randParams(pranges, 1000)

  ## subject sample sizes: 20, 80, ..., 500
  subj_ns <- seq(20, 500, 60)

  ## vapply() guarantees exactly one numeric rate per sample size
  t1_20i <- vapply(subj_ns, get_typeI, numeric(1),
                   ni = 20, pops = popdata, clust = cl)
  t1_40i <- vapply(subj_ns, get_typeI, numeric(1),
                   ni = 40, pops = popdata, clust = cl)

  ## long format: one row per (number of items, number of subjects) pair
  t1dat <- tibble(
    nitems = as.character(rep(c(20, 40), each = length(subj_ns))),
    nsubj = rep(subj_ns, times = 2),
    type_I = c(t1_20i, t1_40i)
  )

  saveRDS(t1dat, "type_I_error_data.rds")
}, finally = stopCluster(cl))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment