Last active
September 27, 2015 11:02
-
-
Save vasishth/14192ae70a4ab4d7c56a to your computer and use it in GitHub Desktop.
False positives in a lifetime [revised 23 Nov 2014; comments and corrections welcome]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Our simulated scientist will declare | |
## significance only if he/she gets | |
## 2 replications with p<0.05: | |
stringent<-FALSE | |
## Set the above to FALSE if you want to | |
## have the scientist publish a single | |
## expt. as soon as it's significant: | |
#stringent <- FALSE | |
## num of scientists to simulate: | |
nscientists<-1000 | |
## sample size in each expt: | |
n<-1000 | |
## number of expts run by each scientist: | |
nexp<-200 | |
## prob of sampling from a population | |
## where the null is true: | |
prob<- 0.20 | |
## true mean: | |
## we use something low to reflect realistic | |
## studies done with low power: | |
effect_size <- 10 | |
## standard deviation: | |
s<-50 | |
## the above gives about 14% power: | |
power.t.test(n,delta=effect_size,sd=s) | |
## store proportion of false positives | |
## and true positives | |
## in one lifetime of 200 experiments: | |
prop_tps<-prop_fps<-rep(NA,nscientists) | |
## store count of false positives per scientist: | |
scientist<-rep(NA,nscientists) | |
do_stringent_test<-function(test1,n,mu=0,s){ | |
## | |
if(test1>0.05){ | |
return(test1) | |
} else { | |
## if result significant, do second | |
## test: | |
x<-rnorm(n,mean=mu,sd=s) | |
test2<-t.test(x)$p.value | |
## if both results significant: | |
if(test1<=0.05 && test2<=0.05){ | |
## just choose one of the two p's: | |
return(test1) | |
} else {## declare n.s.: | |
return(0.07) | |
} | |
} | |
} | |
## simulate 1000 scientists, each with | |
## a lifetime plan of doing 200 experiments, | |
## with a 20% chance of sampling from a | |
## distribution where null is true: | |
for(k in 1:nscientists){ | |
## vector for holding results (p-values): | |
tests_null_true<-c(0) | |
tests_null_false<-c(0) | |
for(i in 1:nexp){ | |
## does the sample come from a | |
## distribution with null true? | |
null_true<-rbinom(n=1,size=1,p=prob) | |
if(null_true==1){ | |
x<-rnorm(n,mean=0,sd=s) | |
test1<-t.test(x)$p.value | |
if(stringent==FALSE){ | |
## do test and store result: | |
tests_null_true[i]<-test1 | |
} else { ## if stringent==TRUE | |
result<-do_stringent_test(test1,n,0,s) | |
tests_null_true[i]<-result | |
} | |
} else { | |
## sample from distribution where | |
## null is false: | |
x<-rnorm(n,mean=effect_size,sd=s) | |
test1<-t.test(x)$p.value | |
if(stringent==FALSE){ | |
tests_null_false[i]<-test1 | |
} else { | |
## if stringent TRUE | |
result<-do_stringent_test(test1,n,effect_size,s) | |
tests_null_false[i]<-result | |
} | |
} | |
} | |
## Compute proportion of false positives | |
## for each scientist: | |
## remove NAs: | |
tests_null_true<-tests_null_true[!is.na(tests_null_true)] | |
tests_null_false<-tests_null_false[!is.na(tests_null_false)] | |
## Proportion of all null-true tests that ended up with a Type I error: | |
#mean(tests_null_true<0.05) | |
## number of false positives: | |
num_false_pos<-length(which(tests_null_true<0.05)) | |
## track current scientist's | |
## false positives: | |
scientist[k]<-num_false_pos | |
## proportion of all null-false tests | |
## that correctly detected effect | |
## ("observed" power): | |
#mean(tests_null_false<0.05) | |
## number of "true positives": | |
num_true_pos<-length(which(tests_null_false<0.05)) | |
## prop. of false positive results | |
## published in lifetime for scientist k: | |
prop_fps[k]<-num_false_pos/(num_false_pos+num_true_pos) | |
## prop. of true positives: | |
prop_tps[k]<-num_true_pos/(num_false_pos+num_true_pos) | |
} | |
hist(prop_fps,freq=FALSE,main="Distribution of | |
proportion of false positives") | |
hist(prop_tps,freq=FALSE,main="Distribution of | |
proportion of true positives") | |
summary(prop_fps) | |
summary(prop_tps) | |
## those scientists with one or more FP: | |
hist(scientist) | |
scientist<-scientist[which(scientist>0)] | |
length(scientist)/nscientists | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment