Skip to content

Instantly share code, notes, and snippets.

@hkilter
Forked from vasishth/falsepositivesversion2.R
Created September 27, 2015 11:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hkilter/b13832a43ef0c36201fb to your computer and use it in GitHub Desktop.
Save hkilter/b13832a43ef0c36201fb to your computer and use it in GitHub Desktop.
False positives in a lifetime [revised 23 Nov 2014; comments and corrections welcome]
## Our simulated scientist will declare
## significance only if he/she gets
## 2 replications with p<0.05:
stringent<-FALSE
## Set the above to FALSE if you want to
## have the scientist publish a single
## expt. as soon as it's significant:
#stringent <- FALSE
## num of scientists to simulate:
nscientists<-1000
## sample size in each expt:
n<-1000
## number of expts run by each scientist:
nexp<-200
## prob of sampling from a population
## where the null is true:
prob<- 0.20
## true mean:
## we use something low to reflect realistic
## studies done with low power:
effect_size <- 10
## standard deviation:
s<-50
## the above gives about 14% power:
power.t.test(n,delta=effect_size,sd=s)
## store proportion of false positives
## and true positives
## in one lifetime of 200 experiments:
prop_tps<-prop_fps<-rep(NA,nscientists)
## store count of false positives per scientist:
scientist<-rep(NA,nscientists)
do_stringent_test<-function(test1,n,mu=0,s){
##
if(test1>0.05){
return(test1)
} else {
## if result significant, do second
## test:
x<-rnorm(n,mean=mu,sd=s)
test2<-t.test(x)$p.value
## if both results significant:
if(test1<=0.05 && test2<=0.05){
## just choose one of the two p's:
return(test1)
} else {## declare n.s.:
return(0.07)
}
}
}
## simulate 1000 scientists, each with
## a lifetime plan of doing 200 experiments,
## with a 20% chance of sampling from a
## distribution where null is true:
for(k in 1:nscientists){
## vector for holding results (p-values):
tests_null_true<-c(0)
tests_null_false<-c(0)
for(i in 1:nexp){
## does the sample come from a
## distribution with null true?
null_true<-rbinom(n=1,size=1,p=prob)
if(null_true==1){
x<-rnorm(n,mean=0,sd=s)
test1<-t.test(x)$p.value
if(stringent==FALSE){
## do test and store result:
tests_null_true[i]<-test1
} else { ## if stringent==TRUE
result<-do_stringent_test(test1,n,0,s)
tests_null_true[i]<-result
}
} else {
## sample from distribution where
## null is false:
x<-rnorm(n,mean=effect_size,sd=s)
test1<-t.test(x)$p.value
if(stringent==FALSE){
tests_null_false[i]<-test1
} else {
## if stringent TRUE
result<-do_stringent_test(test1,n,effect_size,s)
tests_null_false[i]<-result
}
}
}
## Compute proportion of false positives
## for each scientist:
## remove NAs:
tests_null_true<-tests_null_true[!is.na(tests_null_true)]
tests_null_false<-tests_null_false[!is.na(tests_null_false)]
## Proportion of all null-true tests that ended up with a Type I error:
#mean(tests_null_true<0.05)
## number of false positives:
num_false_pos<-length(which(tests_null_true<0.05))
## track current scientist's
## false positives:
scientist[k]<-num_false_pos
## proportion of all null-false tests
## that correctly detected effect
## ("observed" power):
#mean(tests_null_false<0.05)
## number of "true positives":
num_true_pos<-length(which(tests_null_false<0.05))
## prop. of false positive results
## published in lifetime for scientist k:
prop_fps[k]<-num_false_pos/(num_false_pos+num_true_pos)
## prop. of true positives:
prop_tps[k]<-num_true_pos/(num_false_pos+num_true_pos)
}
hist(prop_fps,freq=FALSE,main="Distribution of
proportion of false positives")
hist(prop_tps,freq=FALSE,main="Distribution of
proportion of true positives")
summary(prop_fps)
summary(prop_tps)
## those scientists with one or more FP:
hist(scientist)
scientist<-scientist[which(scientist>0)]
length(scientist)/nscientists
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment