Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save praveenkumarpgiindia/bb9b5c7da80db5087f910236e2cf55c0 to your computer and use it in GitHub Desktop.
Save praveenkumarpgiindia/bb9b5c7da80db5087f910236e2cf55c0 to your computer and use it in GitHub Desktop.
figures to explain p-value misconceptions
options(scipen=999) #disable scientific notation for numbers
#Figure 1 & 2 (set to N <- 5000 for Figure 2)
# Set x-axis upper and lower scalepoint (to do: automate)
low_x<--1
high_x<-1
y_max<-2
#Set sample size per group and effect size d (assumes equal sample sizes per group)
N<-50 #sample size per group for indepndent t-test
d<-0.5 #please enter positive d only
#Calculate non-centrality parameter - equals t-value from sample
ncp<-d*sqrt(N/2)
ncp
#or Cumming, page 305
ncp<-d/(sqrt((1/N)+(1/N)))
#calc d-distribution
x=seq(low_x,high_x,length=10000) #create x values
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = ncp)*sqrt(N/2) #calculate distribution of d based on t-distribution
#Set max Y
y_max<-max(d_dist)+0.5
#create plot
#png(file="Fig1.png",width=4000,height=2500, res = 500)
par(bg = "white")
plot(-10,xlim=c(low_x,high_x), ylim=c(0,y_max), xlab="Difference", ylab='',main=paste("Null-hypothesis for N = ",N))
#abline(v = seq(low_x,high_x,0.1), h = seq(0,0.5,0.1), col = "lightgray", lty = 1)
#lines(x,d_dist,col='black',type='l', lwd=2)
#add d = 0 line
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = 0)*sqrt(N/2)
lines(x,d_dist,col='black',type='l', lwd=2)
#Line for 0
#abline(v=0)
#Line for critical t-value
# abline(v=abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N)))
# abline(v=-abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N)))
#Add Type 1 error rate right
crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2)
y=seq(crit_d,10,length=10000)
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon
polygon(c(crit_d,y,10),c(0,z,0),col=rgb(1, 0, 0,0.5))
#Add Type 1 error rate left
crit_d<--abs(qt(0.05/2, (N*2)-2))/sqrt(N/2)
y=seq(-10, crit_d, length=10000)
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon
polygon(c(y,crit_d,crit_d),c(z,0,0),col=rgb(1, 0, 0,0.5))
#dev.off()
#Figure 3 & 4 (set d <- 1.5 and high_x <- 1.5 for figure 4)-----
low_x<--1
high_x<-3
y_max<-2
#Set sample size per group and effect size d (assumes equal sample sizes per group)
N<-50 #sample size per group for indepndent t-test
d<-0.5 #please enter positive d only
#Calculate non-centrality parameter - equals t-value from sample
ncp<-d*sqrt(N/2)
ncp
#or Cumming, page 305
ncp<-d/(sqrt((1/N)+(1/N)))
#calc d-distribution
x=seq(low_x,high_x,length=10000) #create x values
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = ncp)*sqrt(N/2) #calculate distribution of d based on t-distribution
#Set max Y
y_max<-max(d_dist)+0.5
#create plot
#png(file="Fig1.png",width=4000,height=2500, res = 500)
par(bg = "white")
plot(-10,xlim=c(low_x,high_x), ylim=c(0,y_max), xlab="Difference", ylab='',main=paste("Null- and alternative hypothesis for N = ",N))
#abline(v = seq(low_x,high_x,0.1), h = seq(0,0.5,0.1), col = "lightgray", lty = 1)
lines(x,d_dist,col='black',type='l', lwd=2)
#add d = 0 line
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = 0)*sqrt(N/2)
lines(x,d_dist,col='darkgrey',type='l', lwd=2)
#Line for 0
#abline(v=0)
#Line for critical t-value
# abline(v=abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N)))
# abline(v=-abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N)))
#Add Type 1 error rate right
crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2)
y=seq(crit_d,10,length=10000)
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon
polygon(c(crit_d,y,10),c(0,z,0),col=rgb(1, 0, 0,0.5))
#Add Type 1 error rate left
crit_d<--abs(qt(0.05/2, (N*2)-2))/sqrt(N/2)
y=seq(-10, crit_d, length=10000)
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon
polygon(c(y,crit_d,crit_d),c(z,0,0),col=rgb(1, 0, 0,0.5))
#Add Type 2 error rate
#crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2)
#y=seq(-10,crit_d,length=10000)
#z<-(dt(y*sqrt(N/2),df=(N*2)-2, ncp=ncp)*sqrt(N/2)) #determine upperbounds polygon
#polygon(c(y,crit_d,crit_d),c(0,z,0),col=rgb(0, 0, 1,0.5))
#Lines for Cohen's d and Hedges' g
# abline(v=d, lwd=2, col=rgb(0, 0, 1,0.5)) #Line at d
# abline(v=d*(1-(3/(4*(N-2)-1))), lwd=2, col=rgb(1, 0, 0,0.5)) #Line at Hedges g
#segments(crit_d, 0, crit_d, y_max-0.8, col= 'black', lwd=2)
#text(crit_d, y_max-0.5, paste("Effects larger than d = ",round(crit_d,2),"will be statistically significant"), cex = 1)
#dev.off()
#Figure 5----
low_x<--1
high_x<-1.5
y_max<-2
#Set sample size per group and effect size d (assumes equal sample sizes per group)
N<-50 #sample size per group for indepndent t-test
d<-0.5 #please enter positive d only
#Calculate non-centrality parameter - equals t-value from sample
ncp<-d*sqrt(N/2)
ncp
#or Cumming, page 305
ncp<-d/(sqrt((1/N)+(1/N)))
#calc d-distribution
x=seq(low_x,high_x,length=10000) #create x values
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = ncp)*sqrt(N/2) #calculate distribution of d based on t-distribution
#Set max Y
y_max<-max(d_dist)+0.5
#create plot
#png(file="Fig1.png",width=4000,height=2500, res = 500)
par(bg = "white")
plot(-10,xlim=c(low_x,high_x), ylim=c(0,y_max), xlab="Difference", ylab='',main=paste("Null- and alternative hypothesis for N = ",N))
#abline(v = seq(low_x,high_x,0.1), h = seq(0,0.5,0.1), col = "lightgray", lty = 1)
lines(x,d_dist,col='black',type='l', lwd=2)
#add d = 0 line
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = 0)*sqrt(N/2)
lines(x,d_dist,col='darkgrey',type='l', lwd=2)
#Line for 0
#abline(v=0)
#Line for critical t-value
# abline(v=abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N)))
# abline(v=-abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N)))
#Add Type 1 error rate right
crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2)
y=seq(crit_d,10,length=10000)
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon
polygon(c(crit_d,y,10),c(0,z,0),col=rgb(1, 0, 0,0.5))
#Add Type 1 error rate left
crit_d<--abs(qt(0.05/2, (N*2)-2))/sqrt(N/2)
y=seq(-10, crit_d, length=10000)
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon
polygon(c(y,crit_d,crit_d),c(z,0,0),col=rgb(1, 0, 0,0.5))
#Add Type 2 error rate
#crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2)
#y=seq(-10,crit_d,length=10000)
#z<-(dt(y*sqrt(N/2),df=(N*2)-2, ncp=ncp)*sqrt(N/2)) #determine upperbounds polygon
#polygon(c(y,crit_d,crit_d),c(0,z,0),col=rgb(0, 0, 1,0.5))
#Lines for Cohen's d and Hedges' g
# abline(v=d, lwd=2, col=rgb(0, 0, 1,0.5)) #Line at d
# abline(v=d*(1-(3/(4*(N-2)-1))), lwd=2, col=rgb(1, 0, 0,0.5)) #Line at Hedges g
segments(0.35, 0, 0.35, 2.2, col= 'black', lwd=2)
text(0.35, 2.4, paste("Observed mean difference"), cex = 1)
#dev.off()
#Figure 6 & & (y_max <- 100 m----
# Set x-axis upper and lower scalepoint (to do: automate)
low_x<--1
high_x<-1
y_max<-2.5
#Set sample size per group and effect size d (assumes equal sample sizes per group)
N<-50 #sample size per group for indepndent t-test
d<-0.0 #please enter positive d only
#Calculate non-centrality parameter - equals t-value from sample
ncp<-d*sqrt(N/2)
ncp
#or Cumming, page 305
ncp<-d/(sqrt((1/N)+(1/N)))
#calc d-distribution
x=seq(low_x,high_x,length=10000) #create x values
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = ncp)*sqrt(N/2) #calculate distribution of d based on t-distribution
#create plot
#png(file="Fig1.png",width=4000,height=2500, res = 500)
par(bg = "white")
plot(-10,xlim=c(low_x,high_x), ylim=c(0,y_max), xlab="Difference", ylab='',main=paste("Null-hypothesis for N = ",N))
#abline(v = seq(low_x,high_x,0.1), h = seq(0,0.5,0.1), col = "lightgray", lty = 1)
#lines(x,d_dist,col='black',type='l', lwd=2)
#add d = 0 line
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = 0)*sqrt(N/2)
lines(x,d_dist,col='black',type='l', lwd=2)
#Line for 0
#abline(v=0)
#Line for critical t-value
# abline(v=abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N)))
# abline(v=-abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N)))
#Add Type 1 error rate right
crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2)
y=seq(crit_d,10,length=10000)
z<-(dt(y*sqrt(N/2),df=(N*2)-2, ncp = 0)*sqrt(N/2)) #determine upperbounds polygon
polygon(c(crit_d,y,10),c(0,z,0),col=rgb(1, 0, 0,0.5))
#Add Type 1 error rate left
crit_d<--abs(qt(0.05/2, (N*2)-2))/sqrt(N/2)
y=seq(-10, crit_d, length=10000)
z<-(dt(y*sqrt(N/2),df=(N*2)-2, ncp = 0)*sqrt(N/2)) #determine upperbounds polygon
polygon(c(y,crit_d,crit_d),c(z,0,0),col=rgb(1, 0, 0,0.5))
segments(0.5, 0, 0.5, 2.2, col= 'black', lwd=2)
text(0.5, 2.4, paste("Observed mean difference"), cex = 1)
#segments(0.01, 0, 0.01, 93, col= 'black', lwd=2)
#text(0.01, 96, paste("Observed mean difference"), cex = 1)
#dev.off()
#Figure 8-----
low_x<--0.5
high_x<-1
y_max<-5
#Set sample size per group and effect size d (assumes equal sample sizes per group)
N<-150 #sample size per group for indepndent t-test
d<-0.4175971 #please enter positive d only
#Calculate non-centrality parameter - equals t-value from sample
ncp<-d*sqrt(N/2)
ncp
#or Cumming, page 305
ncp<-d/(sqrt((1/N)+(1/N)))
#calc d-distribution
x=seq(low_x,high_x,length=10000) #create x values
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = ncp)*sqrt(N/2) #calculate distribution of d based on t-distribution
#Set max Y
#y_max<-max(d_dist)+0.5
#create plot
#png(file="Fig1.png",width=4000,height=2500, res = 500)
par(bg = "white")
plot(-10,xlim=c(low_x,high_x), ylim=c(0,y_max), xlab="Difference", ylab='',main=paste("Null- and alternative hypothesis for N = ",N))
#abline(v = seq(low_x,high_x,0.1), h = seq(0,0.5,0.1), col = "lightgray", lty = 1)
lines(x,d_dist,col='black',type='l', lwd=2)
#add d = 0 line
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = 0)*sqrt(N/2)
lines(x,d_dist,col='darkgrey',type='l', lwd=2)
#Line for 0
#abline(v=0)
#Line for critical t-value
# abline(v=abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N)))
# abline(v=-abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N)))
#Add Type 1 error rate right
crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2)
y=seq(crit_d,10,length=10000)
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon
polygon(c(crit_d,y,10),c(0,z,0),col=rgb(1, 0, 0,0.5))
#Add Type 1 error rate left
crit_d<--abs(qt(0.05/2, (N*2)-2))/sqrt(N/2)
y=seq(-10, crit_d, length=10000)
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon
polygon(c(y,crit_d,crit_d),c(z,0,0),col=rgb(1, 0, 0,0.5))
#Add Type 2 error rate
crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2)
y=seq(-10,crit_d,length=10000)
z<-(dt(y*sqrt(N/2),df=(N*2)-2, ncp=ncp)*sqrt(N/2)) #determine upperbounds polygon
polygon(c(y,crit_d,crit_d),c(0,z,0),col=rgb(0, 0, 1,0.5))
#Lines for Cohen's d and Hedges' g
# abline(v=d, lwd=2, col=rgb(0, 0, 1,0.5)) #Line at d
# abline(v=d*(1-(3/(4*(N-2)-1))), lwd=2, col=rgb(1, 0, 0,0.5)) #Line at Hedges g
segments(0.228, 0, 0.228, 3.9, col= 'black', lwd=2)
text(0.225, 4.4, paste("Observed mean difference"), cex = 1)
#dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment