praveenkumarpgiindia/CI_vs_CP.R

## CI_vs_CP.R
if(!require(ggplot2)){install.packages('ggplot2')}
library(ggplot2)

n=20 #set sample size
nSims<-100000 #set number of simulations

x<-rnorm(n = n, mean = 100, sd = 15) #create sample from normal distribution

#95%CI
CIU<-mean(x)+qt(0.975, df = n-1)*sd(x)*sqrt(1/n)
CIL<-mean(x)-qt(0.975, df = n-1)*sd(x)*sqrt(1/n)

#plot data
#png(file="CI_mean.png",width=2000,height=2000, res = 300)
ggplot(as.data.frame(x), aes(x))  +
  geom_rect(aes(xmin=CIL, xmax=CIU, ymin=0, ymax=Inf), fill="#E69F00") +
  geom_histogram(colour="black", fill="grey", aes(y=..density..), binwidth=2) +
  xlab("IQ") + ylab("number of people")  + ggtitle("Data") + theme_bw(base_size=20) +
  theme(panel.grid.major.x = element_blank(), axis.text.y = element_blank(), panel.grid.minor.x = element_blank()) +
  geom_vline(xintercept=100, colour="black", linetype="dashed", size=1) +
  coord_cartesian(xlim=c(50,150)) + scale_x_continuous(breaks=c(50,60,70,80,90,100,110,120,130,140,150)) +
  annotate("text", x = mean(x), y = 0.02, label = paste("Mean = ",round(mean(x)),"\n","SD = ",round(sd(x)),sep=""), size=6.5)
#dev.off()

#Simulate Confidence Intervals
CIU_sim<-numeric(nSims)
CIL_sim<-numeric(nSims)
mean_sim<-numeric(nSims)

for(i in 1:nSims){ #for each simulated experiment
  x<-rnorm(n = n, mean = 100, sd = 15) #create sample from normal distribution
  CIU_sim[i]<-mean(x)+qt(0.975, df = n-1)*sd(x)*sqrt(1/n)
  CIL_sim[i]<-mean(x)-qt(0.975, df = n-1)*sd(x)*sqrt(1/n)
  mean_sim[i]<-mean(x) #store means of each sample
}

#Save only those simulations where the true value was inside the 95% CI
CIU_sim<-CIU_sim[CIU_sim<100]
CIL_sim<-CIL_sim[CIL_sim>100]

cat((100*(1-(length(CIU_sim)/nSims+length(CIL_sim)/nSims))),"% of the 95% confidence intervals contained the true mean")

#Calculate how many times the observed mean fell within the 95% CI of the original study
mean_sim<-mean_sim[mean_sim>CIL&mean_sim<CIU]
cat("The capture percentage for the plotted study, or the % of values within the observed confidence interval from",CIL,"to",CIU,"is:",100*length(mean_sim)/nSims,"%")
	if(!require(ggplot2)){install.packages('ggplot2')}
	library(ggplot2)

	n=20 #set sample size
	nSims<-100000 #set number of simulations

	x<-rnorm(n = n, mean = 100, sd = 15) #create sample from normal distribution

	#95%CI
	CIU<-mean(x)+qt(0.975, df = n-1)sd(x)sqrt(1/n)
	CIL<-mean(x)-qt(0.975, df = n-1)sd(x)sqrt(1/n)

	#plot data
	#png(file="CI_mean.png",width=2000,height=2000, res = 300)
	ggplot(as.data.frame(x), aes(x)) +
	geom_rect(aes(xmin=CIL, xmax=CIU, ymin=0, ymax=Inf), fill="#E69F00") +
	geom_histogram(colour="black", fill="grey", aes(y=..density..), binwidth=2) +
	xlab("IQ") + ylab("number of people") + ggtitle("Data") + theme_bw(base_size=20) +
	theme(panel.grid.major.x = element_blank(), axis.text.y = element_blank(), panel.grid.minor.x = element_blank()) +
	geom_vline(xintercept=100, colour="black", linetype="dashed", size=1) +
	coord_cartesian(xlim=c(50,150)) + scale_x_continuous(breaks=c(50,60,70,80,90,100,110,120,130,140,150)) +
	annotate("text", x = mean(x), y = 0.02, label = paste("Mean = ",round(mean(x)),"\n","SD = ",round(sd(x)),sep=""), size=6.5)
	#dev.off()

	#Simulate Confidence Intervals
	CIU_sim<-numeric(nSims)
	CIL_sim<-numeric(nSims)
	mean_sim<-numeric(nSims)

	for(i in 1:nSims){ #for each simulated experiment
	x<-rnorm(n = n, mean = 100, sd = 15) #create sample from normal distribution
	CIU_sim[i]<-mean(x)+qt(0.975, df = n-1)sd(x)sqrt(1/n)
	CIL_sim[i]<-mean(x)-qt(0.975, df = n-1)sd(x)sqrt(1/n)
	mean_sim[i]<-mean(x) #store means of each sample
	}

	#Save only those simulations where the true value was inside the 95% CI
	CIU_sim<-CIU_sim[CIU_sim<100]
	CIL_sim<-CIL_sim[CIL_sim>100]

	cat((100*(1-(length(CIU_sim)/nSims+length(CIL_sim)/nSims))),"% of the 95% confidence intervals contained the true mean")

	#Calculate how many times the observed mean fell within the 95% CI of the original study
	mean_sim<-mean_sim[mean_sim>CIL&mean_sim<CIU]
	cat("The capture percentage for the plotted study, or the % of values within the observed confidence interval from",CIL,"to",CIU,"is:",100*length(mean_sim)/nSims,"%")