Skip to content

Instantly share code, notes, and snippets.

# Lakens/MetaAnalyticThinking.R Last active Dec 7, 2015

People find it difficult to think about random variation. Our mind is more strongly geared towards recognizing patterns than randomness. In this blogpost, you can practice with getting used to what random variation looks like, how to reduce it by running well-powered studies, and how to meta-analyze multiple small studies.
 # # # # # # # # # # # #Initial settings---- # # # # # # # # # # # if(!require(ggplot2)){install.packages('ggplot2')} library(ggplot2) if(!require(MBESS)){install.packages('MBESS')} library(MBESS) if(!require(pwr)){install.packages('pwr')} library(pwr) if(!require(meta)){install.packages('meta')} library(meta) if(!require(metafor)){install.packages('metafor')} library(metafor) options(digits=10,scipen=999) #Set color palette cbbPalette<-c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") # # # # # # # # # # #Assignment 1---- # # # # # # # # # # #Simulate one group n=10 #set sample size x<-rnorm(n = n, mean = 100, sd = 15) #create sample from normal distribution #plot data ggplot(as.data.frame(x), aes(x)) + geom_histogram(colour="black", fill="grey", aes(y=..density..), binwidth=2) + stat_function(fun=dnorm, args=c(mean=100,sd=15), size=1, color="red", lty=2) + # geom_density(fill=NA, colour="black", size = 1) + xlab("IQ") + ylab("number of people") + ggtitle("Data") + theme_bw(base_size=20) + theme(panel.grid.major.x = element_blank(), axis.text.y = element_blank(), panel.grid.minor.x = element_blank()) + geom_vline(xintercept=mean(x), colour="gray20", linetype="dashed") + coord_cartesian(xlim=c(50,150)) + scale_x_continuous(breaks=c(50,60,70,80,90,100,110,120,130,140,150)) + annotate("text", x = mean(x), y = 0.02, label = paste("Mean = ",round(mean(x)),"\n","SD = ",round(sd(x)),sep="")) # # # # # # # # # # #Assignment 2---- # # # # # # # # # # nSims <- 100000 #number of simulated experiments n<-10 #sample size M<-110 #Mean of the simulated data SD<-15 #SD of the simulated data p <-numeric(nSims) #set up empty container for all simulated p-values for(i in 1:nSims){ #for each simulated experiment x<-rnorm(n = n, mean = M, sd = SD) # z<-t.test(x, mu=100) #perform the t-test against mu (set to value you want to test against) p[i]<-z\$p.value #get the p-value and store it } (sum(p < 0.05)/nSims)*100 #power (in percentage) #plot pvalue distribution ggplot(as.data.frame(p), aes(p)) + geom_histogram(colour="black", fill="grey", binwidth = 0.05) #Perform power analysis for one sample t-test pwr.t.test(d=0.6667,n=10,sig.level=0.05,type="one.sample",alternative="two.sided") # # # # # # # # # # #Assignment 3---- # # # # # # # # # # n1<-10 #set the sample size in group 1 n2<-10 #set the sample size in group 2 mx<-100 #set the mean in group 1 sdx<-15 #set the standard deviation in group 1 my<-106 #set the mean in group 2 sdy<-15 #set the standard deviation in group 2 #randomly draw data x<-rnorm(n = n1, mean = mx, sd = sdx) y<-rnorm(n = n2, mean = my, sd = sdy) DV<-c(x,y) #combine the two samples into a single variable IV<-as.factor(c(rep("1", n1), rep("2", n2))) #create the independent variable (1 and 2) dataset<-as.data.frame(cbind(IV,DV)) #create a dataframe (to make the plot) t.test(x, y, alternative = "two.sided", paired = FALSE, var.equal = TRUE, conf.level = 0.95) #t-test #plot graph ggplot(dataset, aes(DV, fill = as.factor(IV))) + geom_histogram(alpha=0.4, binwidth=2, position="dodge", colour="black", aes(y = ..density..)) + scale_fill_manual(values=cbbPalette, name = "Condition") + stat_function(fun=dnorm, args=c(mean=mx,sd=sdx), size=1, color="#E69F00", lty=2) + stat_function(fun=dnorm, args=c(mean=my,sd=sdy), size=1, color="#56B4E9", lty=2) + xlab("IQ") + ylab("number of people") + ggtitle("Data") + theme_bw(base_size=20) + theme(panel.grid.major.x = element_blank(), axis.text.y = element_blank(), panel.grid.minor.x = element_blank()) + geom_vline(xintercept=mean(x), colour="black", linetype="dashed", size=1) + geom_vline(xintercept=mean(y), colour="black", linetype="dashed", size=1) + coord_cartesian(xlim=c(50,150)) + scale_x_continuous(breaks=c(50,60,70,80,90,100,110,120,130,140,150)) + annotate("text", x = 70, y = 0.06, label = paste("Mean X = ",round(mean(x)),"\n","SD = ",round(sd(x)),sep="")) + annotate("text", x = 130, y = 0.06, label = paste("Mean Y = ",round(mean(y)),"\n","SD = ",round(sd(y)),sep="")) #Perform power analysis for two-sample t-test pwr.t.test(d=0.6667,n=10,sig.level=0.05,type="two.sample",alternative="two.sided") #Perform power analysis for dependent sample t-test pwr.t.test(d=0.6667,n=10,sig.level=0.05,type="paired",alternative="two.sided") # # # # # # # # # # #Assignment 4---- # # # # # # # # # # n <- 30 #set sample size cor.true <- 0.55 #set true correlation x <- rnorm(n = n, mean = 100, sd = 15) #This formula creates a second sample with identical mean and sd, which is correlated with the first sample. y <- (cor.true * (x-(mean(x))) + sqrt(1 - cor.true^2) * rnorm(n = n, mean = 100, sd = 15)) + 100*(1-sqrt(1 - cor.true^2)) dataset<-as.data.frame(cbind(x,y)) #Plot graph ggplot(dataset, aes(x=x, y=y)) + geom_point(size=2) + # Use hollow circles geom_smooth(method=lm, colour="#E69F00", size = 1, fill = "#56B4E9") + # Add linear regression line geom_abline(intercept = (100-(cor.true*100)), slope=cor.true, colour="black", linetype="dotted", size=1) + coord_cartesian(xlim=c(40,160), ylim=c(40,160)) + scale_x_continuous(breaks=c(40, 50,60,70,80,90,100,110,120,130,140,150,160)) + scale_y_continuous(breaks=c(40, 50,60,70,80,90,100,110,120,130,140,150,160)) + xlab("IQ twin 1") + ylab("IQ twin 2") + ggtitle(paste("Correlation = ",round(cor(x,y),digits=2),sep="")) + theme_bw(base_size=20) + theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank()) # # # # # # # # # # #Assignment 5---- # # # # # # # # # # #power analysis for correlation n <- 30 cor.true <- 0.55 nSims<-100000 #This formula creates a second sample with identical mean and sd, which is correlated with the first sample. p <-numeric(nSims) #set up empty container for all simulated p-values for(i in 1:nSims){ #for each simulated experiment x <- rnorm(n = n, mean = 100, sd = 15) y <- (cor.true * (x-(mean(x))) + sqrt(1 - cor.true^2) * rnorm(n = n, mean = 100, sd = 15)) + 100*(1-sqrt(1 - cor.true^2)) z<-cor.test(x,y) #perform the correlation test p[i]<-z\$p.value #get the p-value and store it } (sum(p < 0.05)/nSims)*100 #power (in percentage) #plot pvalue distribution ggplot(as.data.frame(p), aes(p)) + geom_histogram(colour="black", fill="grey", binwidth = 0.05) #Perform power analysis pwr.r.test(r=0.55,n=30,sig.level=0.05,alternative="two.sided") # # # # # # # # # # #Assignment 6---- # # # # # # # # # # #set.seed(1000) mx<-106 #Set mean in sample 1 sdx<-15 #Set standard deviation in sample 1 my<-100 #Set mean in sample 2 sdy<-15 #Set standard deviation in sample 2 nSims <- 1 #number of simulated experiments es.d <-numeric(nSims) #set up empty container for all simulated ES (cohen's d) SSn1 <-numeric(nSims) #set up empty container for random sample sizes group 1 SSn2 <-numeric(nSims) #set up empty container for random sample sizes group 2 for(i in 1:nSims){ #for each simulated experiment SampleSize<-sample(20:50, 1) #randomly draw a sample between 20 and 50 x<-rnorm(n = SampleSize, mean = mx, sd = sdx) #produce simulated participants y<-rnorm(n = SampleSize, mean = my, sd = sdy) #produce simulated participants SSn1[i]<-SampleSize #save sample size group 1 SSn2[i]<-SampleSize #save sample size group 2 es.d[i]<-smd(Mean.1= mean(x), Mean.2=mean(y), s.1=sd(x), s.2=sd(y), n.1=SampleSize, n.2=SampleSize, Unbiased=TRUE) #Use MBESS to calc d unbiased (Hedges g) } #Insert effect sizes and sample sizes n1<-c(SSn1) n2<-c(SSn2) J<-1-3/(4*( SSn1+ SSn2-2)-1) #correction for bias es.d.v <-(((SSn1+SSn2)/(SSn1*SSn2))+(es.d^2/(2*(SSn1+SSn2))))*J^2 #Some formulas add *((n+n)/(n+n-2)) - we don't so that results are consistent with meta-analysis #Calculate Standard Errors ES d.se<-sqrt(es.d.v) #calculate meta-analysis meta<-metagen(es.d, d.se) meta #show meta-analysis #compare against a t-test t.test(x, y, alternative = "two.sided", paired = FALSE, var.equal = TRUE, conf.level = 0.95) #t-test to compare against meta-analysis forest(meta, leftcols=c("studlab"), studlab=TRUE, xlab="Hedges' g", col.square="#E69F00", col.square.lines="black", col.i="black", fontsize=14) # # # # # # # # # # #Assignment 7---- # # # # # # # # # # #Small scale meta-analysis---- #Insert effect sizes and sample sizes es.d<-c(0.44, 0.7, 0.28, 0.35) n1<-c(60, 35, 23, 80) n2<-c(60, 36, 25, 80) #Calculate Variance ES J<-1-3/(4*(n1+n2-2)-1) #Correction for bias es.d.v <-(((n1+n2)/(n1*n2))+(es.d^2/(2*(n1+n2))))*J^2 #Calculate Standard Errors ES d.se<-c(sqrt(es.d.v)) meta<-metagen(es.d, d.se, comb.fixed=FALSE) meta #Forest Plot #If you add studies, make sure to match number of labels in studlab variable. forest(meta, leftcols=c("studlab"), studlab=TRUE, hetstat=FALSE, xlab="Hedges' g", col.square="#E69F00", col.square.lines="black", col.diamond="#56B4E9", col.diamond.lines="black", col.i="black", fontsize=14) # # # # # # # # # # #Assignment 8---- # # # # # # # # # # #set.seed(1000) mx<-106 #Set mean in sample 1 sdx<-15 #Set standard deviation in sample 1 my<-100 #Set mean in sample 2 sdy<-15 #Set standard deviation in sample 2 nSims <- 8 #number of simulated experiments es.d <-numeric(nSims) #set up empty container for all simulated ES (cohen's d) SSn1 <-numeric(nSims) #set up empty container for random sample sizes group 1 SSn2 <-numeric(nSims) #set up empty container for random sample sizes group 2 for(i in 1:nSims){ #for each simulated experiment SampleSize<-sample(100:150, 1) #randomly draw a sample between 20 and 50 x<-rnorm(n = SampleSize, mean = mx, sd = sdx) #produce simulated participants y<-rnorm(n = SampleSize, mean = my, sd = sdy) #produce simulated participants SSn1[i]<-SampleSize #save sample size group 1 SSn2[i]<-SampleSize #save sample size group 2 es.d[i]<-smd(Mean.1= mean(x), Mean.2=mean(y), s.1=sd(x), s.2=sd(y), n.1=SampleSize, n.2=SampleSize, Unbiased=TRUE) #Use MBESS to calc d unbiased (Hedges g) } #Insert effect sizes and sample sizes n1<-c(SSn1) n2<-c(SSn2) J<-1-3/(4*( SSn1+ SSn2-2)-1) #correction for bias es.d.v <-(((SSn1+SSn2)/(SSn1*SSn2))+(es.d^2/(2*(SSn1+SSn2))))*J^2 #Some formulas add *((n+n)/(n+n-2)) - we don't so that results are consistent with meta-analysis #Calculate Standard Errors ES d.se<-sqrt(es.d.v) #calculate meta-analysis meta<-metagen(es.d, d.se) meta #show meta-analysis #compare against a t-test t.test(x, y, alternative = "two.sided", paired = FALSE, var.equal = TRUE, conf.level = 0.95) #t-test to compare against meta-analysis forest(meta, leftcols=c("studlab"), studlab=TRUE, xlab="Hedges' g", col.square="#E69F00", col.square.lines="black", col.i="black", fontsize=14) # # # # # # # # # # #Assignment 9---- # # # # # # # # # # #Small scale meta-analysis---- #Insert effect sizes and sample sizes es.d<-c(0.44, 0.7, 0.28, 0.35) n1<-c(60, 35, 23, 80) n2<-c(60, 36, 25, 80) #Calculate Variance ES J<-1-3/(4*(n1+n2-2)-1) #Correction for bias es.d.v <-(((n1+n2)/(n1*n2))+(es.d^2/(2*(n1+n2))))*J^2 #Calculate Standard Errors ES d.se<-c(sqrt(es.d.v)) meta<-metagen(es.d, d.se, comb.fixed=FALSE) meta #Forest Plot #If you add studies, make sure to match number of labels in studlab variable. forest(meta, leftcols=c("studlab"), studlab=TRUE, hetstat=FALSE, xlab="Hedges' g", col.square="#E69F00", col.square.lines="black", col.diamond="#56B4E9", col.diamond.lines="black", col.i="black", fontsize=14) funnel(meta, lty.fixed = 3, lty.random = 5, studlab=TRUE, xlim = c(-1, 1), xlab = "Effect size", ylab = "Standard Error (SE)", cex = .75, col = 1, bg = 1, contour=c(0.00001, 0.95), col.contour=c("grey", "white"), main = "All Studies")
to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.