benmarwick/BANOVA.r

## BANOVA.r
# Joseph R. Mihaljevic
# July 2013
# (Partial) Bayesian analysis of variance, accounting for heteroscedasticity
# Generate some artificial data:
# Normally distributed groups, but heteroscedastic
a <- rnorm(25, mean=8, sd=10)
b <- rnorm(50, mean=5, sd=2)
c <- rnorm(25, mean=3, sd=.1)
d <- rnorm(25, mean=11, sd=3)
e <- rnorm(50, mean=13, sd=2)

data <- data.frame(meas = c(a,b,c,d,e), group = rep(c(1,2,3,4,5), times=c(length(a), length(b), length(c),  length(d),  length(e))))
lst <- split(data, data$group)

nobs <- length(unique(data$group))
levels <- data$group

# JAGS needs a list of the data
jags_d <- list(y=data$meas,
               levels=levels,
               nobs=nobs)


# write model
cat("
    model{
    ## Specify hyperpriors and priors

    a1tau ~ dgamma(.1, .1)
    scale ~ dunif(0, 1)
    rate ~ dunif(0, 1)

    ## Specify likelihood for ANOVA model
    for (i in 1:nobs){

    y[i] ~ dnorm(mu[i], tau[levels[i]]) #this creates a different tau for each level
    mu[i] <- a1[levels[i]]

    }

    ## Specify the effects for each level
    for (j in 1:nobs){
    a1[j] ~ dnorm(0, a1tau)
    }
    ## Specify variance for each level
    for (j in 1:nobs){
    tau[j] ~ dgamma(scale, rate)
    }

    ## Specify effect contrasts:
    ## You should follow the rules of independent contrasts.
    e1v2 <- a1[1]-a1[2]
    e1v3 <- a1[1]-a1[3]
    e1v4 <- a1[1]-a1[4]
    e1v5 <- a1[1]-a1[5]
    e2v3 <- a1[2]-a1[3]
    e2v4 <- a1[2]-a1[4]
    e2v5 <- a1[2]-a1[5]
    e3v4 <- a1[3]-a1[4]
    e3v5 <- a1[3]-a1[5]
    e4v5 <- a1[4]-a1[5]


    # back-transfrom tau so that you can visualze the standard deviation
    for (i in 1:nobs){
    sd[i] <- 1/sqrt(tau[i])
    }
    }
    ",
    fill=TRUE, file="FakeANOVAuneqvar.txt")

library(rjags)

# initiate model
mod1 <- jags.model(file="FakeANOVAuneqvar.txt",
                   data=jags_d, n.chains=3, n.adapt=1000)


# simulate the posterior
out <- coda.samples(mod1, n.iter=2000,
                    variable.names=c("a1", "tau", "a1tau",
                                     "e1v2", "e1v3", "e1v4", "e1v5",
                                     "e2v3", "e2v4", "e2v5",
                                     "e3v4", "e3v5",
                                     "e4v5",
                                     "sd"))

str(out) # view the structure of the output object
summary(out)
windows()
plot(out)

#Visualize the means
plot(out[, c("a1[1]","a1[2]","a1[3]")]) #you can plot the variables individually this way
# Notice the differences in variance! (Look at the x-axis limits)

#Visualize the standard deviation estimates
plot(out[, c("sd[1]","sd[2]","sd[3]")])
#Pretty accurate given our initial samples!

#Now look at the pairwise contrasts
plot(out[, c("e1v2", "e3v5")])
#Notice that the difference between group 1 and 2 includes zero with relatively high
#probability. Thus, these groups are not different in mean effect, accounting for
#differences in group-level variance.
# Groups 1&2 (their average) are clearly different from group 3.


# Let's make some fancier graphs!
library(ggmcmc)
all_data <- ggs(out) #Create a data frame of all the simulations
str(all_data) #Look at the structure

#Subset out all of the means
mus <- subset(all_data, Parameter=="a1[1]"|Parameter=="a1[2]"|Parameter=="a1[3]",
              select=c(Parameter, value))
head(mus)

library(ggplot2)

#Re-label the group names so that it looks better on the plot
levels(mus$Parameter)[1]
for(i in 1:3){
  levels(mus$Parameter)[i] <- paste("Mean of Group", i, sep=" ")
}
means <- ggplot(mus, aes(x=value))+
  geom_density()+
  theme_bw()+
  facet_wrap(~Parameter, nrow=1, scales="free")+
  labs(x="Mean Value", y="Density")
quartz(height=3, width=5.5)
plot(means)

#Look at the standard deviations
sds <- subset(all_data, Parameter=="sd[1]"|Parameter=="sd[2]"|Parameter=="sd[3]",
              select=c(Parameter, value))
head(sds)
levels(sds$Parameter)
for(i in 7:9){
  levels(sds$Parameter)[i] <- paste("Std. Dev., Group", i-6, sep=" ")
}
stddevs <- ggplot(sds, aes(x=value))+
  geom_density()+
  theme_bw()+
  facet_wrap(~Parameter, nrow=1, scales="free")+
  labs(x="Std. Dev. Value", y="Density")
quartz(height=3, width=5.5)
plot(stddevs)

#Look at the differences in group means

diffs <- subset(all_data, Parameter=="e1v2"|Parameter=="e3v4",
                select=c(Parameter, value))
head(diffs)
levels(diffs$Parameter)

levels(diffs$Parameter)[5] <- "Groups 1 v 2"
levels(diffs$Parameter)[6] <- "Average of Groups 1 & 2 v 3"
differences <- ggplot(diffs, aes(x=value))+
  geom_density()+
  theme_bw()+
  facet_wrap(~Parameter, nrow=1, scales="free")+
  labs(x="Effects (Differences)", y="Density")
quartz(height=3, width=5.5)
plot(differences)


# NOTE:
# This is a partial analysis. You will also have to do some model validation!

# freq ANOVA
aovresult = aov( meas ~ as.factor(group) , data = data ) # NHST ANOVA

print( summary( aovresult ) )

print( model.tables( aovresult , "means" ) , digits=4 )

boxplot( meas ~ as.factor(group) , data = data , cex.axis=1.25 , ylab="Y")

print( TukeyHSD( aovresult) )
plot( TukeyHSD( aovresult))
	# Joseph R. Mihaljevic
	# July 2013
	# (Partial) Bayesian analysis of variance, accounting for heteroscedasticity
	# Generate some artificial data:
	# Normally distributed groups, but heteroscedastic
	a <- rnorm(25, mean=8, sd=10)
	b <- rnorm(50, mean=5, sd=2)
	c <- rnorm(25, mean=3, sd=.1)
	d <- rnorm(25, mean=11, sd=3)
	e <- rnorm(50, mean=13, sd=2)

	data <- data.frame(meas = c(a,b,c,d,e), group = rep(c(1,2,3,4,5), times=c(length(a), length(b), length(c), length(d), length(e))))
	lst <- split(data, data$group)

	nobs <- length(unique(data$group))
	levels <- data$group

	# JAGS needs a list of the data
	jags_d <- list(y=data$meas,
	levels=levels,
	nobs=nobs)


	# write model
	cat("
	model{
	## Specify hyperpriors and priors

	a1tau ~ dgamma(.1, .1)
	scale ~ dunif(0, 1)
	rate ~ dunif(0, 1)

	## Specify likelihood for ANOVA model
	for (i in 1:nobs){

	y[i] ~ dnorm(mu[i], tau[levels[i]]) #this creates a different tau for each level
	mu[i] <- a1[levels[i]]

	}

	## Specify the effects for each level
	for (j in 1:nobs){
	a1[j] ~ dnorm(0, a1tau)
	}
	## Specify variance for each level
	for (j in 1:nobs){
	tau[j] ~ dgamma(scale, rate)
	}

	## Specify effect contrasts:
	## You should follow the rules of independent contrasts.
	e1v2 <- a1[1]-a1[2]
	e1v3 <- a1[1]-a1[3]
	e1v4 <- a1[1]-a1[4]
	e1v5 <- a1[1]-a1[5]
	e2v3 <- a1[2]-a1[3]
	e2v4 <- a1[2]-a1[4]
	e2v5 <- a1[2]-a1[5]
	e3v4 <- a1[3]-a1[4]
	e3v5 <- a1[3]-a1[5]
	e4v5 <- a1[4]-a1[5]


	# back-transfrom tau so that you can visualze the standard deviation
	for (i in 1:nobs){
	sd[i] <- 1/sqrt(tau[i])
	}
	}
	",
	fill=TRUE, file="FakeANOVAuneqvar.txt")

	library(rjags)

	# initiate model
	mod1 <- jags.model(file="FakeANOVAuneqvar.txt",
	data=jags_d, n.chains=3, n.adapt=1000)


	# simulate the posterior
	out <- coda.samples(mod1, n.iter=2000,
	variable.names=c("a1", "tau", "a1tau",
	"e1v2", "e1v3", "e1v4", "e1v5",
	"e2v3", "e2v4", "e2v5",
	"e3v4", "e3v5",
	"e4v5",
	"sd"))

	str(out) # view the structure of the output object
	summary(out)
	windows()
	plot(out)

	#Visualize the means
	plot(out[, c("a1[1]","a1[2]","a1[3]")]) #you can plot the variables individually this way
	# Notice the differences in variance! (Look at the x-axis limits)

	#Visualize the standard deviation estimates
	plot(out[, c("sd[1]","sd[2]","sd[3]")])
	#Pretty accurate given our initial samples!

	#Now look at the pairwise contrasts
	plot(out[, c("e1v2", "e3v5")])
	#Notice that the difference between group 1 and 2 includes zero with relatively high
	#probability. Thus, these groups are not different in mean effect, accounting for
	#differences in group-level variance.
	# Groups 1&2 (their average) are clearly different from group 3.


	# Let's make some fancier graphs!
	library(ggmcmc)
	all_data <- ggs(out) #Create a data frame of all the simulations
	str(all_data) #Look at the structure

	#Subset out all of the means
	mus <- subset(all_data, Parameter=="a1[1]"\|Parameter=="a1[2]"\|Parameter=="a1[3]",
	select=c(Parameter, value))
	head(mus)

	library(ggplot2)

	#Re-label the group names so that it looks better on the plot
	levels(mus$Parameter)[1]
	for(i in 1:3){
	levels(mus$Parameter)[i] <- paste("Mean of Group", i, sep=" ")
	}
	means <- ggplot(mus, aes(x=value))+
	geom_density()+
	theme_bw()+
	facet_wrap(~Parameter, nrow=1, scales="free")+
	labs(x="Mean Value", y="Density")
	quartz(height=3, width=5.5)
	plot(means)

	#Look at the standard deviations
	sds <- subset(all_data, Parameter=="sd[1]"\|Parameter=="sd[2]"\|Parameter=="sd[3]",
	select=c(Parameter, value))
	head(sds)
	levels(sds$Parameter)
	for(i in 7:9){
	levels(sds$Parameter)[i] <- paste("Std. Dev., Group", i-6, sep=" ")
	}
	stddevs <- ggplot(sds, aes(x=value))+
	geom_density()+
	theme_bw()+
	facet_wrap(~Parameter, nrow=1, scales="free")+
	labs(x="Std. Dev. Value", y="Density")
	quartz(height=3, width=5.5)
	plot(stddevs)

	#Look at the differences in group means

	diffs <- subset(all_data, Parameter=="e1v2"\|Parameter=="e3v4",
	select=c(Parameter, value))
	head(diffs)
	levels(diffs$Parameter)

	levels(diffs$Parameter)[5] <- "Groups 1 v 2"
	levels(diffs$Parameter)[6] <- "Average of Groups 1 & 2 v 3"
	differences <- ggplot(diffs, aes(x=value))+
	geom_density()+
	theme_bw()+
	facet_wrap(~Parameter, nrow=1, scales="free")+
	labs(x="Effects (Differences)", y="Density")
	quartz(height=3, width=5.5)
	plot(differences)


	# NOTE:
	# This is a partial analysis. You will also have to do some model validation!

	# freq ANOVA
	aovresult = aov( meas ~ as.factor(group) , data = data ) # NHST ANOVA

	print( summary( aovresult ) )

	print( model.tables( aovresult , "means" ) , digits=4 )

	boxplot( meas ~ as.factor(group) , data = data , cex.axis=1.25 , ylab="Y")

	print( TukeyHSD( aovresult) )
	plot( TukeyHSD( aovresult))