Skip to content

Instantly share code, notes, and snippets.

@benmarwick
Last active August 29, 2015 13:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save benmarwick/9709811 to your computer and use it in GitHub Desktop.
Save benmarwick/9709811 to your computer and use it in GitHub Desktop.
# Joseph R. Mihaljevic
# July 2013
# (Partial) Bayesian analysis of variance, accounting for heteroscedasticity
# Generate some artificial data:
# Normally distributed groups, but heteroscedastic
a <- rnorm(25, mean=8, sd=10)
b <- rnorm(50, mean=5, sd=2)
c <- rnorm(25, mean=3, sd=.1)
d <- rnorm(25, mean=11, sd=3)
e <- rnorm(50, mean=13, sd=2)
data <- data.frame(meas = c(a,b,c,d,e), group = rep(c(1,2,3,4,5), times=c(length(a), length(b), length(c), length(d), length(e))))
lst <- split(data, data$group)
nobs <- length(unique(data$group))
levels <- data$group
# JAGS needs a list of the data
jags_d <- list(y=data$meas,
levels=levels,
nobs=nobs)
# write model
cat("
model{
## Specify hyperpriors and priors
a1tau ~ dgamma(.1, .1)
scale ~ dunif(0, 1)
rate ~ dunif(0, 1)
## Specify likelihood for ANOVA model
for (i in 1:nobs){
y[i] ~ dnorm(mu[i], tau[levels[i]]) #this creates a different tau for each level
mu[i] <- a1[levels[i]]
}
## Specify the effects for each level
for (j in 1:nobs){
a1[j] ~ dnorm(0, a1tau)
}
## Specify variance for each level
for (j in 1:nobs){
tau[j] ~ dgamma(scale, rate)
}
## Specify effect contrasts:
## You should follow the rules of independent contrasts.
e1v2 <- a1[1]-a1[2]
e1v3 <- a1[1]-a1[3]
e1v4 <- a1[1]-a1[4]
e1v5 <- a1[1]-a1[5]
e2v3 <- a1[2]-a1[3]
e2v4 <- a1[2]-a1[4]
e2v5 <- a1[2]-a1[5]
e3v4 <- a1[3]-a1[4]
e3v5 <- a1[3]-a1[5]
e4v5 <- a1[4]-a1[5]
# back-transfrom tau so that you can visualze the standard deviation
for (i in 1:nobs){
sd[i] <- 1/sqrt(tau[i])
}
}
",
fill=TRUE, file="FakeANOVAuneqvar.txt")
library(rjags)
# initiate model
mod1 <- jags.model(file="FakeANOVAuneqvar.txt",
data=jags_d, n.chains=3, n.adapt=1000)
# simulate the posterior
out <- coda.samples(mod1, n.iter=2000,
variable.names=c("a1", "tau", "a1tau",
"e1v2", "e1v3", "e1v4", "e1v5",
"e2v3", "e2v4", "e2v5",
"e3v4", "e3v5",
"e4v5",
"sd"))
str(out) # view the structure of the output object
summary(out)
windows()
plot(out)
#Visualize the means
plot(out[, c("a1[1]","a1[2]","a1[3]")]) #you can plot the variables individually this way
# Notice the differences in variance! (Look at the x-axis limits)
#Visualize the standard deviation estimates
plot(out[, c("sd[1]","sd[2]","sd[3]")])
#Pretty accurate given our initial samples!
#Now look at the pairwise contrasts
plot(out[, c("e1v2", "e3v5")])
#Notice that the difference between group 1 and 2 includes zero with relatively high
#probability. Thus, these groups are not different in mean effect, accounting for
#differences in group-level variance.
# Groups 1&2 (their average) are clearly different from group 3.
# Let's make some fancier graphs!
library(ggmcmc)
all_data <- ggs(out) #Create a data frame of all the simulations
str(all_data) #Look at the structure
#Subset out all of the means
mus <- subset(all_data, Parameter=="a1[1]"|Parameter=="a1[2]"|Parameter=="a1[3]",
select=c(Parameter, value))
head(mus)
library(ggplot2)
#Re-label the group names so that it looks better on the plot
levels(mus$Parameter)[1]
for(i in 1:3){
levels(mus$Parameter)[i] <- paste("Mean of Group", i, sep=" ")
}
means <- ggplot(mus, aes(x=value))+
geom_density()+
theme_bw()+
facet_wrap(~Parameter, nrow=1, scales="free")+
labs(x="Mean Value", y="Density")
quartz(height=3, width=5.5)
plot(means)
#Look at the standard deviations
sds <- subset(all_data, Parameter=="sd[1]"|Parameter=="sd[2]"|Parameter=="sd[3]",
select=c(Parameter, value))
head(sds)
levels(sds$Parameter)
for(i in 7:9){
levels(sds$Parameter)[i] <- paste("Std. Dev., Group", i-6, sep=" ")
}
stddevs <- ggplot(sds, aes(x=value))+
geom_density()+
theme_bw()+
facet_wrap(~Parameter, nrow=1, scales="free")+
labs(x="Std. Dev. Value", y="Density")
quartz(height=3, width=5.5)
plot(stddevs)
#Look at the differences in group means
diffs <- subset(all_data, Parameter=="e1v2"|Parameter=="e3v4",
select=c(Parameter, value))
head(diffs)
levels(diffs$Parameter)
levels(diffs$Parameter)[5] <- "Groups 1 v 2"
levels(diffs$Parameter)[6] <- "Average of Groups 1 & 2 v 3"
differences <- ggplot(diffs, aes(x=value))+
geom_density()+
theme_bw()+
facet_wrap(~Parameter, nrow=1, scales="free")+
labs(x="Effects (Differences)", y="Density")
quartz(height=3, width=5.5)
plot(differences)
# NOTE:
# This is a partial analysis. You will also have to do some model validation!
# freq ANOVA
aovresult = aov( meas ~ as.factor(group) , data = data ) # NHST ANOVA
print( summary( aovresult ) )
print( model.tables( aovresult , "means" ) , digits=4 )
boxplot( meas ~ as.factor(group) , data = data , cex.axis=1.25 , ylab="Y")
print( TukeyHSD( aovresult) )
plot( TukeyHSD( aovresult))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment