Skip to content

Instantly share code, notes, and snippets.

@gjkerns

gjkerns/visualizeANOVA.R Secret

Created Jan 1, 2012
Embed
What would you like to do?
Visualize standard balanced ANOVA
library(manipulate) # need RStudio for this version
# see other version below without sliders and/or no RStudio
# otherwise, should be copy-paste operation
# to restart, only need to re-run manipulate() command
anovaplot <- function(n,u,v,w){
par(mfrow = c(2,2))
# generate data
score <- rnorm(n*3, mean = c(u,v,w))
group <- factor(rep(LETTERS[1:3], n))
xbot <- min(c(u,v,w)) - 3
xtop <- max(c(u,v,w)) + 3
plot(1, main = "Population dist'ns", xlim = c(xbot,xtop), ylim = c(0,0.45),
type="n", xlab="score", ylab="density")
f <- function(x){dnorm(x, mean = u)}
curve(f, from = u - 3, to = u + 3, add = TRUE, lwd = 2, col = "red")
f <- function(x){dnorm(x, mean = v)}
curve(f, from = v - 3, to = v + 3, add = TRUE, lwd = 2, col = "green")
f <- function(x){dnorm(x, mean = w)}
curve(f, from = w - 3, to = w + 3, add = TRUE, lwd = 2, col = "blue")
rug(score[group == "A"], col = "red", lwd = 2, ticksize = 0.07, quiet = TRUE)
rug(score[group == "B"], col = "green", lwd = 2, ticksize = 0.07, quiet = TRUE)
rug(score[group == "C"], col = "blue", lwd = 2, ticksize = 0.07, quiet = TRUE)
# between-within plot
B <- anova(lm(score ~ group))$"Mean Sq"
names(B) <- c("between", "within")
barplot(B, main = "Between vs. within")
# stripcharts
stripchart(score ~ group, xlim = c(xbot,xtop), col = c("red", "green", "blue"),
ylab = "group", pch = 1, main = "The data")
# f distn plots
means <- c(u,v,w)
cntr <- means - mean(means)
lambda <- n*sum(cntr^2)
f <- function(x) df(x, df1 = 2, df2 = 3*(n - 1))
g <- function(x) df(x, df1 = 2, df2 = 3*(n - 1), ncp = lambda)
uplim <- qf(0.975, df1 = 2, df2 = 3*(n - 1), ncp = lambda)
curve(f, from = 0, to = uplim, lwd = 3, main = "(Non)central F dist'ns",
xlab = "F-ratio", ylab = "density")
curve(g, from = 0, to = uplim, lwd = 3, col = "red", add = TRUE)
fstat <- summary(lm(score ~ group))$fstatistic[1]
abline(v = fstat, lty = 2, col = "green", lwd = 3)
par(mfrow = c(1,1))
}
# below is all that's needed to restart the plot
manipulate(anovaplot(n,u,v,w),
n = slider(4, 40, step = 2, initial = 4, label = "Sample size per group"),
u = slider(-1, 1, step = 0.1, initial = 0, label = "Mean for group A"),
v = slider(-1, 1, step = 0.1, initial = 0, label = "Mean for group B"),
w = slider(-1, 1, step = 0.1, initial = 0, label = "Mean for group C")
)
# this one is for *not* with RStudio, and/or for finer control
# see the other one for (n,u,v,w) as sliders (requires RStudio)
# this one should be basically a copy/paste operation
# preface with set.seed(something) to manage randomness
n <- 20 # Sample size per group
u <- -1 # Mean for group A
v <- 0 # Mean for group B
w <- 1 # Mean for group C
par(mfrow = c(2,2))
# generate data
score <- rnorm(n*3, mean = c(u,v,w))
group <- factor(rep(LETTERS[1:3], n))
xbot <- min(c(u,v,w)) - 3
xtop <- max(c(u,v,w)) + 3
plot(1, main = "Population dist'ns", xlim = c(xbot,xtop), ylim = c(0,0.45),
type="n", xlab="score", ylab="density")
f <- function(x){dnorm(x, mean = u)}
curve(f, from = u - 3, to = u + 3, add = TRUE, lwd = 2, col = "red")
f <- function(x){dnorm(x, mean = v)}
curve(f, from = v - 3, to = v + 3, add = TRUE, lwd = 2, col = "green")
f <- function(x){dnorm(x, mean = w)}
curve(f, from = w - 3, to = w + 3, add = TRUE, lwd = 2, col = "blue")
rug(score[group == "A"], col = "red", lwd = 2, ticksize = 0.07, quiet = TRUE)
rug(score[group == "B"], col = "green", lwd = 2, ticksize = 0.07, quiet = TRUE)
rug(score[group == "C"], col = "blue", lwd = 2, ticksize = 0.07, quiet = TRUE)
# between-within plot
B <- anova(lm(score ~ group))$"Mean Sq"
names(B) <- c("between", "within")
barplot(B, main = "Between vs. within")
# stripcharts
stripchart(score ~ group, xlim = c(xbot,xtop), col = c("red", "green", "blue"),
ylab = "group", pch = 1, main = "The data")
# f distn plots
means <- c(u,v,w)
cntr <- means - mean(means)
lambda <- n*sum(cntr^2)
f <- function(x) df(x, df1 = 2, df2 = 3*(n - 1))
g <- function(x) df(x, df1 = 2, df2 = 3*(n - 1), ncp = lambda)
uplim <- qf(0.975, df1 = 2, df2 = 3*(n - 1), ncp = lambda)
curve(f, from = 0, to = uplim, lwd = 3, main = "(Non)central F dist'ns",
xlab = "F-ratio", ylab = "density")
curve(g, from = 0, to = uplim, lwd = 3, col = "red", add = TRUE)
fstat <- summary(lm(score ~ group))$fstatistic[1]
abline(v = fstat, lty = 2, col = "green", lwd = 3)
par(mfrow = c(1,1))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment