Visualize standard balanced ANOVA
library(manipulate) # need RStudio for this version | |
# see other version below without sliders and/or no RStudio | |
# otherwise, should be copy-paste operation | |
# to restart, only need to re-run manipulate() command | |
anovaplot <- function(n,u,v,w){ | |
par(mfrow = c(2,2)) | |
# generate data | |
score <- rnorm(n*3, mean = c(u,v,w)) | |
group <- factor(rep(LETTERS[1:3], n)) | |
xbot <- min(c(u,v,w)) - 3 | |
xtop <- max(c(u,v,w)) + 3 | |
plot(1, main = "Population dist'ns", xlim = c(xbot,xtop), ylim = c(0,0.45), | |
type="n", xlab="score", ylab="density") | |
f <- function(x){dnorm(x, mean = u)} | |
curve(f, from = u - 3, to = u + 3, add = TRUE, lwd = 2, col = "red") | |
f <- function(x){dnorm(x, mean = v)} | |
curve(f, from = v - 3, to = v + 3, add = TRUE, lwd = 2, col = "green") | |
f <- function(x){dnorm(x, mean = w)} | |
curve(f, from = w - 3, to = w + 3, add = TRUE, lwd = 2, col = "blue") | |
rug(score[group == "A"], col = "red", lwd = 2, ticksize = 0.07, quiet = TRUE) | |
rug(score[group == "B"], col = "green", lwd = 2, ticksize = 0.07, quiet = TRUE) | |
rug(score[group == "C"], col = "blue", lwd = 2, ticksize = 0.07, quiet = TRUE) | |
# between-within plot | |
B <- anova(lm(score ~ group))$"Mean Sq" | |
names(B) <- c("between", "within") | |
barplot(B, main = "Between vs. within") | |
# stripcharts | |
stripchart(score ~ group, xlim = c(xbot,xtop), col = c("red", "green", "blue"), | |
ylab = "group", pch = 1, main = "The data") | |
# f distn plots | |
means <- c(u,v,w) | |
cntr <- means - mean(means) | |
lambda <- n*sum(cntr^2) | |
f <- function(x) df(x, df1 = 2, df2 = 3*(n - 1)) | |
g <- function(x) df(x, df1 = 2, df2 = 3*(n - 1), ncp = lambda) | |
uplim <- qf(0.975, df1 = 2, df2 = 3*(n - 1), ncp = lambda) | |
curve(f, from = 0, to = uplim, lwd = 3, main = "(Non)central F dist'ns", | |
xlab = "F-ratio", ylab = "density") | |
curve(g, from = 0, to = uplim, lwd = 3, col = "red", add = TRUE) | |
fstat <- summary(lm(score ~ group))$fstatistic[1] | |
abline(v = fstat, lty = 2, col = "green", lwd = 3) | |
par(mfrow = c(1,1)) | |
} | |
# below is all that's needed to restart the plot | |
manipulate(anovaplot(n,u,v,w), | |
n = slider(4, 40, step = 2, initial = 4, label = "Sample size per group"), | |
u = slider(-1, 1, step = 0.1, initial = 0, label = "Mean for group A"), | |
v = slider(-1, 1, step = 0.1, initial = 0, label = "Mean for group B"), | |
w = slider(-1, 1, step = 0.1, initial = 0, label = "Mean for group C") | |
) |
# this one is for *not* with RStudio, and/or for finer control | |
# see the other one for (n,u,v,w) as sliders (requires RStudio) | |
# this one should be basically a copy/paste operation | |
# preface with set.seed(something) to manage randomness | |
n <- 20 # Sample size per group | |
u <- -1 # Mean for group A | |
v <- 0 # Mean for group B | |
w <- 1 # Mean for group C | |
par(mfrow = c(2,2)) | |
# generate data | |
score <- rnorm(n*3, mean = c(u,v,w)) | |
group <- factor(rep(LETTERS[1:3], n)) | |
xbot <- min(c(u,v,w)) - 3 | |
xtop <- max(c(u,v,w)) + 3 | |
plot(1, main = "Population dist'ns", xlim = c(xbot,xtop), ylim = c(0,0.45), | |
type="n", xlab="score", ylab="density") | |
f <- function(x){dnorm(x, mean = u)} | |
curve(f, from = u - 3, to = u + 3, add = TRUE, lwd = 2, col = "red") | |
f <- function(x){dnorm(x, mean = v)} | |
curve(f, from = v - 3, to = v + 3, add = TRUE, lwd = 2, col = "green") | |
f <- function(x){dnorm(x, mean = w)} | |
curve(f, from = w - 3, to = w + 3, add = TRUE, lwd = 2, col = "blue") | |
rug(score[group == "A"], col = "red", lwd = 2, ticksize = 0.07, quiet = TRUE) | |
rug(score[group == "B"], col = "green", lwd = 2, ticksize = 0.07, quiet = TRUE) | |
rug(score[group == "C"], col = "blue", lwd = 2, ticksize = 0.07, quiet = TRUE) | |
# between-within plot | |
B <- anova(lm(score ~ group))$"Mean Sq" | |
names(B) <- c("between", "within") | |
barplot(B, main = "Between vs. within") | |
# stripcharts | |
stripchart(score ~ group, xlim = c(xbot,xtop), col = c("red", "green", "blue"), | |
ylab = "group", pch = 1, main = "The data") | |
# f distn plots | |
means <- c(u,v,w) | |
cntr <- means - mean(means) | |
lambda <- n*sum(cntr^2) | |
f <- function(x) df(x, df1 = 2, df2 = 3*(n - 1)) | |
g <- function(x) df(x, df1 = 2, df2 = 3*(n - 1), ncp = lambda) | |
uplim <- qf(0.975, df1 = 2, df2 = 3*(n - 1), ncp = lambda) | |
curve(f, from = 0, to = uplim, lwd = 3, main = "(Non)central F dist'ns", | |
xlab = "F-ratio", ylab = "density") | |
curve(g, from = 0, to = uplim, lwd = 3, col = "red", add = TRUE) | |
fstat <- summary(lm(score ~ group))$fstatistic[1] | |
abline(v = fstat, lty = 2, col = "green", lwd = 3) | |
par(mfrow = c(1,1)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment