gjkerns/visualizeANOVA.R Secret

## visualizeANOVA.R
library(manipulate)  # need RStudio for this version

# see other version below without sliders and/or no RStudio
# otherwise, should be copy-paste operation
# to restart, only need to re-run manipulate() command

anovaplot <- function(n,u,v,w){
  par(mfrow = c(2,2))
  # generate data
  score <- rnorm(n*3, mean = c(u,v,w))
  group <- factor(rep(LETTERS[1:3], n))
  xbot <- min(c(u,v,w)) - 3
  xtop <- max(c(u,v,w)) + 3
  plot(1, main = "Population dist'ns", xlim = c(xbot,xtop), ylim = c(0,0.45),
       type="n", xlab="score", ylab="density")
  f <- function(x){dnorm(x, mean = u)}
  curve(f, from = u - 3, to = u + 3, add = TRUE, lwd = 2, col = "red")
  f <- function(x){dnorm(x, mean = v)}
  curve(f, from = v - 3, to = v + 3, add = TRUE, lwd = 2, col = "green")
  f <- function(x){dnorm(x, mean = w)}
  curve(f, from = w - 3, to = w + 3, add = TRUE, lwd = 2, col = "blue")
  rug(score[group == "A"], col = "red", lwd = 2, ticksize = 0.07, quiet = TRUE)
  rug(score[group == "B"], col = "green", lwd = 2, ticksize = 0.07, quiet = TRUE)
  rug(score[group == "C"], col = "blue", lwd = 2, ticksize = 0.07, quiet = TRUE)
  # between-within plot
  B <- anova(lm(score ~ group))$"Mean Sq"
  names(B) <- c("between", "within")
  barplot(B, main = "Between vs. within")
  # stripcharts
  stripchart(score ~ group, xlim = c(xbot,xtop), col = c("red", "green", "blue"),
             ylab = "group", pch = 1, main = "The data")
  # f distn plots
  means <- c(u,v,w)
  cntr <- means - mean(means)
  lambda <- n*sum(cntr^2)
  f <- function(x) df(x, df1 = 2, df2 = 3*(n - 1))
  g <- function(x) df(x, df1 = 2, df2 = 3*(n - 1), ncp = lambda)
  uplim <- qf(0.975, df1 = 2, df2 = 3*(n - 1), ncp = lambda)
  curve(f, from = 0, to = uplim, lwd = 3, main = "(Non)central F dist'ns",
        xlab = "F-ratio", ylab = "density")
  curve(g, from = 0, to = uplim, lwd = 3, col = "red", add = TRUE)
  fstat <- summary(lm(score ~ group))$fstatistic[1]
  abline(v = fstat, lty = 2, col = "green", lwd = 3)
  par(mfrow = c(1,1))
}

# below is all that's needed to restart the plot

manipulate(anovaplot(n,u,v,w),
  n = slider(4, 40, step = 2, initial = 4,   label = "Sample size per group"),
  u = slider(-1, 1, step = 0.1, initial = 0, label = "Mean for group A"),
  v = slider(-1, 1, step = 0.1, initial = 0, label = "Mean for group B"),
  w = slider(-1, 1, step = 0.1, initial = 0, label = "Mean for group C")
)

## visualizeANOVAnoSliders.R
# this one is for *not* with RStudio, and/or for finer control
# see the other one for (n,u,v,w) as sliders (requires RStudio)

# this one should be basically a copy/paste operation
# preface with set.seed(something) to manage randomness


  n <- 20    # Sample size per group
  u <- -1    # Mean for group A
  v <-  0    # Mean for group B
  w <-  1    # Mean for group C


  par(mfrow = c(2,2))
  # generate data
  score <- rnorm(n*3, mean = c(u,v,w))
  group <- factor(rep(LETTERS[1:3], n))
  xbot <- min(c(u,v,w)) - 3
  xtop <- max(c(u,v,w)) + 3
  plot(1, main = "Population dist'ns", xlim = c(xbot,xtop), ylim = c(0,0.45),
       type="n", xlab="score", ylab="density")
  f <- function(x){dnorm(x, mean = u)}
  curve(f, from = u - 3, to = u + 3, add = TRUE, lwd = 2, col = "red")
  f <- function(x){dnorm(x, mean = v)}
  curve(f, from = v - 3, to = v + 3, add = TRUE, lwd = 2, col = "green")
  f <- function(x){dnorm(x, mean = w)}
  curve(f, from = w - 3, to = w + 3, add = TRUE, lwd = 2, col = "blue")
  rug(score[group == "A"], col = "red", lwd = 2, ticksize = 0.07, quiet = TRUE)
  rug(score[group == "B"], col = "green", lwd = 2, ticksize = 0.07, quiet = TRUE)
  rug(score[group == "C"], col = "blue", lwd = 2, ticksize = 0.07, quiet = TRUE)
  # between-within plot
  B <- anova(lm(score ~ group))$"Mean Sq"
  names(B) <- c("between", "within")
  barplot(B, main = "Between vs. within")
  # stripcharts
  stripchart(score ~ group, xlim = c(xbot,xtop), col = c("red", "green", "blue"),
             ylab = "group", pch = 1, main = "The data")
  # f distn plots
  means <- c(u,v,w)
  cntr <- means - mean(means)
  lambda <- n*sum(cntr^2)
  f <- function(x) df(x, df1 = 2, df2 = 3*(n - 1))
  g <- function(x) df(x, df1 = 2, df2 = 3*(n - 1), ncp = lambda)
  uplim <- qf(0.975, df1 = 2, df2 = 3*(n - 1), ncp = lambda)
  curve(f, from = 0, to = uplim, lwd = 3, main = "(Non)central F dist'ns",
        xlab = "F-ratio", ylab = "density")
  curve(g, from = 0, to = uplim, lwd = 3, col = "red", add = TRUE)
  fstat <- summary(lm(score ~ group))$fstatistic[1]
  abline(v = fstat, lty = 2, col = "green", lwd = 3)
  par(mfrow = c(1,1))
	library(manipulate) # need RStudio for this version

	# see other version below without sliders and/or no RStudio
	# otherwise, should be copy-paste operation
	# to restart, only need to re-run manipulate() command

	anovaplot <- function(n,u,v,w){
	par(mfrow = c(2,2))
	# generate data
	score <- rnorm(n*3, mean = c(u,v,w))
	group <- factor(rep(LETTERS[1:3], n))
	xbot <- min(c(u,v,w)) - 3
	xtop <- max(c(u,v,w)) + 3
	plot(1, main = "Population dist'ns", xlim = c(xbot,xtop), ylim = c(0,0.45),
	type="n", xlab="score", ylab="density")
	f <- function(x){dnorm(x, mean = u)}
	curve(f, from = u - 3, to = u + 3, add = TRUE, lwd = 2, col = "red")
	f <- function(x){dnorm(x, mean = v)}
	curve(f, from = v - 3, to = v + 3, add = TRUE, lwd = 2, col = "green")
	f <- function(x){dnorm(x, mean = w)}
	curve(f, from = w - 3, to = w + 3, add = TRUE, lwd = 2, col = "blue")
	rug(score[group == "A"], col = "red", lwd = 2, ticksize = 0.07, quiet = TRUE)
	rug(score[group == "B"], col = "green", lwd = 2, ticksize = 0.07, quiet = TRUE)
	rug(score[group == "C"], col = "blue", lwd = 2, ticksize = 0.07, quiet = TRUE)
	# between-within plot
	B <- anova(lm(score ~ group))$"Mean Sq"
	names(B) <- c("between", "within")
	barplot(B, main = "Between vs. within")
	# stripcharts
	stripchart(score ~ group, xlim = c(xbot,xtop), col = c("red", "green", "blue"),
	ylab = "group", pch = 1, main = "The data")
	# f distn plots
	means <- c(u,v,w)
	cntr <- means - mean(means)
	lambda <- n*sum(cntr^2)
	f <- function(x) df(x, df1 = 2, df2 = 3*(n - 1))
	g <- function(x) df(x, df1 = 2, df2 = 3*(n - 1), ncp = lambda)
	uplim <- qf(0.975, df1 = 2, df2 = 3*(n - 1), ncp = lambda)
	curve(f, from = 0, to = uplim, lwd = 3, main = "(Non)central F dist'ns",
	xlab = "F-ratio", ylab = "density")
	curve(g, from = 0, to = uplim, lwd = 3, col = "red", add = TRUE)
	fstat <- summary(lm(score ~ group))$fstatistic[1]
	abline(v = fstat, lty = 2, col = "green", lwd = 3)
	par(mfrow = c(1,1))
	}

	# below is all that's needed to restart the plot

	manipulate(anovaplot(n,u,v,w),
	n = slider(4, 40, step = 2, initial = 4, label = "Sample size per group"),
	u = slider(-1, 1, step = 0.1, initial = 0, label = "Mean for group A"),
	v = slider(-1, 1, step = 0.1, initial = 0, label = "Mean for group B"),
	w = slider(-1, 1, step = 0.1, initial = 0, label = "Mean for group C")
	)
	# this one is for not with RStudio, and/or for finer control
	# see the other one for (n,u,v,w) as sliders (requires RStudio)

	# this one should be basically a copy/paste operation
	# preface with set.seed(something) to manage randomness


	n <- 20 # Sample size per group
	u <- -1 # Mean for group A
	v <- 0 # Mean for group B
	w <- 1 # Mean for group C


	par(mfrow = c(2,2))
	# generate data
	score <- rnorm(n*3, mean = c(u,v,w))
	group <- factor(rep(LETTERS[1:3], n))
	xbot <- min(c(u,v,w)) - 3
	xtop <- max(c(u,v,w)) + 3
	plot(1, main = "Population dist'ns", xlim = c(xbot,xtop), ylim = c(0,0.45),
	type="n", xlab="score", ylab="density")
	f <- function(x){dnorm(x, mean = u)}
	curve(f, from = u - 3, to = u + 3, add = TRUE, lwd = 2, col = "red")
	f <- function(x){dnorm(x, mean = v)}
	curve(f, from = v - 3, to = v + 3, add = TRUE, lwd = 2, col = "green")
	f <- function(x){dnorm(x, mean = w)}
	curve(f, from = w - 3, to = w + 3, add = TRUE, lwd = 2, col = "blue")
	rug(score[group == "A"], col = "red", lwd = 2, ticksize = 0.07, quiet = TRUE)
	rug(score[group == "B"], col = "green", lwd = 2, ticksize = 0.07, quiet = TRUE)
	rug(score[group == "C"], col = "blue", lwd = 2, ticksize = 0.07, quiet = TRUE)
	# between-within plot
	B <- anova(lm(score ~ group))$"Mean Sq"
	names(B) <- c("between", "within")
	barplot(B, main = "Between vs. within")
	# stripcharts
	stripchart(score ~ group, xlim = c(xbot,xtop), col = c("red", "green", "blue"),
	ylab = "group", pch = 1, main = "The data")
	# f distn plots
	means <- c(u,v,w)
	cntr <- means - mean(means)
	lambda <- n*sum(cntr^2)
	f <- function(x) df(x, df1 = 2, df2 = 3*(n - 1))
	g <- function(x) df(x, df1 = 2, df2 = 3*(n - 1), ncp = lambda)
	uplim <- qf(0.975, df1 = 2, df2 = 3*(n - 1), ncp = lambda)
	curve(f, from = 0, to = uplim, lwd = 3, main = "(Non)central F dist'ns",
	xlab = "F-ratio", ylab = "density")
	curve(g, from = 0, to = uplim, lwd = 3, col = "red", add = TRUE)
	fstat <- summary(lm(score ~ group))$fstatistic[1]
	abline(v = fstat, lty = 2, col = "green", lwd = 3)
	par(mfrow = c(1,1))