timothyslau/ANCOVAadjmean

## ANCOVAadjmean

# My attempt to make Cohen's example into a formula (the correct calculations for the example in the textbook start on line 40)

adjmeans <- function(IV, DV, CV, data){
  ano1 <- lm(formula = getElement(object = data, name = DV) ~ getElement(object = data, name = CV))
  ano2 <- lm(formula = getElement(object = data, name = IV) ~ getElement(object = data, name = CV))

  SSYer <- anova(ano1)$"Sum Sq"[2]
  SSXer <- anova(ano2)$"Sum Sq"[2]

  scpw <- sum(sapply(X = levels(data$CV), FUN = function(condition){
    n <- length(data[data$condition == condition, IV]) - 1
    sdx <- sd(data[data$condition == condition, IV])
    sdy <- sd(data[data$condition == condition, DV])
    r <- cor(data[data$condition == condition, DV], data[data$condition == condition, IV])
    n * sdx * sdy * r}))

  r_pooled <- scpw / sqrt(SSXer * SSYer)

  bp <- r_pooled * sqrt(SSYer / SSXer)

  # adusted means
  return(sapply(X = levels(data$CV), FUN = function(condition){mean(data[data$condition == condition, DV]) - bp * (mean(data[data$condition == condition, "initial"]) - mean(data$initial))}))
} # made to work with a model with a linear model; 1 continuous outcome, 1 continuous predictor, 1 categorical predictor (treatment, control variable)
adjmeans(IV = "initial", DV = "final", CV = "condition", data = dat)


# data from p.645 Explaining Psychological Statistics
dat <- data.frame(initial = c(1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 1, 2, 2, 3, 3, 4, 5, 6, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7), final = c(3, 4, 3, 5, 7, 7, 6, 9, 6, 8, 2, 6, 5, 8, 6, 9, 8, 6, 2, 4, 3, 5, 3, 7, 5, 7, 4, 9, 8, 6), condition = factor(x = c(rep(x = "psycho", times = 10), rep(x = "behav", times = 8), rep(x = "cont", times = 12)), levels = c("cont", "behav", "psycho")))

# the computations to verify my numbers
ano1 <- lm(formula = final ~ condition, data = dat)
ano2 <- lm(formula = initial ~ condition, data = dat)
anova(ano1)
anova(ano2)

SSYer <- anova(ano1)$"Sum Sq"[2]
SSXer <- anova(ano2)$"Sum Sq"[2]
sum(anova(ano)$"Sum Sq") * (1 - cor(dat$initial, dat$final)^2) # SSAtotal p. 646

scpw <- sum(sapply(X = levels(dat$condition), FUN = function(condition, x = "initial", y = "final"){{
  n <- length(dat[dat$condition == condition, x]) - 1
  sdx <- sd(dat[dat$condition == condition, x])
  sdy <- sd(dat[dat$condition == condition, y])
  r <- cor(dat[dat$condition == condition, y], dat[dat$condition == condition, x])
  n * sdx * sdy * r}}))

r_pooled <- scpw / sqrt(SSXer * SSYer)

bp <- r_pooled * sqrt(SSYer / SSXer)

sapply(X = levels(dat$condition), FUN = function(condition){mean(dat[dat$condition == condition, "final"]) - bp * (mean(dat[dat$condition == condition, "initial"]) - mean(dat$initial))})

	# My attempt to make Cohen's example into a formula (the correct calculations for the example in the textbook start on line 40)

	adjmeans <- function(IV, DV, CV, data){
	ano1 <- lm(formula = getElement(object = data, name = DV) ~ getElement(object = data, name = CV))
	ano2 <- lm(formula = getElement(object = data, name = IV) ~ getElement(object = data, name = CV))

	SSYer <- anova(ano1)$"Sum Sq"[2]
	SSXer <- anova(ano2)$"Sum Sq"[2]

	scpw <- sum(sapply(X = levels(data$CV), FUN = function(condition){
	n <- length(data[data$condition == condition, IV]) - 1
	sdx <- sd(data[data$condition == condition, IV])
	sdy <- sd(data[data$condition == condition, DV])
	r <- cor(data[data$condition == condition, DV], data[data$condition == condition, IV])
	n * sdx * sdy * r}))

	r_pooled <- scpw / sqrt(SSXer * SSYer)

	bp <- r_pooled * sqrt(SSYer / SSXer)

	# adusted means
	return(sapply(X = levels(data$CV), FUN = function(condition){mean(data[data$condition == condition, DV]) - bp * (mean(data[data$condition == condition, "initial"]) - mean(data$initial))}))
	} # made to work with a model with a linear model; 1 continuous outcome, 1 continuous predictor, 1 categorical predictor (treatment, control variable)
	adjmeans(IV = "initial", DV = "final", CV = "condition", data = dat)














	# data from p.645 Explaining Psychological Statistics
	dat <- data.frame(initial = c(1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 1, 2, 2, 3, 3, 4, 5, 6, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7), final = c(3, 4, 3, 5, 7, 7, 6, 9, 6, 8, 2, 6, 5, 8, 6, 9, 8, 6, 2, 4, 3, 5, 3, 7, 5, 7, 4, 9, 8, 6), condition = factor(x = c(rep(x = "psycho", times = 10), rep(x = "behav", times = 8), rep(x = "cont", times = 12)), levels = c("cont", "behav", "psycho")))

	# the computations to verify my numbers
	ano1 <- lm(formula = final ~ condition, data = dat)
	ano2 <- lm(formula = initial ~ condition, data = dat)
	anova(ano1)
	anova(ano2)

	SSYer <- anova(ano1)$"Sum Sq"[2]
	SSXer <- anova(ano2)$"Sum Sq"[2]
	sum(anova(ano)$"Sum Sq") * (1 - cor(dat$initial, dat$final)^2) # SSAtotal p. 646

	scpw <- sum(sapply(X = levels(dat$condition), FUN = function(condition, x = "initial", y = "final"){{
	n <- length(dat[dat$condition == condition, x]) - 1
	sdx <- sd(dat[dat$condition == condition, x])
	sdy <- sd(dat[dat$condition == condition, y])
	r <- cor(dat[dat$condition == condition, y], dat[dat$condition == condition, x])
	n * sdx * sdy * r}}))

	r_pooled <- scpw / sqrt(SSXer * SSYer)

	bp <- r_pooled * sqrt(SSYer / SSXer)

	sapply(X = levels(dat$condition), FUN = function(condition){mean(dat[dat$condition == condition, "final"]) - bp * (mean(dat[dat$condition == condition, "initial"]) - mean(dat$initial))})