bwiernik/compute_smd.R

## compute_smd.R
.cmicalc <- function (df) {
  cmi <- ifelse(df <= 1, NA,
                exp(lgamma(df/2) - log(sqrt(df/2)) - lgamma((df - 1)/2))
                )
  return(cmi)
}

smd_stats <-
  function(diff = NULL,
           se_diff = NULL,
           df = NULL,
           m1 = NULL, m2 = NULL,
           n1 = NULL, n2 = NULL,
           sd1 = NULL, sd2 = NULL,
           sigma = NULL,
           mse = NULL,
           t_ncp = NULL,
           R2_cov = 0,
           q = 0,
           method = c("smd_pooledSD", "smd_controlSD", "smd_aveSD"),
           unbiased = FALSE,
           conf.int = TRUE,
           conf.level = 0.95,
           conf_method = c("t", "norm"),
           sd_method = c("unbiased", "ml"),
           std_method = c("unbiased", "ml"),
           se_var_method = c("ls", "unbiased"),
           ...) {
    method <- match.arg(method, choices = c("smd_pooledSD", "smd_controlSD", "smd_aveSD"))

    # Compute degrees of freedom
    if (is.null(df)) {
      if (is.null(n1) & is.null(n2)) {
        stop("One of these must be provided:\n  (1) `df`\n  (2) `n1`, `n2`, `q`")
      } else {
        df <- n1 + n2 - 2 - q
      }
    }
    # TODO: Add Satterwhite degrees of freedom

    # Compute standardizer
    if (all(is.null(mse),
            is.null(sigma),
            any(is.null(sd1),
                is.null(sd2),
                is.null(n1),
                is.null(n2)))
        ) {
      stop("One of these must be provided:\n  (1) `sigma`\n  (2)`mse`\n  (3) `sd1`, `sd2`, `n1`, `n2`.")
    } else if (is.null(mse)) {
      if (!is.null(sigma)) {
        mse <- sigma^2
      } else {
        sd_method <- match.arg(sd_method, choices = c("unbiased", "ml"))
        std_method <- match.arg(std_method, choices = c("unbiased", "ml"))
        if (method == "smd_pooledSD") {
          if (sd_method == "unbiased") {
            ssq1 <-  (n1 - 1) * sd1^2
            ssq2 <-  (n2 - 1) * sd2^2
          } else {
            ssq1 <-  n1 * sd1^2
            ssq2 <-  n2 * sd2^2
          }
          ssq <- ssq2 + ssq2
          mse <-
            ifelse(std_method == "unbiased",
                   ssq / df,
                   ssq / (n1 + n2)
            )
        } else if (method == "smd_controlSD") {
          if (sd_method == "unbiased") {
            ssq <-  (n1 - 1) * sd1^2
          } else {
            ssq <-  n1 * sd1^2
          }
          mse <-
            ifelse(std_method == "unbiased",
                   ssq / df,
                   ssq / (n1 + n2)
            )
        } else {
          if (sd_method == "unbiased") {
            mse <-
              ifelse(std_method == "unbiased",
                     (sd1^2 + sd2^2) / 2,
                     (sd1^2 * (n1 - 1) / n1 + sd2^2 * (n2 - 1) / n2) / 2
            )
          } else {
            mse <-
              ifelse(std_method == "unbiased",
                     (sd1^2 * n1 / (n1 - 1) + sd2^2 * n2 / (n2 - 1)) / 2,
                     (sd1^2 + sd2^2) / 2
              )
          }
        }
      }
    }

    # TODO: Implement large-sample vs unbiased sampling error variance
    if (is.null(diff)) {
      if (is.null(m1) & is.null(m2)) {
        stop("One of these must be provided:\n  (1) `diff`\n  (2) `m1`, `m2`")
      } else {
        diff <- m2 - m1
      }
    }

    # Adjust standardizer for covariates
    A <- (1 - R2_cov)
    mse <- mse / A

    # Compute d
    d <- diff / sqrt(mse)

    # Compute variance of d
    se_var <-
      switch(method,
             smd_pooledSD = A * (n1 + n2) / (n1 * n2) + d^2 / (2 * (n1 + n2)),
             smd_controlSD = A * (1/n1 + (sd2^2 / sd1^2) / n2 + d^2 / (2 * n1)),
             smd_aveSD = A * (d^2 / (8 * mse^2) * (sd1^4 / (n1 - 1) + sd2^4 / (n2 - 1)) + (sd1^2 / mse) / (n1 - 1) + (sd2^2 / mse) / (n2 - 1))
      )
    if (method != "smd_pooledSD" & R2_cov != 0) {
      warning(paste0("Covariate-adjusted 'se_var' is only approximately accurate when `method` is ", method, "."))
    }

    # Small-sample bias correction
    if (unbiased) {
      J <- .cmicalc(df)
    } else {
      J <- 1
    }

    # Compute confidence interval
    if (conf.int) {
      conf_method <- match.arg(conf_method, choices = c("t", "norm"))
      # TODO: Add alternate ci methods from Bonett 2008
      if (method != "smd_pooledSD" & conf_method == "t") {
        # TODO: Derive ncp-based CIs for glass and bonnett d
        conf_method <- "norm"
        message(paste0("`conf_method` == 't' not available when `method` == ", method, ".\n`conf_method` == 'norm' used instead."))
      }
      if (conf_method == "norm") {
        limits <- d + c(-1, 1) * qnorm((1 - conf.level) / 2, lower.tail = FALSE) * sqrt(se_var)
      } else {
        if (is.null(t_ncp)) {
          if (!is.null(se_diff)) {
            t_ncp <- diff / se_diff
          } else {
            # Approximate NCP if neither `t_ncp` nor `se_diff` are given
            n_eff <- (n1 * n2)/((n1 + n2) * A)
            t_ncp <- d * sqrt(n_eff)
          }
        }
        # TODO: Implement conf.limits.nct natively
        limits <- unlist(MBESS::conf.limits.nct(t_ncp, df, conf.level, ...)[c("Lower.Limit", "Upper.Limit")])
        if (!is.null(se_diff)) {
          limits <- limits * se_diff / sqrt(mse)
        } else {
          limits <- limits / sqrt(n_eff)
        }
      }
      names(limits) <- c("lower", "upper")
    }

    # Correct for small-sample bias
    d <- d * J
    se_var <- se_var * J^2
    se <- sqrt(se_var)
    if (conf.int) {
      limits <- limits * J
      out <- list(es = d, df = df, se = se, se_var = se_var, ci.lb = limits['lower'], ci.ub = limits['upper'])
    } else {
      out <- list(es = d, df = df, se = se, se_var = se_var)
    }
    out <- lapply(out, unname)
    class(out) <- c("es_smd", "es", "list")
    attr(out, "es") <- "SMD"
    attr(out, "method") <- method
    attr(out, "unbiased") <- unbiased
    attr(out, "conf.level") <- conf.level
    return(out)
  }

print.es <- function(x, digits = 3, ...) {
  if (attr(x, "es") == "SMD") {
    if (attr(x, "method") == "smd_pooledSD") {
      if (attr(x, "unbiased") == TRUE) {
        cat("Standardized mean difference (Cohen's d)\n")
      } else {
        cat("Standardized mean difference (Hedges' g)\n")
      }
      cat("========================================")
    } else if (attr(x, "method") == "smd_controlSD") {
      if (attr(x, "unbiased") == TRUE) {
        cat("Standardized mean difference (Glass' \u0394 [unbiased])\n")
        cat("=======================================================")
      } else {
        cat("Standardized mean difference (Glass' \u0394)\n")
        cat("============================================")
      }
    } else {
      if (attr(x, "unbiased") == TRUE) {
        cat("Standardized mean difference (Bonett's d [unbiased])\n")
        cat("=======================================================")
      } else {
        cat("Standardized mean difference (Bonett's d)\n")
        cat("============================================")
      }
    }
    cat("\n\n")
    df <- as.data.frame(x)
    print.data.frame(df, digits = digits, row.names = FALSE)
  }
  invisible(x)
}

mod <- lm(mpg ~ factor(cyl, levels = c(4, 6, 8)) + hp, data = mtcars)
r2_cov <- summary(lm(mpg ~ hp, data = mtcars))$r.squared

print.es(
  smd_stats(diff = summary(mod)$coef[2,1],
            se_diff = summary(mod)$coef[2,2],
            t_ncp =  summary(mod)$coef[2,3],
            df = mod$df.residual,
            sigma = summary(mod)$sigma,
            R2_cov = r2_cov,
            n1 = table(model.frame(mod)[,2])[1],
            n2 = table(model.frame(mod)[,2])[2],
            conf.int = TRUE
  )
)

# TODO: Add smd.lm() method to automatically extract factors and compute d values.

# Formula to use with lm output
# d_adj <- t * se_b / sigma * sqrt(1 - R2_cov)
	.cmicalc <- function (df) {
	cmi <- ifelse(df <= 1, NA,
	exp(lgamma(df/2) - log(sqrt(df/2)) - lgamma((df - 1)/2))
	)
	return(cmi)
	}

	smd_stats <-
	function(diff = NULL,
	se_diff = NULL,
	df = NULL,
	m1 = NULL, m2 = NULL,
	n1 = NULL, n2 = NULL,
	sd1 = NULL, sd2 = NULL,
	sigma = NULL,
	mse = NULL,
	t_ncp = NULL,
	R2_cov = 0,
	q = 0,
	method = c("smd_pooledSD", "smd_controlSD", "smd_aveSD"),
	unbiased = FALSE,
	conf.int = TRUE,
	conf.level = 0.95,
	conf_method = c("t", "norm"),
	sd_method = c("unbiased", "ml"),
	std_method = c("unbiased", "ml"),
	se_var_method = c("ls", "unbiased"),
	...) {
	method <- match.arg(method, choices = c("smd_pooledSD", "smd_controlSD", "smd_aveSD"))

	# Compute degrees of freedom
	if (is.null(df)) {
	if (is.null(n1) & is.null(n2)) {
	stop("One of these must be provided:\n (1) `df`\n (2) `n1`, `n2`, `q`")
	} else {
	df <- n1 + n2 - 2 - q
	}
	}
	# TODO: Add Satterwhite degrees of freedom

	# Compute standardizer
	if (all(is.null(mse),
	is.null(sigma),
	any(is.null(sd1),
	is.null(sd2),
	is.null(n1),
	is.null(n2)))
	) {
	stop("One of these must be provided:\n (1) `sigma`\n (2)`mse`\n (3) `sd1`, `sd2`, `n1`, `n2`.")
	} else if (is.null(mse)) {
	if (!is.null(sigma)) {
	mse <- sigma^2
	} else {
	sd_method <- match.arg(sd_method, choices = c("unbiased", "ml"))
	std_method <- match.arg(std_method, choices = c("unbiased", "ml"))
	if (method == "smd_pooledSD") {
	if (sd_method == "unbiased") {
	ssq1 <- (n1 - 1) * sd1^2
	ssq2 <- (n2 - 1) * sd2^2
	} else {
	ssq1 <- n1 * sd1^2
	ssq2 <- n2 * sd2^2
	}
	ssq <- ssq2 + ssq2
	mse <-
	ifelse(std_method == "unbiased",
	ssq / df,
	ssq / (n1 + n2)
	)
	} else if (method == "smd_controlSD") {
	if (sd_method == "unbiased") {
	ssq <- (n1 - 1) * sd1^2
	} else {
	ssq <- n1 * sd1^2
	}
	mse <-
	ifelse(std_method == "unbiased",
	ssq / df,
	ssq / (n1 + n2)
	)
	} else {
	if (sd_method == "unbiased") {
	mse <-
	ifelse(std_method == "unbiased",
	(sd1^2 + sd2^2) / 2,
	(sd1^2 * (n1 - 1) / n1 + sd2^2 * (n2 - 1) / n2) / 2
	)
	} else {
	mse <-
	ifelse(std_method == "unbiased",
	(sd1^2 * n1 / (n1 - 1) + sd2^2 * n2 / (n2 - 1)) / 2,
	(sd1^2 + sd2^2) / 2
	)
	}
	}
	}
	}

	# TODO: Implement large-sample vs unbiased sampling error variance
	if (is.null(diff)) {
	if (is.null(m1) & is.null(m2)) {
	stop("One of these must be provided:\n (1) `diff`\n (2) `m1`, `m2`")
	} else {
	diff <- m2 - m1
	}
	}

	# Adjust standardizer for covariates
	A <- (1 - R2_cov)
	mse <- mse / A

	# Compute d
	d <- diff / sqrt(mse)

	# Compute variance of d
	se_var <-
	switch(method,
	smd_pooledSD = A * (n1 + n2) / (n1 * n2) + d^2 / (2 * (n1 + n2)),
	smd_controlSD = A * (1/n1 + (sd2^2 / sd1^2) / n2 + d^2 / (2 * n1)),
	smd_aveSD = A * (d^2 / (8 * mse^2) * (sd1^4 / (n1 - 1) + sd2^4 / (n2 - 1)) + (sd1^2 / mse) / (n1 - 1) + (sd2^2 / mse) / (n2 - 1))
	)
	if (method != "smd_pooledSD" & R2_cov != 0) {
	warning(paste0("Covariate-adjusted 'se_var' is only approximately accurate when `method` is ", method, "."))
	}

	# Small-sample bias correction
	if (unbiased) {
	J <- .cmicalc(df)
	} else {
	J <- 1
	}

	# Compute confidence interval
	if (conf.int) {
	conf_method <- match.arg(conf_method, choices = c("t", "norm"))
	# TODO: Add alternate ci methods from Bonett 2008
	if (method != "smd_pooledSD" & conf_method == "t") {
	# TODO: Derive ncp-based CIs for glass and bonnett d
	conf_method <- "norm"
	message(paste0("`conf_method` == 't' not available when `method` == ", method, ".\n`conf_method` == 'norm' used instead."))
	}
	if (conf_method == "norm") {
	limits <- d + c(-1, 1) * qnorm((1 - conf.level) / 2, lower.tail = FALSE) * sqrt(se_var)
	} else {
	if (is.null(t_ncp)) {
	if (!is.null(se_diff)) {
	t_ncp <- diff / se_diff
	} else {
	# Approximate NCP if neither `t_ncp` nor `se_diff` are given
	n_eff <- (n1 * n2)/((n1 + n2) * A)
	t_ncp <- d * sqrt(n_eff)
	}
	}
	# TODO: Implement conf.limits.nct natively
	limits <- unlist(MBESS::conf.limits.nct(t_ncp, df, conf.level, ...)[c("Lower.Limit", "Upper.Limit")])
	if (!is.null(se_diff)) {
	limits <- limits * se_diff / sqrt(mse)
	} else {
	limits <- limits / sqrt(n_eff)
	}
	}
	names(limits) <- c("lower", "upper")
	}

	# Correct for small-sample bias
	d <- d * J
	se_var <- se_var * J^2
	se <- sqrt(se_var)
	if (conf.int) {
	limits <- limits * J
	out <- list(es = d, df = df, se = se, se_var = se_var, ci.lb = limits['lower'], ci.ub = limits['upper'])
	} else {
	out <- list(es = d, df = df, se = se, se_var = se_var)
	}
	out <- lapply(out, unname)
	class(out) <- c("es_smd", "es", "list")
	attr(out, "es") <- "SMD"
	attr(out, "method") <- method
	attr(out, "unbiased") <- unbiased
	attr(out, "conf.level") <- conf.level
	return(out)
	}

	print.es <- function(x, digits = 3, ...) {
	if (attr(x, "es") == "SMD") {
	if (attr(x, "method") == "smd_pooledSD") {
	if (attr(x, "unbiased") == TRUE) {
	cat("Standardized mean difference (Cohen's d)\n")
	} else {
	cat("Standardized mean difference (Hedges' g)\n")
	}
	cat("========================================")
	} else if (attr(x, "method") == "smd_controlSD") {
	if (attr(x, "unbiased") == TRUE) {
	cat("Standardized mean difference (Glass' \u0394 [unbiased])\n")
	cat("=======================================================")
	} else {
	cat("Standardized mean difference (Glass' \u0394)\n")
	cat("============================================")
	}
	} else {
	if (attr(x, "unbiased") == TRUE) {
	cat("Standardized mean difference (Bonett's d [unbiased])\n")
	cat("=======================================================")
	} else {
	cat("Standardized mean difference (Bonett's d)\n")
	cat("============================================")
	}
	}
	cat("\n\n")
	df <- as.data.frame(x)
	print.data.frame(df, digits = digits, row.names = FALSE)
	}
	invisible(x)
	}

	mod <- lm(mpg ~ factor(cyl, levels = c(4, 6, 8)) + hp, data = mtcars)
	r2_cov <- summary(lm(mpg ~ hp, data = mtcars))$r.squared

	print.es(
	smd_stats(diff = summary(mod)$coef[2,1],
	se_diff = summary(mod)$coef[2,2],
	t_ncp = summary(mod)$coef[2,3],
	df = mod$df.residual,
	sigma = summary(mod)$sigma,
	R2_cov = r2_cov,
	n1 = table(model.frame(mod)[,2])[1],
	n2 = table(model.frame(mod)[,2])[2],
	conf.int = TRUE
	)
	)

	# TODO: Add smd.lm() method to automatically extract factors and compute d values.

	# Formula to use with lm output
	# d_adj <- t * se_b / sigma * sqrt(1 - R2_cov)