FUNCTIONS FOR COMPUTING COHEN'S d AND PARTIAL ETA-SQUARED ALONG WITH THEIR CONFIDENCE INTERVALS, BY YOANN STUSSI
##########################################################################
#                                                                        #
#              FUNCTION FOR COMPUTING COHEN'S d & HEDGES' g              #
#                  ALONG WITH THEIR CONFIDENCE INTERVAL                  #
#             FOR INDEPENDENT, ONE-SAMPLE, OR PAIRED T TESTS             #
#                                                                        #
##########################################################################
# LAST UPDATED ON: 18.09.19 by YS
# Based on:
# Lakens, D. (2013). Calculating and reporting effect sizes to facilitate cumulative science: A practical primer for t-tests and ANOVAs. Frontiers in Psychology, 4, 863. https://doi.org/10.3389/fpsyg.2013.00863
# Lakens, D. (2015). The perfect t-test. Retrieved from https://github.com/Lakens/perfect-t-test. https://doi.org/10.5281/zenodo.17603
#---------------------------------------------------------------------------------------------------------------------------#
# INPUTS:
# x = data vector in wide format for the first condition
# y = data vector in wide format for the second condition [if missing -> one-sample t test]
# mu = (for one-sample t tests) reference mean [default -> 0]
# paired = TRUE -> paired t test, FALSE -> independent-samples t test [default -> FALSE]
# var.equal = TRUE -> two variances treated as equal, FALSE [default] -> two variances treated as unequal (Welch's t test)
# conf.level = level of the confidence interval between 0 and 1 [default -> .95]
#---------------------------------------------------------------------------------------------------------------------------#
cohen_d_ci <- function(x, y, mu, paired, var.equal, conf.level)
{
#----INPUTS
# y input [if missing -> NA, i.e., one-sample t test]
if (missing(y)) y <- NA
# mu input [default -> 0]
if (missing(mu)) mu <- 0
# paired input [default -> FALSE]
if (missing(paired)) paired <- FALSE
# var.equal input [default -> FALSE, i.e., Welch's t test]
if (missing(var.equal)) var.equal <- FALSE
# conf.level input [default -> .95]
if (missing(conf.level)) conf.level <- .95
#---------------------------------------------------------------------------------------------------------#
#----INDEPENDENT-SAMPLE T TEST
if (!all(is.na(y)) && !paired) {
# output from t test
ttest.output <- t.test(x = x, y = y, paired = F, var.equal = var.equal, conf.level = conf.level)
# effect size (Cohen's d_s)
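# d_s = mean difference standardized by the pooled SD; Hedges' g_s applies the small-sample bias correction (Lakens, 2013)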
m.x <- mean(x)
m.y <- mean(y)
sd.x <- sd(x)
sd.y <- sd(y)
n.x <- length(x)
n.y <- length(y)
tvalue <- ttest.output$statistic
d_s <- (m.x - m.y)/(sqrt(((n.x - 1) * sd.x^2 + (n.y - 1) * sd.y^2)/(n.x + n.y - 2)))
g_s <- d_s * (1 - 3/(4 * (n.x + n.y - 2) - 1))
require(MBESS)
smd.ci <- ci.smd(ncp = tvalue,
n.1 = n.x,
n.2 = n.y,
conf.level = conf.level)
ci_lower_d_s <- smd.ci$Lower.Conf.Limit.smd
ci_upper_d_s <- smd.ci$Upper.Conf.Limit.smd
# SUMMARY TABLE
table.effectsize <- matrix(c(d_s, g_s, ci_lower_d_s, ci_upper_d_s),
ncol = 4, byrow = F)
colnames(table.effectsize) <- c("Cohen's d_s", "Hedges' g_s",
paste(conf.level*100, "% CI lower bound"),
paste(conf.level*100, "% CI upper bound"))
rownames(table.effectsize) <- "Effect size"
print(table.effectsize)
# OUTPUT
output <- list(d_s = d_s, g_s = g_s, ci_lower_d_s = ci_lower_d_s, ci_upper_d_s = ci_upper_d_s)
return(invisible(output))
}
#---------------------------------------------------------------------------------------------------------#
#----PAIRED-SAMPLE T TEST
if (!all(is.na(y)) && paired) {
# output from t test
ttest.output <- t.test(x = x, y = y, paired = T, conf.level = conf.level)
# effect size
diff <- x - y
m_diff <- mean(diff)
sd_diff <- sd(diff)
sd.x <- sd(x)
sd.y <- sd(y)
N <- length(x)
r <- cor(x, y)
tvalue <- ttest.output$statistic
# Cohen's d_z
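# (mean difference standardized by the SD of the difference scores, which equals t/sqrt(N); Lakens, 2013)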
d_z <- tvalue/sqrt(N)
g_z <- d_z * (1 - 3/(4 * (N - 1) - 1))
require(MBESS)
nct_limits <- conf.limits.nct(t.value = tvalue, df = N - 1, conf.level = conf.level)
ci_lower_d_z <- nct_limits$Lower.Limit/sqrt(N)
ci_upper_d_z <- nct_limits$Upper.Limit/sqrt(N)
# Cohen's d_rm
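# (mean difference standardized by an SD that controls for the correlation r between the two measures; Lakens, 2013)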
s_rm <- sqrt(sd.x^2 + sd.y^2 - 2 * r * sd.x * sd.y) / sqrt(2 * (1 - r))
d_rm <- m_diff/s_rm
g_rm <- d_rm * (1 - 3/(4 * (N - 1) - 1))
ci_lower_d_rm <- nct_limits$Lower.Limit * sqrt((2 * (sd.x^2 + sd.y^2 - 2 * (r * sd.x * sd.y)))/(N * (sd.x^2 + sd.y^2)))
ci_upper_d_rm <- nct_limits$Upper.Limit * sqrt((2 * (sd.x^2 + sd.y^2 - 2 * (r * sd.x * sd.y)))/(N * (sd.x^2 + sd.y^2)))
# Cohen's d_av
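# (mean difference standardized by the average SD of the two conditions, ignoring the correlation; Lakens, 2013)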
s_av <- sqrt((sd.x^2 + sd.y^2)/2)
d_av <- m_diff/s_av
g_av <- d_av * (1 - (3 / (4 * (N - 1) - 1)))
ci_lower_d_av <- nct_limits$Lower.Limit * sd_diff/(s_av * sqrt(N))
ci_upper_d_av <- nct_limits$Upper.Limit * sd_diff/(s_av * sqrt(N))
# SUMMARY TABLE
table.effectsize <- matrix(c(d_z, g_z, ci_lower_d_z, ci_upper_d_z,
d_rm, g_rm, ci_lower_d_rm, ci_upper_d_rm,
d_av, g_av, ci_lower_d_av, ci_upper_d_av),
ncol = 4, byrow = T)
colnames(table.effectsize) <- c("Cohen's d", "Hedges' g",
paste(conf.level*100, "% CI lower bound"),
paste(conf.level*100, "% CI upper bound"))
rownames(table.effectsize) <- c("d_z", "d_rm", "d_av")
print(table.effectsize)
# OUTPUT
output <- list(d_z = d_z, g_z = g_z, ci_lower_d_z = ci_lower_d_z, ci_upper_d_z = ci_upper_d_z,
d_rm = d_rm, g_rm = g_rm, ci_lower_d_rm = ci_lower_d_rm, ci_upper_d_rm = ci_upper_d_rm,
d_av = d_av, g_av = g_av, ci_lower_d_av = ci_lower_d_av, ci_upper_d_av = ci_upper_d_av)
return(invisible(output))
}
#---------------------------------------------------------------------------------------------------------#
#----ONE-SAMPLE T TEST
if (all(is.na(y))) {
# output from t test
ttest.output <- t.test(x = x, mu = mu, conf.level = conf.level)
# effect size
diff <- x - mu
m_diff <- mean(diff)
sd_diff <- sd(diff)
sd.x <- sd(x)
N <- length(x)
tvalue <- ttest.output$statistic
# Cohen's d_z
d_z <- tvalue/sqrt(N)
g_z <- d_z * (1 - 3/(4 * (N - 1) - 1))
require(MBESS)
nct_limits <- conf.limits.nct(t.value = tvalue, df = N - 1, conf.level = conf.level)
ci_lower_d_z <- nct_limits$Lower.Limit/sqrt(N)
ci_upper_d_z <- nct_limits$Upper.Limit/sqrt(N)
# # ALTERNATIVE METHOD (gives the exact same results)
# d_z <- m_diff/sd.x
# g_z <- d_z * (1 - (3 / (4 * (N - 1) - 1)))
#
# ci_lower_d_z <- nct_limits$Lower.Limit * sd_diff/(sd.x * sqrt(N))
# ci_upper_d_z <- nct_limits$Upper.Limit * sd_diff/(sd.x * sqrt(N))
# SUMMARY TABLE
table.effectsize <- matrix(c(d_z, g_z, ci_lower_d_z, ci_upper_d_z),
ncol = 4, byrow = T)
colnames(table.effectsize) <- c("Cohen's d", "Hedges' g",
paste(conf.level*100, "% CI lower bound"),
paste(conf.level*100, "% CI upper bound"))
rownames(table.effectsize) <- c("d_z")
print(table.effectsize)
# OUTPUT
output <- list(d_z = d_z, g_z = g_z, ci_lower_d_z = ci_lower_d_z, ci_upper_d_z = ci_upper_d_z)
return(invisible(output))
}
}
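#---------------------------------------------------------------------------------------------------------------------------#
# EXAMPLE USAGE (illustrative sketch, not part of the original gist; data are simulated and object names are arbitrary):
# set.seed(42)
# x <- rnorm(30, mean = 10, sd = 2)
# y <- rnorm(30, mean = 11, sd = 2)
# cohen_d_ci(x, y)                  # independent-samples (Welch) t test -> d_s, g_s + CI
# cohen_d_ci(x, y, paired = TRUE)   # paired t test -> d_z, d_rm, d_av + CIs
# cohen_d_ci(x, mu = 10)            # one-sample t test -> d_z + CI
#---------------------------------------------------------------------------------------------------------------------------#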
##########################################################################
#                                                                        #
#               FUNCTION FOR COMPUTING PARTIAL ETA-SQUARED               #
#                  ALONG WITH THEIR CONFIDENCE INTERVAL                  #
#                    FOR ANALYSES OF VARIANCE (ANOVAs)                   #
#                                                                        #
##########################################################################
# FROM Yoann STUSSI, LAST UPDATED ON: 27.03.20 by David MUNOZ TORD
# Based on:
# Lakens, D. (2013). Calculating and reporting effect sizes to facilitate cumulative science: A practical primer for t-tests and ANOVAs.
# Frontiers in Psychology, 4, 863. https://doi.org/10.3389/fpsyg.2013.00863
# http://daniellakens.blogspot.com/2014/06/calculating-confidence-intervals-for.html
#------------------------------------------------------------------------------------------------------------------------#
# ARGUMENTS:
# formula = formula specifying the model using the aov format (e.g., DV ~ IVb * IVw1 * IVw2 + Error(id/(IVw1 * IVw2)))
# data = a data frame in which the variables specified in the formula will be found
# conf.level = level of the confidence interval between 0 and 1 [default -> .90]
# epsilon = epsilon correction used to adjust for the lack of sphericity for factors involving repeated measures among
# list("default", "none", "GG", "HF")
# - default: use Greenhouse-Geisser (GG) correction when GG epsilon < .75, Huynh-Feldt (HF) correction
# when GG epsilon is in [.75, .90), and no correction when GG epsilon >= .90 [default]
# - none: use no sphericity correction
# - GG: use Greenhouse-Geisser (GG) correction for all factors (>2 levels) involving repeated measures
# - HF: use Huynh-Feldt (HF) correction for all factors (>2 levels) involving repeated measures
# anova.type = type of sum of squares used in list("II", "III", 2, 3)
# - "II" or 2: use type II sum of squares (hierarchical or partially sequential)
# - "III" or 3: use type III sum of squares (marginal or orthogonal) [default]
# observed = names of variables that are observed (i.e., not manipulated), passed on to afex::aov_car [default -> NULL]
# factorize = whether independent variables should be converted to factors, passed on to afex::aov_car [default -> afex_options("factorize")]
#------------------------------------------------------------------------------------------------------------------------#
pes_ci <- function(formula, data, conf.level, epsilon, anova.type, observed=NULL, factorize=afex_options("factorize"))
{
#----------------------------------------------------------------------------------------------#
#----ARGUMENTS
# conf.level
if (missing(conf.level)) conf.level <- .90
# epsilon
if (missing(epsilon)) epsilon <- "default"
if (!epsilon %in% c("default", "none", "GG", "HF")) stop('Unknown correction for sphericity used. Please use (i) the default option ("default"; i.e., no correction applied when GG epsilon >= .90,
GG correction applied when GG epsilon < .75, and HF correction applied when GG epsilon is in [.75, .90)),
(ii) no correction ("none"), (iii) the Greenhouse-Geisser ("GG") correction, or (iv) the Huynh-Feldt ("HF") correction.')
# anova.type
if (missing(anova.type)) anova.type <- "III"
#----------------------------------------------------------------------------------------------#
#----COMPUTE ETA-SQUARED EFFECT SIZE ESTIMATES
# afex package for computing eta-squared estimates
require(afex)
# compute anova with partial (pes) eta-squared estimates
aov <- aov_car(formula = formula,
data = data,
anova_table = list(es = "pes"),
type = anova.type,
observed = observed,
factorize = factorize)
aov.sum <- summary(aov)
#----CREATE MATRIX TO STORE RESULTS
matrix.es <- matrix(nrow = length(rownames(aov$anova_table)), ncol = 3)
#----------------------------------------------------------------------------------------------#
#----COMPUTE CONFIDENCE INTERVAL FOR EACH FACTOR
# MBESS package for computing confidence intervals
require(MBESS)
for (i in 1:length(rownames(aov$anova_table))) {
# CHECK
if (length(aov.sum$pval.adjustments) != 0) {
#--------------------------------------------------------------------------------------------#
# DEFAULT OPTION
# (no correction if GG epsilon >= .90, GG correction if GG epsilon < .75, or HF correction if GG epsilon = [.75, .90])
if (epsilon == "default") {
# Search for factors with more than 2 levels
for (j in 1:length(rownames(aov.sum$pval.adjustments))) {
if (rownames(aov.sum$pval.adjustments)[j] == rownames(aov$anova_table)[i]) {
# Apply GG correction if GG epsilon < .75
if (is.na(aov.sum$pval.adjustments[j, "GG eps"]) == F && aov.sum$pval.adjustments[j, "GG eps"] < .75) {
df.1 <- aov.sum$univariate.tests[i + 1, "num Df"] * aov.sum$pval.adjustments[j, "GG eps"]
df.2 <- aov.sum$univariate.tests[i + 1, "den Df"] * aov.sum$pval.adjustments[j, "GG eps"]
aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
conf.level = conf.level,
df.1 = df.1,
df.2 = df.2)
break
}
# Apply HF correction if GG epsilon = [.75, .90]
else if (is.na(aov.sum$pval.adjustments[j, "GG eps"]) == F && aov.sum$pval.adjustments[j, "GG eps"] >= .75 && aov.sum$pval.adjustments[j, "GG eps"] < .90) {
df.1 <- aov.sum$univariate.tests[i + 1, "num Df"] * aov.sum$pval.adjustments[j, "HF eps"]
df.2 <- aov.sum$univariate.tests[i + 1, "den Df"] * aov.sum$pval.adjustments[j, "HF eps"]
aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
conf.level = conf.level,
df.1 = df.1,
df.2 = df.2)
break
}
# Apply no correction if GG epsilon >= .90
else if (is.na(aov.sum$pval.adjustments[j, "GG eps"]) == F && aov.sum$pval.adjustments[j, "GG eps"] >= .90) {
df.1 <- aov.sum$univariate.tests[i + 1, "num Df"]
df.2 <- aov.sum$univariate.tests[i + 1, "den Df"]
aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
conf.level = conf.level,
df.1 = df.1,
df.2 = df.2)
break
}
} else {
df.1 <- aov.sum$univariate.tests[i + 1, "num Df"]
df.2 <- aov.sum$univariate.tests[i + 1, "den Df"]
aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
conf.level = conf.level,
df.1 = df.1,
df.2 = df.2)
}
}
}
#--------------------------------------------------------------------------------------------#
# SPHERICITY ASSUMED
if (epsilon == "none") {
df.1 <- aov.sum$univariate.tests[i + 1, "num Df"]
df.2 <- aov.sum$univariate.tests[i + 1, "den Df"]
aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
conf.level = conf.level,
df.1 = df.1,
df.2 = df.2)
}
#--------------------------------------------------------------------------------------------#
# GREENHOUSE-GEISSER (GG) CORRECTION
else if (epsilon == "GG") {
# Search for factors with more than 2 levels
for (j in 1:length(rownames(aov.sum$pval.adjustments))) {
if (is.na(aov.sum$pval.adjustments[j, "GG eps"]) == F && rownames(aov.sum$pval.adjustments)[j] == rownames(aov$anova_table)[i]) {
# Apply GG correction
df.1 <- aov.sum$univariate.tests[i + 1, "num Df"] * aov.sum$pval.adjustments[j, "GG eps"]
df.2 <- aov.sum$univariate.tests[i + 1, "den Df"] * aov.sum$pval.adjustments[j, "GG eps"]
aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
conf.level = conf.level,
df.1 = df.1,
df.2 = df.2)
break
} else {
df.1 <- aov.sum$univariate.tests[i + 1, "num Df"]
df.2 <- aov.sum$univariate.tests[i + 1, "den Df"]
aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
conf.level = conf.level,
df.1 = df.1,
df.2 = df.2)
}
}
}
#--------------------------------------------------------------------------------------------#
# HUYNH-FELDT (HF) CORRECTION
else if (epsilon == "HF") {
# Search for factors with more than 2 levels
for (j in 1:length(rownames(aov.sum$pval.adjustments))) {
if (is.na(aov.sum$pval.adjustments[j, "GG eps"]) == F && rownames(aov.sum$pval.adjustments)[j] == rownames(aov$anova_table)[i]) {
# Apply HF correction
df.1 <- aov.sum$univariate.tests[i + 1, "num Df"] * aov.sum$pval.adjustments[j, "HF eps"]
df.2 <- aov.sum$univariate.tests[i + 1, "den Df"] * aov.sum$pval.adjustments[j, "HF eps"]
aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
conf.level = conf.level,
df.1 = df.1,
df.2 = df.2)
break
} else {
df.1 <- aov.sum$univariate.tests[i + 1, "num Df"]
df.2 <- aov.sum$univariate.tests[i + 1, "den Df"]
aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$univariate.tests[i + 1, "F value"],
conf.level = conf.level,
df.1 = df.1,
df.2 = df.2)
}
}
}
} else {
df.1 <- aov.sum$"num Df"[i]
df.2 <- aov.sum$"den Df"[i]
aov.sum.lim <- conf.limits.ncf(F.value = aov.sum$F[i], conf.level = conf.level,
df.1 = df.1,
df.2 = df.2)
}
#--------------------------------------------------------------------------------------------#
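# Convert the noncentrality-parameter limits into partial eta-squared limits: pes = ncp/(ncp + df.1 + df.2 + 1);
# an NA lower limit (e.g., when the observed F is too small to yield a nonzero lower bound) is set to 0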
# LOWER LIMIT
aov.sum.lower_lim <- ifelse(is.na(aov.sum.lim$Lower.Limit/(aov.sum.lim$Lower.Limit + df.1 + df.2 + 1)),
0,
aov.sum.lim$Lower.Limit/(aov.sum.lim$Lower.Limit + df.1 + df.2 + 1))
# UPPER LIMIT
aov.sum.upper_lim <- aov.sum.lim$Upper.Limit/(aov.sum.lim$Upper.Limit + df.1 + df.2 + 1)
#--------------------------------------------------------------------------------------------#
#----STORE RESULTS IN THE MATRIX
matrix.es[i,] <- matrix(c(aov$anova_table$pes[i],
aov.sum.lower_lim,
aov.sum.upper_lim),
ncol = 3)
}
#----------------------------------------------------------------------------------------------#
#----OUTPUT MATRIX WITH PARTIAL ETA-SQUARED EFFECT SIZE ESTIMATES AND THEIR CONFIDENCE INTERVAL
# Rename rows and columns
rownames(matrix.es) <- rownames(aov$anova_table)
colnames(matrix.es) <- c("Partial eta-squared",
paste(conf.level * 100, "% CI lower limit"),
paste(conf.level * 100, "% CI upper limit"))
# Output
return(matrix.es)
}
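#------------------------------------------------------------------------------------------------------------------------#
# EXAMPLE USAGE (illustrative sketch, not part of the original gist; the data frame and variable names are hypothetical):
# set.seed(42)
# df <- data.frame(id   = factor(rep(1:20, each = 3)),
#                  cond = factor(rep(c("A", "B", "C"), times = 20)),
#                  dv   = rnorm(60))
# pes_ci(dv ~ cond + Error(id/cond), data = df)   # partial eta-squared + 90% CI for each effect
#------------------------------------------------------------------------------------------------------------------------#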
# FUNCTION FOR COMPUTING THE STANDARD ERROR OF THE MEAN
se <- function(x, na.rm = TRUE) {
if (!is.vector(x)) stop("'x' must be a vector.")
if (!is.numeric(x)) stop("'x' must be numeric.")
if (na.rm) x <- x[stats::complete.cases(x)]
sqrt(stats::var(x)/length(x))
}
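# EXAMPLE USAGE (illustrative):
# se(c(2, 4, 6, 8, NA))   # NA dropped by default -> sd(c(2, 4, 6, 8))/sqrt(4)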