crsh/repeated_measures_correlation.R

## repeated_measures_correlation.R
# x       data.frame. Aggregated data for the factorial design, e.g., from aggregate(measure ~ subject * factor1 * factor2, data, mean).
# measure Character. Name of the measure.
# formula Formula. A formula specifying the factor (combination) for which to calculate the correlation, e.g., `subject ~ factor1` for a
#                   main effect or `subject ~ factor1 + factor2` (yes, it needs to be a "+") for an interaction. Note that G*Power can be
#                   used to perform power analyses for up to two repeated measures factors as long as one of them has only two levels.
#                   To do so, enter the larger number of factor levels into the field "Number of measurements" and multiply the effect
#                   size f by √p, where p is the number of levels of the other factor). If both factor have more than two levels G*Power
#                   will underestimate the required sample size! (See http://goo.gl/RgciM4 [German] for details)
# type    Character. Specifies the estimation approach. "min" corresponds to smallest observed correlation among repeated measures,
#                   "mean" corresponds to the average observed correlation.
# level   Numeric. Defines the width of the interval if confidence intervals are plotted. Defaults to 0.95 for 95% confidence intervals.

repeated_measures_correlation <- function(x, measure, formula, type = "mean", level = 0.95) {
  require("reshape2")

  # Define helper functions
  fisher_z <- function(r) {
    return(0.5 * log((1 + r) / (1 - r)))
  }

  inv_fisher_z <- function(z) {
    return((exp(2 * z) - 1) / (exp(2 * z) + 1))
  }

  r_ci <- function (r, n, level = 0.95)  {
    alpha <- 1 - level
    z <- fisher_z(r)
    se_z <- 1 / sqrt(n - 3)
    z_critical <- -qnorm(alpha / 2)
    z_ci <- z + c(-1, 1) * (z_critical * se_z)
    r_ci <- matrix(inv_fisher_z(z_ci), ncol = 2, byrow = TRUE)
    attr(r_ci, "conf.level") <- level

    return(r_ci)
  }

  # Restructure data
  melt_data <- melt(x, measure.vars = measure, na.rm = FALSE)
  wide_data <- dcast(melt_data, formula = formula, mean, value.var = "value")
  n_data <- nrow(wide_data)
  wide_data <- wide_data[complete.cases(wide_data), ]
  if(nrow(wide_data) < n_data) warning("Dropped ", nrow(wide_data) < n_data, " incomplete cases")

  # Calculate empirical correlations
  correlation_matrix <- cor(wide_data[, -1])
  correlation_matrix <- correlation_matrix[upper.tri(correlation_matrix)]

  # Select estimate
  if(type == "mean") {
    estimated_correlation <- inv_fisher_z(mean(fisher_z(correlation_matrix)))
  } else if(type == "min") {
    estimated_correlation <- inv_fisher_z(min(fisher_z(correlation_matrix)))
  } else {
    stop("Type must be either 'min' or 'mean'.")
  }

  estimated_correlation_ci <- r_ci(estimated_correlation, n = nrow(wide_data), level = level)

  results <- c(estimated_correlation, estimated_correlation_ci)
  names(results) <- c(paste0(type, "_r"), "ll", "ul")
  results
}
	# x data.frame. Aggregated data for the factorial design, e.g., from aggregate(measure ~ subject * factor1 * factor2, data, mean).
	# measure Character. Name of the measure.
	# formula Formula. A formula specifying the factor (combination) for which to calculate the correlation, e.g., `subject ~ factor1` for a
	# main effect or `subject ~ factor1 + factor2` (yes, it needs to be a "+") for an interaction. Note that G*Power can be
	# used to perform power analyses for up to two repeated measures factors as long as one of them has only two levels.
	# To do so, enter the larger number of factor levels into the field "Number of measurements" and multiply the effect
	# size f by √p, where p is the number of levels of the other factor). If both factor have more than two levels G*Power
	# will underestimate the required sample size! (See http://goo.gl/RgciM4 [German] for details)
	# type Character. Specifies the estimation approach. "min" corresponds to smallest observed correlation among repeated measures,
	# "mean" corresponds to the average observed correlation.
	# level Numeric. Defines the width of the interval if confidence intervals are plotted. Defaults to 0.95 for 95% confidence intervals.

	repeated_measures_correlation <- function(x, measure, formula, type = "mean", level = 0.95) {
	require("reshape2")

	# Define helper functions
	fisher_z <- function(r) {
	return(0.5 * log((1 + r) / (1 - r)))
	}

	inv_fisher_z <- function(z) {
	return((exp(2 * z) - 1) / (exp(2 * z) + 1))
	}

	r_ci <- function (r, n, level = 0.95) {
	alpha <- 1 - level
	z <- fisher_z(r)
	se_z <- 1 / sqrt(n - 3)
	z_critical <- -qnorm(alpha / 2)
	z_ci <- z + c(-1, 1) * (z_critical * se_z)
	r_ci <- matrix(inv_fisher_z(z_ci), ncol = 2, byrow = TRUE)
	attr(r_ci, "conf.level") <- level

	return(r_ci)
	}

	# Restructure data
	melt_data <- melt(x, measure.vars = measure, na.rm = FALSE)
	wide_data <- dcast(melt_data, formula = formula, mean, value.var = "value")
	n_data <- nrow(wide_data)
	wide_data <- wide_data[complete.cases(wide_data), ]
	if(nrow(wide_data) < n_data) warning("Dropped ", nrow(wide_data) < n_data, " incomplete cases")

	# Calculate empirical correlations
	correlation_matrix <- cor(wide_data[, -1])
	correlation_matrix <- correlation_matrix[upper.tri(correlation_matrix)]

	# Select estimate
	if(type == "mean") {
	estimated_correlation <- inv_fisher_z(mean(fisher_z(correlation_matrix)))
	} else if(type == "min") {
	estimated_correlation <- inv_fisher_z(min(fisher_z(correlation_matrix)))
	} else {
	stop("Type must be either 'min' or 'mean'.")
	}

	estimated_correlation_ci <- r_ci(estimated_correlation, n = nrow(wide_data), level = level)

	results <- c(estimated_correlation, estimated_correlation_ci)
	names(results) <- c(paste0(type, "_r"), "ll", "ul")
	results
	}