| #################################################################### | |
| # | |
| # All Code and Comments Below (except code provided by Boris Mayer) are | |
| # Copyright Marc Schwartz | |
| # e-mail: marc_schwartz@me.com | |
| # This code is made available under the GNU Public License V2.0 | |
| # This is free software and comes with ABSOLUTELY NO WARRANTY. | |
| # | |
| #################################################################### | |
| # R Code to calculate various Measures of | |
| # Association for m x n tables | |
| # References include: | |
| # 1. Agresti A. (2002): Categorical Data Analysis, Second Edition, J. Wiley and Sons | |
| # 2. Stokes M., Davis C. & Koch G. (1997): Categorical Data Analysis Using the SAS System, SAS Institute | |
| # 3. Liebetrau A.M. (1983): Measures of Association (Sage University Papers Series on Quantitative Applications | |
| # in the Social Sciences, Series no. 07-032), Sage Publications | |
| # 4. SAS Institute (1999): SAS/STAT User's Guide V8, SAS Institute | |
| # 5. SPSS, Inc. (2003): SPSS 11.5 Statistical Algorithms | |
| # (http://www.spss.com/tech/stat/Algorithms/11.5/crosstabs.pdf) | |
| # 6. Sheskin DJ. (2004): Handbook of Parametric and Nonparametric Statistical Procedures, Chapman & Hall/CRC | |
| # MOST MEASURES TAKE A 2 DIMENSIONAL TABLE/MATRIX "x" AS | |
| # AN ARGUMENT | |
| # See the 'vcd' CRAN package for some examples and code | |
| # on calculations and p values | |
| # Calculate CONcordant Pairs in a table | |
| # cycle through x[r, c] and multiply by | |
| # sum(x elements below and to the right of x[r, c]) | |
| # x = table | |
| concordant <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| # get sum(matrix values > r AND > c) | |
| # for each matrix[r, c] | |
| mat.lr <- function(r, c) | |
| { | |
| lr <- x[(r.x > r) & (c.x > c)] | |
| sum(lr) | |
| } | |
| # get row and column index for each | |
| # matrix element | |
| r.x <- row(x) | |
| c.x <- col(x) | |
| # return the sum of each matrix[r, c] * sums | |
| # using mapply to sequence thru each matrix[r, c] | |
| sum(x * mapply(mat.lr, r = r.x, c = c.x)) | |
| } | |
| # Calculate DIScordant Pairs in a table | |
| # cycle through x[r, c] and multiply by | |
| # sum(x elements below and to the left of x[r, c]) | |
| # x = table | |
| discordant <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| # get sum(matrix values > r AND < c) | |
| # for each matrix[r, c] | |
| mat.ll <- function(r, c) | |
| { | |
| ll <- x[(r.x > r) & (c.x < c)] | |
| sum(ll) | |
| } | |
| # get row and column index for each | |
| # matrix element | |
| r.x <- row(x) | |
| c.x <- col(x) | |
| # return the sum of each matrix[r, c] * sums | |
| # using mapply to sequence thru each matrix[r, c] | |
| sum(x * mapply(mat.ll, r = r.x, c = c.x)) | |
| } | |
| # Calculate Pairs tied on Rows | |
| # cycle through each row of x and multiply by | |
| # sum(x elements to the right of x[r, c]) | |
| # x = table | |
| ties.row <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| total.pairs <- 0 | |
| rows <- dim(x)[1] | |
| cols <- dim(x)[2] | |
| for (r in 1:rows) | |
| { | |
| for (c in 1:(cols - 1)) | |
| { | |
| total.pairs <- total.pairs + (x[r, c] * sum(x[r, (c + 1):cols])) | |
| } | |
| } | |
| total.pairs | |
| } | |
| # Calculate Pairs tied on Columns | |
| # cycle through each col of x and multiply by | |
| # sum(x elements below x[r, c]) | |
| # x = table | |
| ties.col <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| total.pairs <- 0 | |
| rows <- dim(x)[1] | |
| cols <- dim(x)[2] | |
| for (c in 1:cols) | |
| { | |
| for (r in 1:(rows - 1)) | |
| { | |
| total.pairs <- total.pairs + (x[r, c] * sum(x[(r + 1):rows, c])) | |
| } | |
| } | |
| total.pairs | |
| } | |
| # Calculate Phi Coefficient | |
| # x = table | |
| calc.phi <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| phi <- sqrt(chisq.test(x, correct = FALSE)$statistic / sum(x)) | |
| as.numeric(phi) | |
| } | |
| # Calculate Contingency Coefficient (Pearson's C) | |
| # and Sakoda's Adjusted Pearson's C | |
| # x = table | |
| calc.cc <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| # CC - Pearson's C | |
| chisq <- chisq.test(x, correct = FALSE)$statistic | |
| C <- sqrt(chisq / (chisq + sum(x))) | |
| # Sakoda's adjusted Pearson's C | |
| k <- min(dim(x)) | |
| SC <- C / sqrt((k - 1) / k) | |
| CClist <- list(as.numeric(C), as.numeric(SC)) | |
| names(CClist) <- c("Pearson.C", "Sakoda.C") | |
| CClist | |
| } | |
| # Calculate Tshuprow's T | |
| # Not meaningful for non-square tables | |
| # For 2 x 2 tables T = Phi | |
| # x = table | |
| calc.TT <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| TT <- sqrt(chisq.test(x, correct = FALSE)$statistic / | |
| (sum(x) * sqrt((dim(x)[1] - 1) * (dim(x)[2] - 1)))) | |
| as.numeric(TT) | |
| } | |
| # Calculate Cramer's V | |
| # For 2 x 2 tables V = Phi | |
| # x = table | |
| calc.CV <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| CV <- sqrt(chisq.test(x, correct = FALSE)$statistic / | |
| (sum(x) * min(dim(x) - 1))) | |
| as.numeric(CV) | |
| } | |
| # Calculate Lambda | |
| # Return 3 values: | |
| # 1. Lambda C~R | |
| # 2. Lambda R~C | |
| # 3. Lambda Symmetric (Mean of above) | |
| # x = table | |
| calc.lambda <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| SumRmax <- sum(apply(x, 1, max)) | |
| SumCmax <- sum(apply(x, 2, max)) | |
| MaxCSum <- max(colSums(x)) | |
| MaxRSum <- max(rowSums(x)) | |
| n <- sum(x) | |
| L.CR <- (SumRmax - MaxCSum) / (n - MaxCSum) | |
| L.RC <- (SumCmax - max(rowSums(x))) / (n - MaxRSum) | |
| L.S <- (SumRmax + SumCmax - MaxCSum - MaxRSum) / | |
| ((2 * n) - MaxCSum - MaxRSum) | |
| Llist <- list(L.CR, L.RC, L.S) | |
| names(Llist) <- c("L.CR", "L.RC", "L.S") | |
| Llist | |
| } | |
| # Calculate The Uncertainty Coefficient | |
| # "Theil's U" | |
| # Return 3 values: | |
| # 1. UC C~R | |
| # 2. UC R~C | |
| # 3. UC Symmetric (Mean of above) | |
| # x = table | |
| # Note: Asymmetric formulae denomiators corrected on May 4, 2007 | |
| # thanks to Antti Arppe | |
| calc.UC <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| SumR <- rowSums(x) | |
| SumC <- colSums(x) | |
| n <- sum(x) | |
| HY <- -sum((SumC / n) * log(SumC / n)) | |
| HX <- -sum((SumR / n) * log(SumR / n)) | |
| HXY <- -sum((x / n) * log(x / n)) | |
| UC.RC <- (HX + HY - HXY) / HX | |
| UC.CR <- (HY + HX - HXY) / HY | |
| UC.S <- 2 * (HX + HY - HXY) / (HX + HY) | |
| UClist <- list(UC.RC, UC.CR, UC.S) | |
| names(UClist) <- c("UC.RC", "UC.CR", "UC.S") | |
| UClist | |
| } | |
| # Calculate Goodman-Kruskal gamma | |
| # x = table | |
| calc.gamma <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| c <- concordant(x) | |
| d <- discordant(x) | |
| gamma <- (c - d) / (c + d) | |
| gamma | |
| } | |
| # Calculate Kendall's Tau-b | |
| # x = table | |
| calc.KTb <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| c <- concordant(x) | |
| d <- discordant(x) | |
| # An alternative computation is: | |
| #Tr <- ties.row(x) | |
| #Tc <- ties.col(x) | |
| #KTb <- (c - d) / sqrt((c + d + Tc) * (c + d + Tr)) | |
| # The "preferred" computation is: | |
| n <- sum(x) | |
| SumR <- rowSums(x) | |
| SumC <- colSums(x) | |
| KTb <- (2 * (c - d)) / sqrt(((n ^ 2) - (sum(SumR ^ 2))) * ((n ^ 2) - (sum(SumC ^ 2)))) | |
| KTb | |
| } | |
| # Calculate Kendall-Stuart Tau-c | |
| # x = table | |
| calc.KSTc <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| c <- concordant(x) | |
| d <- discordant(x) | |
| m <- min(dim(x)) | |
| n <- sum(x) | |
| KSTc <- (m * 2 * (c - d)) / ((n ^ 2) * (m - 1)) | |
| KSTc | |
| } | |
| # Calculate Somers' d | |
| # Return 3 values: | |
| # 1. Sd C~R | |
| # 2. Sd R~C | |
| # 3. Sd Symmetric | |
| # x = table | |
| calc.Sd <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| c <- concordant(x) | |
| d <- discordant(x) | |
| n <- sum(x) | |
| SumR <- rowSums(x) | |
| SumC <- colSums(x) | |
| Sd.CR <- (2 * (c - d)) / ((n ^ 2) - (sum(SumR ^ 2))) | |
| Sd.RC <- (2 * (c - d)) / ((n ^ 2) - (sum(SumC ^ 2))) | |
| Sd.S <- (2 * (c - d)) / ((n ^ 2) - (((sum(SumR ^ 2)) + (sum(SumC ^ 2))) / 2)) | |
| Sdlist <- list(Sd.CR, Sd.RC, Sd.S) | |
| names(Sdlist) <- c("Sd.CR", "Sd.RC", "Sd.S") | |
| Sdlist | |
| } | |
| # Calculate Cochran's Q | |
| # Test for proportions in dependent samples | |
| # a k > 2 generalization of the mcnemar test | |
| # 'mat' is a matrix, where: | |
| # each row is a subject | |
| # each column is the 0/1 result of a test condition | |
| cochranq.test <- function(mat) | |
| { | |
| k <- ncol(mat) | |
| C <- sum(colSums(mat) ^ 2) | |
| R <- sum(rowSums(mat) ^ 2) | |
| T <- sum(rowSums(mat)) | |
| num <- (k - 1) * ((k * C) - (T ^ 2)) | |
| den <- (k * T) - R | |
| Q <- num / den | |
| df <- k - 1 | |
| names(df) <- "df" | |
| names(Q) <- "Cochran's Q" | |
| p.val <- pchisq(Q, df, lower = FALSE) | |
| QVAL <- list(statistic = Q, parameter = df, p.value = p.val, | |
| method = "Cochran's Q Test for Dependent Samples", | |
| data.name = deparse(substitute(mat))) | |
| class(QVAL) <- "htest" | |
| return(QVAL) | |
| } | |
| # Functions calc.WeP and calc.WeA to calculate Wilson's e | |
| # Code kindly provided and copyrighted by Boris Mayer | |
| # June 19, 2015 | |
| # boris.mayer@psy.unibe.ch | |
| # This code utilizes functions above | |
| # Calculate Wilsons's e (based on "Preferred computation") | |
| # x = table | |
| calc.WeP <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| c <- concordant(x) | |
| d <- discordant(x) | |
| # An alternative computation is: | |
| #Tr <- ties.row(x) | |
| #Tc <- ties.col(x) | |
| #We <- (c - d) / (c + d + Tc + Tr) | |
| # The "preferred" computation is: | |
| n <- sum(x) | |
| SumR <- rowSums(x) | |
| SumC <- colSums(x) | |
| We <- (2 *(c - d)) / ((n^2 - (sum(SumR ^ 2))) + (n^2 - (sum(SumC ^ 2))) - (2*(c+d))) | |
| We | |
| } | |
| # Calculate Wilsons's e (based on "Alternative computation") | |
| # x = table | |
| calc.WeA <- function(x) | |
| { | |
| x <- matrix(as.numeric(x), dim(x)) | |
| c <- concordant(x) | |
| d <- discordant(x) | |
| # An alternative computation is: | |
| Tr <- ties.row(x) | |
| Tc <- ties.col(x) | |
| We <- (c - d) / (c + d + Tc + Tr) | |
| # The "preferred" computation is: | |
| #n <- sum(x) | |
| #SumR <- rowSums(x) | |
| #SumC <- colSums(x) | |
| #We <- (2 *(c - d)) / ((n^2 - (sum(SumR ^ 2))) + (n^2 - (sum(SumC ^ 2))) - (2*(c+d))) | |
| We | |
| } |