# Categorical data to indicator matrix to log odds ratios
# Starting with categorical data, ending with a table of log odds ratios
# Package setup: optionally install, then attach, the packages this script uses.
doInstall <- TRUE # Change to FALSE if you don't want packages installed.
toInstall <- c("plyr", "reshape2")
if (doInstall) {
  # Only fetch packages that are not installed yet, so re-running the script
  # does not download everything again.
  missingPackages <- setdiff(toInstall, rownames(installed.packages()))
  if (length(missingPackages) > 0) {
    install.packages(missingPackages, repos = "http://cran.us.r-project.org")
  }
}
# Attach each package by name; library() raises an error if one is unavailable.
# invisible() keeps the list returned by lapply() from being printed.
invisible(lapply(toInstall, library, character.only = TRUE))
# Canonical example of categorical data
# melt() reshapes the HairEyeColor table into a data frame whose fourth
# column is used below as the number of observations for each row.
HEC <- melt(HairEyeColor)
# Repeat every row as many times as its count, then drop the count column.
# seq_len() is used instead of 1:nrow() so an empty table yields no rows.
HEC <- HEC[rep(seq_len(nrow(HEC)), times = HEC[, 4]), -4]
colnames(HEC) <- c("Hair", "Eye", "Gender")
head(HEC) # This df has a row for each observation
# Convert matrix of factors to matrix of indicator variables
# Requesting full (contrasts = FALSE) contrast matrices gives every factor
# level its own 0/1 column; [, -1] then drops the first (intercept) column.
indicatorMatrix <- model.matrix(
  ~ .,
  data = HEC,
  # lapply() always returns a named list, which is what contrasts.arg needs.
  # sapply() would simplify to a matrix whenever every factor has the same
  # number of levels.
  contrasts.arg = lapply(HEC, contrasts, contrasts = FALSE)
)[, -1]
# (from http://stackoverflow.com/a/4569239/479554)
head(indicatorMatrix)
# Make a table of log odds ratios between categories
# Each product below is a category-by-category matrix of observation counts.
# notIndicator is the 0/1 complement of indicatorMatrix (1 where a category
# is absent), computed once and reused.
notIndicator <- 1 - indicatorMatrix
TT <- crossprod(indicatorMatrix)               # Has both (two true)
TF <- crossprod(indicatorMatrix, notIndicator) # Has one, but not other
FT <- crossprod(notIndicator, indicatorMatrix) # etc.
FF <- crossprod(notIndicator)                  # etc.
# Zero counts are not special-cased: a category paired with itself, or with
# another level of the same factor, divides by or takes the log of zero and
# so shows up as Inf or -Inf.
oddsRatios <- (TT / TF) / (FT / FF)
logOddsRatios <- log(oddsRatios)
# upper.tri indicates which items in a matrix are in the upper triangle.
# Both matrices are melted the same way, so the logical column lines up
# row-for-row with the melted log odds ratios and keeps each pair once.
pairTable <- melt(logOddsRatios)
isUpperTriangle <- melt(upper.tri(logOddsRatios))[, 3]
# Sort the remaining pairs by log odds ratio, ascending.
arrange(pairTable[isUpperTriangle, ], value)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment