Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Categorical data to indicator matrix to log odds ratios
# Starting with categorical data, ending with a table of log odds ratios
doInstall <- TRUE # Change to FALSE if you don't want packages installed.
# Packages this script relies on: plyr supplies arrange(), reshape2 supplies melt().
toInstall <- c("plyr", "reshape2")
# Install only when requested. The mirror URL was missing from this copy of the
# script; the CRAN cloud mirror is used here -- substitute your preferred mirror.
if (doInstall) {
  install.packages(toInstall, repos = "https://cloud.r-project.org")
}
# Attach every package by name (character.only = TRUE because names are strings).
lapply(toInstall, library, character.only = TRUE)
# Canonical example of categorical data.
# melt() turns the HairEyeColor contingency table into long form: one row per
# cell, with the cell count in the `value` column.
HEC <- melt(HairEyeColor)
# Repeat each cell's row `value` times so every observation gets its own row,
# then drop the count column. seq_len() is used instead of 1:nrow() so an empty
# table would yield zero rows rather than indexing with c(1, 0).
HEC <- HEC[rep(seq_len(nrow(HEC)), times = HEC$value), names(HEC) != "value"]
colnames(HEC) <- c("Hair", "Eye", "Gender")
head(HEC) # This df has a row for each observation
# Convert matrix of factors to matrix of indicator variables.
# Each factor is given its full (non-reduced) contrast matrix, so every level
# gets its own 0/1 column; [, -1] then drops the intercept column.
# lapply() (not sapply()) guarantees contrasts.arg is a named list: sapply()
# would simplify to a single matrix whenever all factors happen to have the
# same number of levels, which model.matrix() cannot use.
# NOTE(review): assumes every column of HEC is a factor -- contrasts() requires it.
indicatorMatrix <- model.matrix(
  ~ .,
  data = HEC,
  contrasts.arg = lapply(HEC, contrasts, contrasts = FALSE)
)[, -1]
# (technique taken from an external source; the reference link is missing from this copy)
# Make a table of log odds ratios between categories.
# Every cross-product below is a (column x column) count over observations.
absent <- 1 - indicatorMatrix # 1 where the indicator is 0, and vice versa
TT <- crossprod(indicatorMatrix)         # Has both (two true)
TF <- crossprod(indicatorMatrix, absent) # Has row category, but not column category
FT <- crossprod(absent, indicatorMatrix) # Has column category, but not row category
FF <- crossprod(absent)                  # Has neither
# Odds of the column category given the row category, relative to the odds
# without it. Zero counts propagate as Inf / -Inf / NaN after the log.
oddsRatios <- (TT / TF) / (FT / FF)
logOddsRatios <- log(oddsRatios)
# Long-form table of all pairs, restricted to the upper triangle so each
# unordered pair appears once and the diagonal is excluded, sorted by value.
# upper.tri indicates which items in a matrix are in the upper triangle;
# as.vector() flattens it column by column, matching melt()'s row order.
pairTable <- melt(logOddsRatios)
inUpperTriangle <- as.vector(upper.tri(logOddsRatios))
arrange(pairTable[inUpperTriangle, ], value)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.