Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save clayford/5b719d27e8790fbb0a62cf6a6309902b to your computer and use it in GitHub Desktop.
Save clayford/5b719d27e8790fbb0a62cf6a6309902b to your computer and use it in GitHub Desktop.
Simulate data from a POLR model with proportional odds assumption satisfied and a POLR model without the assumption satisfied to demonstrate how the comparison to a multinomial logit model can provide some evidence of the proportional odds assumption or lack thereof.
# Clay Ford
# 2020-06-22
# Simulate data from a proportional odds model with proportional odds assumption
# satisfied.
# 300 observations and a grouping variable (example: democratic/republican)
n <- 300
set.seed(1)
grp <- sample(0:1, size = n, replace = TRUE)
# Generate boundary probabilities of professed ideology.
ideology <- c("Very Liberal", "Slightly Liberal", "Moderate", "Slightly Conservative", "Very Conservative")
# Notice the slope is always -1.
# That is, the proportional odds assumption is satisfied.
# plogis() is the inverse logit function
r1 <- plogis(q = -2.5 - -1*grp)
r2 <- plogis(q = -1.5 - -1*grp)
r3 <- plogis(q = 0.2 - -1*grp)
r4 <- plogis(q = 1.1 - -1*grp)
grp[1:2]
r1[1:2]
# Probability of "Very Liberal" if Republican
head(r1[grp==0],1)
# Probability of "Very Liberal" if Democrat
head(r1[grp==1],1)
# Probability of "Very Liberal" or "Slightly Liberal" if Republican
head(r2[grp==0],1)
# Probability of "Very Liberal" or "Slightly Liberal" if Democrat
head(r2[grp==1],1)
# Probability of "Very Liberal" or "Slightly Liberal" or "Moderate" if Republican
head(r3[grp==0],1)
# Probability of "Very Liberal" or "Slightly Liberal" or "Moderate" if Democrat
head(r3[grp==1],1)
# Probability of "Very Liberal" or "Slightly Liberal" or "Moderate" if Republican
head(r4[grp==0],1)
# Probability of "Very Liberal" or "Slightly Liberal" or "Moderate" if Democrat
head(r4[grp==1],1)
# Probability of "Very Liberal" or "Slightly Liberal" or "Moderate" or "Slightly
# Conservative" if Republican
head(r4[grp==0],1)
# Probability of "Very Liberal" or "Slightly Liberal" or "Moderate" or "Slightly
# Conservative" if Democrat
head(r4[grp==1],1)
# Place probabilities into a matrix
probs <- matrix(c(r1, r2, r3, r4), ncol = 4)
# Generate probabilities for each ideology:
# Very Liberal = x[1]
# Slightly Liberal = x[2] - x[1]
# Moderate = x[3] - x[2]
# Slightly Conservative = x[4] - x[3]
# Very Conservative = 1 - x[4]
p <- apply(probs, 1, function(x) c(x[1], diff(x), 1 - x[4]))
# Now generate ideologies per the probabilities
y <- apply(p, 2, function(x)sample(ideology, size = 1, replace = TRUE, prob = x))
head(y)
# convert y to factor
y <- factor(y, levels = ideology)
# convert grp to factor
party <- factor(grp, labels = c("Rep", "Dem"))
# cross tabulation of party and ideology
table(party, y)
# Fit proportional odds model
library(MASS)
m <- polr(y ~ grp)
summary(m)
# We come pretty close to recovering "true" values used to simulate data
# Compare to multinomial logit model to asses proportional odds assumption
library(nnet)
mlm <- multinom(y ~ grp)
M1 <- logLik(m)
M2 <- logLik(mlm)
(G <- -2*(M1[1] - M2[1]))
pchisq(G,3,lower.tail = FALSE)
# p > 0.05 provides some evidence that the proportional odds assumption is
# satisfied.
##################################
# Now generate data where proportional odds assumption is NOT satisfied.
# The difference is in the slopes below. Notice they all differ.
set.seed(2)
# grp <- sample(0:1, size = n, replace = TRUE)
r1 <- plogis(q = -2.5 - -1.6*grp)
r2 <- plogis(q = -1.5 - -1.1*grp)
r3 <- plogis(q = 0.2 - -0.6*grp)
r4 <- plogis(q = 1.1 - -0.2*grp)
# generate ideology labels as before and fit models
probs <- matrix(c(r1, r2, r3, r4), ncol = 4)
p <- apply(probs, 1, function(x) c(x[1], diff(x), 1 - x[4]))
y <- apply(p, 2, function(x)sample(ideology, size = 1, replace = TRUE, prob = x))
y <- factor(y, levels = ideology)
party <- factor(grp, labels = c("Rep", "Dem"))
# proportional odds model
m <- polr(y ~ grp)
# multinomial logit model
mlm <- multinom(y ~ grp)
# compare models
M1 <- logLik(m)
M2 <- logLik(mlm)
(G <- -2*(M1[1] - M2[1]))
pchisq(G,3,lower.tail = FALSE)
# p < 0.5 provides some evidence that the proportional odds assumption is
# suspect, though this will not always be the case.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment