Created
June 25, 2020 14:54
-
-
Save clayford/5b719d27e8790fbb0a62cf6a6309902b to your computer and use it in GitHub Desktop.
Simulate data from a POLR model with proportional odds assumption satisfied and a POLR model without the assumption satisfied to demonstrate how the comparison to a multinomial logit model can provide some evidence of the proportional odds assumption or lack thereof.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Clay Ford | |
# 2020-06-22 | |
# Simulate data from a proportional odds model with proportional odds assumption | |
# satisfied. | |
# 300 observations and a grouping variable (example: democratic/republican) | |
n <- 300 | |
set.seed(1) | |
grp <- sample(0:1, size = n, replace = TRUE) | |
# Generate boundary probabilities of professed ideology. | |
ideology <- c("Very Liberal", "Slightly Liberal", "Moderate", "Slightly Conservative", "Very Conservative") | |
# Notice the slope is always -1. | |
# That is, the proportional odds assumption is satisfied. | |
# plogis() is the inverse logit function | |
r1 <- plogis(q = -2.5 - -1*grp) | |
r2 <- plogis(q = -1.5 - -1*grp) | |
r3 <- plogis(q = 0.2 - -1*grp) | |
r4 <- plogis(q = 1.1 - -1*grp) | |
grp[1:2] | |
r1[1:2] | |
# Probability of "Very Liberal" if Republican | |
head(r1[grp==0],1) | |
# Probability of "Very Liberal" if Democrat | |
head(r1[grp==1],1) | |
# Probability of "Very Liberal" or "Slightly Liberal" if Republican | |
head(r2[grp==0],1) | |
# Probability of "Very Liberal" or "Slightly Liberal" if Democrat | |
head(r2[grp==1],1) | |
# Probability of "Very Liberal" or "Slightly Liberal" or "Moderate" if Republican | |
head(r3[grp==0],1) | |
# Probability of "Very Liberal" or "Slightly Liberal" or "Moderate" if Democrat | |
head(r3[grp==1],1) | |
# Probability of "Very Liberal" or "Slightly Liberal" or "Moderate" if Republican | |
head(r4[grp==0],1) | |
# Probability of "Very Liberal" or "Slightly Liberal" or "Moderate" if Democrat | |
head(r4[grp==1],1) | |
# Probability of "Very Liberal" or "Slightly Liberal" or "Moderate" or "Slightly | |
# Conservative" if Republican | |
head(r4[grp==0],1) | |
# Probability of "Very Liberal" or "Slightly Liberal" or "Moderate" or "Slightly | |
# Conservative" if Democrat | |
head(r4[grp==1],1) | |
# Place probabilities into a matrix | |
probs <- matrix(c(r1, r2, r3, r4), ncol = 4) | |
# Generate probabilities for each ideology: | |
# Very Liberal = x[1] | |
# Slightly Liberal = x[2] - x[1] | |
# Moderate = x[3] - x[2] | |
# Slightly Conservative = x[4] - x[3] | |
# Very Conservative = 1 - x[4] | |
p <- apply(probs, 1, function(x) c(x[1], diff(x), 1 - x[4])) | |
# Now generate ideologies per the probabilities | |
y <- apply(p, 2, function(x)sample(ideology, size = 1, replace = TRUE, prob = x)) | |
head(y) | |
# convert y to factor | |
y <- factor(y, levels = ideology) | |
# convert grp to factor | |
party <- factor(grp, labels = c("Rep", "Dem")) | |
# cross tabulation of party and ideology | |
table(party, y) | |
# Fit proportional odds model | |
library(MASS) | |
m <- polr(y ~ grp) | |
summary(m) | |
# We come pretty close to recovering "true" values used to simulate data | |
# Compare to multinomial logit model to asses proportional odds assumption | |
library(nnet) | |
mlm <- multinom(y ~ grp) | |
M1 <- logLik(m) | |
M2 <- logLik(mlm) | |
(G <- -2*(M1[1] - M2[1])) | |
pchisq(G,3,lower.tail = FALSE) | |
# p > 0.05 provides some evidence that the proportional odds assumption is | |
# satisfied. | |
################################## | |
# Now generate data where proportional odds assumption is NOT satisfied. | |
# The difference is in the slopes below. Notice they all differ. | |
set.seed(2) | |
# grp <- sample(0:1, size = n, replace = TRUE) | |
r1 <- plogis(q = -2.5 - -1.6*grp) | |
r2 <- plogis(q = -1.5 - -1.1*grp) | |
r3 <- plogis(q = 0.2 - -0.6*grp) | |
r4 <- plogis(q = 1.1 - -0.2*grp) | |
# generate ideology labels as before and fit models | |
probs <- matrix(c(r1, r2, r3, r4), ncol = 4) | |
p <- apply(probs, 1, function(x) c(x[1], diff(x), 1 - x[4])) | |
y <- apply(p, 2, function(x)sample(ideology, size = 1, replace = TRUE, prob = x)) | |
y <- factor(y, levels = ideology) | |
party <- factor(grp, labels = c("Rep", "Dem")) | |
# proportional odds model | |
m <- polr(y ~ grp) | |
# multinomial logit model | |
mlm <- multinom(y ~ grp) | |
# compare models | |
M1 <- logLik(m) | |
M2 <- logLik(mlm) | |
(G <- -2*(M1[1] - M2[1])) | |
pchisq(G,3,lower.tail = FALSE) | |
# p < 0.5 provides some evidence that the proportional odds assumption is | |
# suspect, though this will not always be the case. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment