Simulation and power analysis for ordinal logistic regression with covariates
# *-----------------------------------------------------------------
# | PROGRAM NAME: power simulation multiple ordinal logistic.R
# | DATE: 2/3/24
# | CREATED BY: MATT BOGARD
# | PROJECT FILE: R Training
# *----------------------------------------------------------------
# | PURPOSE: power calculations and simulations
# *----------------------------------------------------------------
rm(list=ls()) # get rid of any existing data
cat("\f") # clear console
dev.off() # clear plots
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation
# reference: https://kgoldfeld.github.io/simstudy/articles/ordinal.html
library(simstudy)
library(ordinal)
library(Hmisc)
#
# define and simulate the population
#
baseprobs <- c(0.31, 0.29, .20, 0.20) # select baseline category/level probabilities
# define covariates: continuous age and a binary treatment indicator D
def <- defData(varname = "age", dist = "normal", formula = 30, variance = 5)
def <- defData(def, varname = "D", formula = 0.5, dist = "binary")
def <- defData(def, varname = "z", formula = "-0.25*D + .10*age", dist = "nonrandom")
# generate data
set.seed(130)
dT_1_cat <- genData(25000, def)
dat <- genOrdCat(dT_1_cat, adjVar = "z", baseprobs, catVar = "r")
summary(dat)
# fit population level model
clmFit <- clm(r ~ D + age, data = dat)
summary(clmFit)
1/exp(-.23) # approximate OR for D, using the estimate from the model summary above (inverted for interpretation)
exp(.09)    # approximate OR for age
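# optional: the same quantities can be pulled from the fitted model directly
# rather than retyped from the printed summary
est <- coef(clmFit)[c("D", "age")]
exp(est)        # odds ratios for D and age
1/exp(est["D"]) # inverted OR for D, comparable to 1/exp(-.23) above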
# Wilcoxon test is equivalent to ordinal logistic regression
# The proportional odds (PO) ordinal logistic model is a generalization of the Wilcoxon-Mann-Whitney
# two-sample rank-sum test that allows for covariates. When there are no covariates,
# the Wilcoxon test and PO model are equivalent in two ways:
# 1. The numerator of the Rao efficient score test for comparing two groups in the unadjusted PO model,
#    which tests for an odds ratio (OR) of 1.0, is identical to the Wilcoxon test statistic
# 2. Over a wide variety of datasets exhibiting both PO and non-PO, the
#    R^2 for predicting the logit of the concordance probability c from the PO log(OR) is 0.996
# https://www.fharrell.com/post/powilcoxon/
dat$y <- as.integer(dat$r)
wilcox.test(y ~ D, data = dat, exact = FALSE)
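# rough check: the strict equivalence above is for the score test, but the Wald
# p-value for D from an unadjusted PO model should still be very close to the
# Wilcoxon p-value
clmFit0 <- clm(r ~ D, data = dat)
summary(clmFit0)$coefficients["D", "Pr(>|z|)"]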
#
# sample size calc not considering covariates
#
p <- c(0.31, 0.29, .20, 0.20)
posamsize(p, odds.ratio = 1.25, alpha = .05, power = .80) # 2034
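# optional sanity check: Hmisc::popower should report power close to 0.80
# for the n suggested above under the same baseline probabilities
popower(p, odds.ratio = 1.25, n = 2034, alpha = .05)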
#
# power simulation: ordinal logistic regression
#
set.seed(9876) # make this repeatable
Sn <- 2000 # total sample size per simulated trial
pval <- c()
repNum <- c()
for (i in 1:500) {
sample <- dat[sample(nrow(dat), Sn, replace = T), ] # bootstrap sample from the simulated population
summ <- summary(clm(r ~ D + age, data = sample))
p <- summ$coefficients["D", "Pr(>|z|)"] # p-value for the treatment effect D
pval <- c(pval, p)
repNum <- c(repNum, i)
print(cbind("Iteration:", i)) # comment this out to stop printing to log
}
rpt <- data.frame(repNum,pval)
sum(rpt$pval<.05)/nrow(rpt) # calculate power
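# optional: with 500 replicates the power estimate carries Monte Carlo error;
# a binomial confidence interval gives a sense of its precision
binom.test(sum(rpt$pval < .05), nrow(rpt))$conf.int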
# adding covariates seems to require additional n - standard power calculations
# lead to underpowered samples
# does a power analysis using the Wilcoxon test provide a similar estimate?
# https://www.fharrell.com/post/powilcoxon/
dat$y <- as.integer(dat$r)
wilcox.test(y ~ D, data = dat, exact = FALSE)
# basic power calc
# example out of:
## Zhao YD, Rahardja D, Qu Yongming.
## Sample size calculation for the Wilcoxon-Mann-Whitney test adjusting for ties.
## Statistics in Medicine 2008; 27:462-468
mytable <- table(dat$r,dat$D)
prop.table(mytable,2)
table(dat$D)
library(samplesize)
n.wilcox.ord(power = 0.8, alpha = 0.05, t = 0.50, p = c(.02,.05,.09,.84), q = c(.03,.06,.12,.79))
# n = 1915 - smaller than the ordinal model calculation above, but according to
# the simulations it is an underestimate
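# alternative sketch: rather than retyping the proportions printed by prop.table above,
# they can be pulled from the table directly (assumes D is coded 0/1, so the
# columns of mytable are labeled "0" and "1")
pq <- prop.table(mytable, 2)
n.wilcox.ord(power = 0.8, alpha = 0.05, t = 0.50, p = pq[, "0"], q = pq[, "1"])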
Sn <- 2000
set.seed(9876) # make this repeatable
pval <- c()
repNum <- c()
for (i in 1:500) {
sample <- dat[sample(nrow(dat), Sn, replace = F), ] # sample without replacement from the simulated population
wcx <- wilcox.test(y ~ D, data = sample, exact = FALSE)$p.value # p-value from this sample
pval <- c(pval, wcx)
repNum <- c(repNum, i)
print(cbind("Iteration:", i)) # comment this out to stop printing to log
}
rpt <- data.frame(pval)
rm(pval,i,sample,repNum) # cleanup
sum(rpt$pval<.05)/nrow(rpt) # calculate power
# adding covariates seems to require additional n - standard power calculations
# lead to underpowered samples