Last active
February 8, 2024 00:12
-
-
Save BioSciEconomist/168347a77d6e44aca99be51e9cbc707d to your computer and use it in GitHub Desktop.
Simulation and power analysis for ordinal logistic regression with covariates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# *----------------------------------------------------------------- | |
# | PROGRAM NAME: power simulation multiple ordinal logistic.R | |
# | DATE: 2/3/24 | |
# | CREATED BY: MATT BOGARD | |
# | PROJECT FILE: R Training | |
# *---------------------------------------------------------------- | |
# | PURPOSE: power calculations and simulations | |
# *---------------------------------------------------------------- | |
rm(list=ls()) # get rid of any existing data | |
cat("\f") # clear console | |
dev.off() # clear plots | |
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation | |
# reference: https://kgoldfeld.github.io/simstudy/articles/ordinal.html | |
library(simstudy) | |
library(ordinal) | |
library(Hmisc) | |
# | |
# define and simulate the population | |
# | |
baseprobs <- c(0.31, 0.29, .20, 0.20) # select basline category/level probabilities | |
# define binary predictor | |
def <- defData(varname = "age", dist = "normal", formula = 30, | |
variance = 5) | |
def <- defData(def,varname = "D", formula = 0.5, dist = "binary") | |
def <- defData(def, varname = "z", formula = "-0.25*D + .10*age", dist = "nonrandom") | |
# generate data | |
set.seed(130) | |
dT_1_cat <- genData(25000, def) | |
dat <- genOrdCat(dT_1_cat, adjVar = "z", baseprobs, catVar = "r") | |
summary(dat) | |
# fit population level model | |
clmFit <- clm(r ~ D + age, data = dat) | |
summary(clmFit) | |
1/exp(-.23) | |
exp(.09) | |
# Wilcoxon test is equivalent to ordinal logistic | |
#The proportional odds (PO) ordinal logistic model is a generalization of the Wilcoxon-Mann-Whitney | |
# two-sample rank-sum test that allows for covariates. When there are no covariates, | |
# the Wilcoxon test and PO model are equivalent in two ways: | |
# 1 The numerator of the Rao efficient score test for comparing two groups in the unadjusted PO model, | |
# which tests for an odds ratio (OR) of 1.0, is identical to the Wilcoxon test statistic | |
# 2 Over a wide variety of datasets exhibiting both PO and non-PO, the | |
# R^2 for predicting the logit of c from the PO log(OR) is 0.996 | |
# https://www.fharrell.com/post/powilcoxon/ | |
dat$y <- as.integer(dat$r) | |
wilcox.test(y ~ D,data = dat,exact = FALSE) | |
# | |
# sample size calc not considering covariates | |
# | |
p <- c(0.31, 0.29, .20, 0.20) | |
posamsize(p, 1.25, alpha = .05, power = .80) # 2034 | |
# | |
# power simuilation ordinal logistic regression | |
# | |
set.seed(9876) # make this repeatable | |
Sn = 2000 | |
pval <- c() | |
repNum <- c() | |
for (i in 1:500) { | |
sample <- dat[sample(nrow(dat), Sn, replace = T), ] # sample from treatment group | |
summ <- summary(clm(r ~ D + age, data = sample)) | |
p <- summ$coefficients[19] # identify p-val associated with estimate we are interested in | |
pval<- c(pval, p) | |
repNum <- c(repNum,i) | |
print(cbind("Iteration:", i)) # comment this out to stop printing to log | |
} | |
rpt <- data.frame(repNum,pval) | |
sum(rpt$pval<.05)/nrow(rpt) # calculate power | |
# adding covariates seems to require addiitonal n - and standard power calculations | |
# lead to underpowered samples | |
# does a power analysis using the wilcoxon test provide a similar estimate | |
# https://www.fharrell.com/post/powilcoxon/ | |
dat$y <- as.integer(dat$r) | |
wilcox.test(y ~ D,data = dat,exact = FALSE) | |
# basic power calc | |
# example out of: | |
## Zhao YD, Rahardja D, Qu Yongming. | |
## Sample size calculation for the Wilcoxon-Mann-Whitney test adjsuting for ties. | |
## Statistics in Medicine 2008; 27:462-468 | |
mytable <- table(dat$r,dat$D) | |
prop.table(mytable,2) | |
table(dat$D) | |
library(samplesize) | |
n.wilcox.ord(power = 0.8, alpha = 0.05, t = 0.50, p = c(.02,.05,.09,.84), q = c(.03,.06,.12,.79)) | |
# n = 1915 - this is smaller than the ordinal model calculation above but according to | |
# the simulations an underestimate | |
Sn = 2000 | |
set.seed(9876) # make this repeatable 1234567 | |
pval <- c() | |
repNum <- c() | |
for (i in 1:500) { | |
sample <- dat[sample(nrow(dat), Sn, replace = F), ] # sample | |
wcx <- wilcox.test(y ~ D, | |
data = sample, | |
exact = FALSE)$p.value | |
pval <- c(pval,wcx) | |
repNum <- c(repNum,i) # get p-value from this bs sample | |
print(cbind("Iteration:", i)) # comment this out to stop printing to log | |
} | |
rpt <- data.frame(pval) | |
rm(pval,i,sample,repNum) # cleanup | |
sum(rpt$pval<.05)/nrow(rpt) | |
# adding covariates seems to require addiitonal n - and standard power calculations | |
# lead to underpowered samples | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment