Skip to content

Instantly share code, notes, and snippets.

@BioSciEconomist
Last active February 8, 2024 00:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BioSciEconomist/3d1b61b202b1e164574211a20ec15951 to your computer and use it in GitHub Desktop.
Save BioSciEconomist/3d1b61b202b1e164574211a20ec15951 to your computer and use it in GitHub Desktop.
Power and sample size simulation for ordinal regression outcome
# *-----------------------------------------------------------------
# | PROGRAM NAME: power simulation ordinal logistic.R
# | DATE: 2/3/24
# | CREATED BY: MATT BOGARD
# | PROJECT FILE: R Training
# *----------------------------------------------------------------
# | PURPOSE: power calculations and simulations
# *----------------------------------------------------------------
rm(list=ls()) # get rid of any existing data
cat("\f") # clear console
dev.off() # clear plots
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation
# reference: https://kgoldfeld.github.io/simstudy/articles/ordinal.html
library(simstudy)
library(ordinal)
library(Hmisc)
#
# define and simulate the population
#
baseprobs <- c(0.31, 0.29, .20, 0.20) # select basline category/level probabilities
# define binary predictor
defA<- defData(varname = "D", formula = 0.5, dist = "binary")
defA <- defData(defA, varname = "z", formula = "-0.7*D", dist = "nonrandom")
# generate data
set.seed(130)
dT_1_cat <- genData(25000, defA)
dat <- genOrdCat(dT_1_cat, adjVar = "z", baseprobs, catVar = "r")
# fit population level model
clmFit <- clm(r ~ D, data = dat)
summary(clmFit)
exp(-.7275) # OR = .4831
#
# power simulation
#
set.seed(9876) # make this repeatable
Sn = 200
pval <- c()
repNum <- c()
for (i in 1:500) {
sample <- dat[sample(nrow(dat), Sn, replace = T), ] # sample from treatment group
summ <- summary(clm(r ~ D, data = sample))
p <- summ$coefficients[16] # identify p-val associated with estimate we are interested in
pval<- c(pval, p)
repNum <- c(repNum,i)
print(cbind("Iteration:", i)) # comment this out to stop printing to log
}
rpt <- data.frame(repNum,pval)
sum(rpt$pval<.05)/nrow(rpt) # calculate power
#
# how does this compare to the sample size function in the Hmisc package
#
p <- c(0.31, 0.29, .20, 0.20)
posamsize(p, .48, alpha = .05, power = .80) # 188
# the simulation and 'Hmisc' sample size packages give similar results
# does a power analysis using the wilcoxon test provide a conservative estimate
# Wilcoxon test is equivalent to ordinal logistic
#The proportional odds (PO) ordinal logistic model is a generalization of the Wilcoxon-Mann-Whitney
# two-sample rank-sum test that allows for covariates. When there are no covariates,
# the Wilcoxon test and PO model are equivalent in two ways:
# 1 The numerator of the Rao efficient score test for comparing two groups in the unadjusted PO model,
# which tests for an odds ratio (OR) of 1.0, is identical to the Wilcoxon test statistic
# 2 Over a wide variety of datasets exhibiting both PO and non-PO, the
# R^2 for predicting the logit of c from the PO log(OR) is 0.996
# https://www.fharrell.com/post/powilcoxon/
dat$y <- as.integer(dat$r)
wilcox.test(y ~ D,data = dat,exact = FALSE)
# example out of:
## Zhao YD, Rahardja D, Qu Yongming.
## Sample size calculation for the Wilcoxon-Mann-Whitney test adjsuting for ties.
## Statistics in Medicine 2008; 27:462-468
mytable <- table(dat$r,dat$D)
prop.table(mytable,2)
table(dat$D)
library(samplesize)
n.wilcox.ord(power = 0.8, alpha = 0.05, t = 0.50, p = c(.31,.29,.21,.19), q = c(.48,.28,.14,.10))
Sn = 200
set.seed(9876) # make this repeatable 1234567
pval <- c()
repNum <- c()
for (i in 1:500) {
sample <- dat[sample(nrow(dat), Sn, replace = F), ] # sample
wcx <- wilcox.test(y ~ D,
data = sample,
exact = FALSE)$p.value
pval <- c(pval,wcx)
repNum <- c(repNum,i) # get p-value from this bs sample
print(cbind("Iteration:", i)) # comment this out to stop printing to log
}
rpt <- data.frame(pval)
rm(pval,i,sample,repNum) # cleanup
sum(rpt$pval<.05)/nrow(rpt)
# results are similar in this case
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment