Last active
February 8, 2024 00:00
-
-
Save BioSciEconomist/3d1b61b202b1e164574211a20ec15951 to your computer and use it in GitHub Desktop.
Power and sample size simulation for ordinal regression outcome
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# *----------------------------------------------------------------- | |
# | PROGRAM NAME: power simulation ordinal logistic.R | |
# | DATE: 2/3/24 | |
# | CREATED BY: MATT BOGARD | |
# | PROJECT FILE: R Training | |
# *---------------------------------------------------------------- | |
# | PURPOSE: power calculations and simulations | |
# *---------------------------------------------------------------- | |
rm(list=ls()) # get rid of any existing data | |
cat("\f") # clear console | |
dev.off() # clear plots | |
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation | |
# reference: https://kgoldfeld.github.io/simstudy/articles/ordinal.html | |
library(simstudy) | |
library(ordinal) | |
library(Hmisc) | |
# | |
# define and simulate the population | |
# | |
baseprobs <- c(0.31, 0.29, .20, 0.20) # select basline category/level probabilities | |
# define binary predictor | |
defA<- defData(varname = "D", formula = 0.5, dist = "binary") | |
defA <- defData(defA, varname = "z", formula = "-0.7*D", dist = "nonrandom") | |
# generate data | |
set.seed(130) | |
dT_1_cat <- genData(25000, defA) | |
dat <- genOrdCat(dT_1_cat, adjVar = "z", baseprobs, catVar = "r") | |
# fit population level model | |
clmFit <- clm(r ~ D, data = dat) | |
summary(clmFit) | |
exp(-.7275) # OR = .4831 | |
# | |
# power simulation | |
# | |
set.seed(9876) # make this repeatable | |
Sn = 200 | |
pval <- c() | |
repNum <- c() | |
for (i in 1:500) { | |
sample <- dat[sample(nrow(dat), Sn, replace = T), ] # sample from treatment group | |
summ <- summary(clm(r ~ D, data = sample)) | |
p <- summ$coefficients[16] # identify p-val associated with estimate we are interested in | |
pval<- c(pval, p) | |
repNum <- c(repNum,i) | |
print(cbind("Iteration:", i)) # comment this out to stop printing to log | |
} | |
rpt <- data.frame(repNum,pval) | |
sum(rpt$pval<.05)/nrow(rpt) # calculate power | |
# | |
# how does this compare to the sample size function in the Hmisc package | |
# | |
p <- c(0.31, 0.29, .20, 0.20) | |
posamsize(p, .48, alpha = .05, power = .80) # 188 | |
# the simulation and 'Hmisc' sample size packages give similar results | |
# does a power analysis using the wilcoxon test provide a conservative estimate | |
# Wilcoxon test is equivalent to ordinal logistic | |
#The proportional odds (PO) ordinal logistic model is a generalization of the Wilcoxon-Mann-Whitney | |
# two-sample rank-sum test that allows for covariates. When there are no covariates, | |
# the Wilcoxon test and PO model are equivalent in two ways: | |
# 1 The numerator of the Rao efficient score test for comparing two groups in the unadjusted PO model, | |
# which tests for an odds ratio (OR) of 1.0, is identical to the Wilcoxon test statistic | |
# 2 Over a wide variety of datasets exhibiting both PO and non-PO, the | |
# R^2 for predicting the logit of c from the PO log(OR) is 0.996 | |
# https://www.fharrell.com/post/powilcoxon/ | |
dat$y <- as.integer(dat$r) | |
wilcox.test(y ~ D,data = dat,exact = FALSE) | |
# example out of: | |
## Zhao YD, Rahardja D, Qu Yongming. | |
## Sample size calculation for the Wilcoxon-Mann-Whitney test adjsuting for ties. | |
## Statistics in Medicine 2008; 27:462-468 | |
mytable <- table(dat$r,dat$D) | |
prop.table(mytable,2) | |
table(dat$D) | |
library(samplesize) | |
n.wilcox.ord(power = 0.8, alpha = 0.05, t = 0.50, p = c(.31,.29,.21,.19), q = c(.48,.28,.14,.10)) | |
Sn = 200 | |
set.seed(9876) # make this repeatable 1234567 | |
pval <- c() | |
repNum <- c() | |
for (i in 1:500) { | |
sample <- dat[sample(nrow(dat), Sn, replace = F), ] # sample | |
wcx <- wilcox.test(y ~ D, | |
data = sample, | |
exact = FALSE)$p.value | |
pval <- c(pval,wcx) | |
repNum <- c(repNum,i) # get p-value from this bs sample | |
print(cbind("Iteration:", i)) # comment this out to stop printing to log | |
} | |
rpt <- data.frame(pval) | |
rm(pval,i,sample,repNum) # cleanup | |
sum(rpt$pval<.05)/nrow(rpt) | |
# results are similar in this case |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment