BioSciEconomist/power simulation multiple ordinal logistic.R

## power simulation multiple ordinal logistic.R

# *-----------------------------------------------------------------
# | PROGRAM NAME: power simulation multiple ordinal logistic.R
# | DATE: 2/3/24
# | CREATED BY: MATT BOGARD
# | PROJECT FILE: R Training
# *----------------------------------------------------------------
# | PURPOSE: power calculations and simulations
# *----------------------------------------------------------------

rm(list=ls()) # get rid of any existing data
cat("\f") # clear console
dev.off() # clear plots
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation


# reference: https://kgoldfeld.github.io/simstudy/articles/ordinal.html

library(simstudy)
library(ordinal)
library(Hmisc)


#
# define and simulate the  population
#


baseprobs <- c(0.31, 0.29, .20, 0.20) # select basline category/level probabilities

# define binary predictor
def <- defData(varname = "age", dist = "normal", formula = 30,
              variance = 5)
def <- defData(def,varname = "D", formula = 0.5, dist = "binary")
def <- defData(def, varname = "z", formula = "-0.25*D + .10*age", dist = "nonrandom")


# generate data
set.seed(130)
dT_1_cat <- genData(25000, def)
dat <- genOrdCat(dT_1_cat, adjVar = "z", baseprobs, catVar = "r")

summary(dat)

# fit population level model
clmFit <- clm(r ~ D + age, data = dat)
summary(clmFit)
1/exp(-.23)
exp(.09)


# Wilcoxon test is equivalent to ordinal logistic
#The proportional odds (PO) ordinal logistic model is a generalization of the Wilcoxon-Mann-Whitney
# two-sample rank-sum test that allows for covariates. When there are no covariates,
# the Wilcoxon test and PO model are equivalent in two ways:
# 1 The numerator of the Rao efficient score test for comparing two groups in the unadjusted PO model,
# which tests for an odds ratio (OR) of 1.0, is identical to the Wilcoxon test statistic
# 2 Over a wide variety of datasets exhibiting both PO and non-PO, the
# R^2 for predicting the logit of c from the PO log(OR) is 0.996

# https://www.fharrell.com/post/powilcoxon/

dat$y <- as.integer(dat$r)
wilcox.test(y ~ D,data = dat,exact = FALSE)


#
# sample size calc not considering covariates
#

p <- c(0.31, 0.29, .20, 0.20)
posamsize(p, 1.25, alpha = .05, power = .80) # 2034


#
# power simuilation ordinal logistic regression
#


set.seed(9876) # make this repeatable
Sn = 2000
pval <- c()
repNum <- c()
for (i in 1:500) {
  sample <- dat[sample(nrow(dat), Sn, replace = T), ] # sample from treatment group
  summ <- summary(clm(r ~ D + age, data = sample))
  p <- summ$coefficients[19] # identify p-val associated with estimate we are interested in
  pval<- c(pval, p)
  repNum <- c(repNum,i)
  print(cbind("Iteration:", i)) # comment this out to stop printing to log
}

rpt <- data.frame(repNum,pval)


sum(rpt$pval<.05)/nrow(rpt) # calculate power


# adding covariates seems to require addiitonal n - and standard power calculations
# lead to underpowered samples


# does a power analysis using the wilcoxon test provide a similar estimate


# https://www.fharrell.com/post/powilcoxon/

dat$y <- as.integer(dat$r)
wilcox.test(y ~ D,data = dat,exact = FALSE)


# basic power calc

# example out of:
## Zhao YD, Rahardja D, Qu Yongming.
## Sample size calculation for the Wilcoxon-Mann-Whitney test adjsuting for ties.
## Statistics in Medicine 2008; 27:462-468

mytable <- table(dat$r,dat$D)
prop.table(mytable,2)
table(dat$D)

library(samplesize)

n.wilcox.ord(power = 0.8, alpha = 0.05, t = 0.50, p = c(.02,.05,.09,.84), q = c(.03,.06,.12,.79))

# n = 1915 - this is smaller than the ordinal model calculation above but according to
# the simulations an underestimate


Sn = 2000
set.seed(9876) # make this repeatable 1234567

pval <- c()
repNum <- c()

for (i in 1:500) {
  sample <- dat[sample(nrow(dat), Sn, replace = F), ] # sample
  wcx <- wilcox.test(y ~ D,
                     data = sample,
                     exact = FALSE)$p.value
  pval <- c(pval,wcx)

  repNum <- c(repNum,i) # get p-value from this bs sample
  print(cbind("Iteration:", i)) # comment this out to stop printing to log
}


rpt <- data.frame(pval)

rm(pval,i,sample,repNum) # cleanup

sum(rpt$pval<.05)/nrow(rpt)


# adding covariates seems to require addiitonal n - and standard power calculations
# lead to underpowered samples

	# *-----------------------------------------------------------------
	# \| PROGRAM NAME: power simulation multiple ordinal logistic.R
	# \| DATE: 2/3/24
	# \| CREATED BY: MATT BOGARD
	# \| PROJECT FILE: R Training
	# *----------------------------------------------------------------
	# \| PURPOSE: power calculations and simulations
	# *----------------------------------------------------------------

	rm(list=ls()) # get rid of any existing data
	cat("\f") # clear console
	dev.off() # clear plots
	options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation


	# reference: https://kgoldfeld.github.io/simstudy/articles/ordinal.html

	library(simstudy)
	library(ordinal)
	library(Hmisc)


	#
	# define and simulate the population
	#


	baseprobs <- c(0.31, 0.29, .20, 0.20) # select basline category/level probabilities

	# define binary predictor
	def <- defData(varname = "age", dist = "normal", formula = 30,
	variance = 5)
	def <- defData(def,varname = "D", formula = 0.5, dist = "binary")
	def <- defData(def, varname = "z", formula = "-0.25D + .10age", dist = "nonrandom")


	# generate data
	set.seed(130)
	dT_1_cat <- genData(25000, def)
	dat <- genOrdCat(dT_1_cat, adjVar = "z", baseprobs, catVar = "r")

	summary(dat)

	# fit population level model
	clmFit <- clm(r ~ D + age, data = dat)
	summary(clmFit)
	1/exp(-.23)
	exp(.09)



	# Wilcoxon test is equivalent to ordinal logistic
	#The proportional odds (PO) ordinal logistic model is a generalization of the Wilcoxon-Mann-Whitney
	# two-sample rank-sum test that allows for covariates. When there are no covariates,
	# the Wilcoxon test and PO model are equivalent in two ways:
	# 1 The numerator of the Rao efficient score test for comparing two groups in the unadjusted PO model,
	# which tests for an odds ratio (OR) of 1.0, is identical to the Wilcoxon test statistic
	# 2 Over a wide variety of datasets exhibiting both PO and non-PO, the
	# R^2 for predicting the logit of c from the PO log(OR) is 0.996

	# https://www.fharrell.com/post/powilcoxon/

	dat$y <- as.integer(dat$r)
	wilcox.test(y ~ D,data = dat,exact = FALSE)


	#
	# sample size calc not considering covariates
	#

	p <- c(0.31, 0.29, .20, 0.20)
	posamsize(p, 1.25, alpha = .05, power = .80) # 2034


	#
	# power simuilation ordinal logistic regression
	#


	set.seed(9876) # make this repeatable
	Sn = 2000
	pval <- c()
	repNum <- c()
	for (i in 1:500) {
	sample <- dat[sample(nrow(dat), Sn, replace = T), ] # sample from treatment group
	summ <- summary(clm(r ~ D + age, data = sample))
	p <- summ$coefficients[19] # identify p-val associated with estimate we are interested in
	pval<- c(pval, p)
	repNum <- c(repNum,i)
	print(cbind("Iteration:", i)) # comment this out to stop printing to log
	}

	rpt <- data.frame(repNum,pval)


	sum(rpt$pval<.05)/nrow(rpt) # calculate power



	# adding covariates seems to require addiitonal n - and standard power calculations
	# lead to underpowered samples



	# does a power analysis using the wilcoxon test provide a similar estimate


	# https://www.fharrell.com/post/powilcoxon/

	dat$y <- as.integer(dat$r)
	wilcox.test(y ~ D,data = dat,exact = FALSE)


	# basic power calc

	# example out of:
	## Zhao YD, Rahardja D, Qu Yongming.
	## Sample size calculation for the Wilcoxon-Mann-Whitney test adjsuting for ties.
	## Statistics in Medicine 2008; 27:462-468

	mytable <- table(dat$r,dat$D)
	prop.table(mytable,2)
	table(dat$D)

	library(samplesize)

	n.wilcox.ord(power = 0.8, alpha = 0.05, t = 0.50, p = c(.02,.05,.09,.84), q = c(.03,.06,.12,.79))

	# n = 1915 - this is smaller than the ordinal model calculation above but according to
	# the simulations an underestimate


	Sn = 2000
	set.seed(9876) # make this repeatable 1234567

	pval <- c()
	repNum <- c()

	for (i in 1:500) {
	sample <- dat[sample(nrow(dat), Sn, replace = F), ] # sample
	wcx <- wilcox.test(y ~ D,
	data = sample,
	exact = FALSE)$p.value
	pval <- c(pval,wcx)

	repNum <- c(repNum,i) # get p-value from this bs sample
	print(cbind("Iteration:", i)) # comment this out to stop printing to log
	}


	rpt <- data.frame(pval)

	rm(pval,i,sample,repNum) # cleanup

	sum(rpt$pval<.05)/nrow(rpt)


	# adding covariates seems to require addiitonal n - and standard power calculations
	# lead to underpowered samples