BioSciEconomist/power simulation ordinal logistic.R

## power simulation ordinal logistic.R

# *-----------------------------------------------------------------
# | PROGRAM NAME: power simulation ordinal logistic.R
# | DATE: 2/3/24
# | CREATED BY: MATT BOGARD
# | PROJECT FILE: R Training
# *----------------------------------------------------------------
# | PURPOSE: power calculations and simulations
# *----------------------------------------------------------------

rm(list=ls()) # get rid of any existing data
cat("\f") # clear console
dev.off() # clear plots
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation


# reference: https://kgoldfeld.github.io/simstudy/articles/ordinal.html

library(simstudy)
library(ordinal)
library(Hmisc)


#
# define and simulate the  population
#


baseprobs <- c(0.31, 0.29, .20, 0.20) # select basline category/level probabilities

# define binary predictor
defA<- defData(varname = "D", formula = 0.5, dist = "binary")
defA <- defData(defA, varname = "z", formula = "-0.7*D", dist = "nonrandom")

# generate data
set.seed(130)
dT_1_cat <- genData(25000, defA)
dat <- genOrdCat(dT_1_cat, adjVar = "z", baseprobs, catVar = "r")

# fit population level model
clmFit <- clm(r ~ D, data = dat)
summary(clmFit)
exp(-.7275) # OR = .4831

#
# power simulation
#

set.seed(9876) # make this repeatable
Sn = 200
pval <- c()
repNum <- c()
for (i in 1:500) {
  sample <- dat[sample(nrow(dat), Sn, replace = T), ] # sample from treatment group
  summ <- summary(clm(r ~ D, data = sample))
  p <- summ$coefficients[16] # identify p-val associated with estimate we are interested in
  pval<- c(pval, p)
  repNum <- c(repNum,i)
  print(cbind("Iteration:", i)) # comment this out to stop printing to log
}

rpt <- data.frame(repNum,pval)


sum(rpt$pval<.05)/nrow(rpt) # calculate power


#
# how does this compare to the sample size function in the Hmisc package
#

p <- c(0.31, 0.29, .20, 0.20)
posamsize(p, .48, alpha = .05, power = .80) # 188


# the simulation and 'Hmisc' sample size packages give similar results


# does a power analysis using the wilcoxon test provide a conservative estimate

# Wilcoxon test is equivalent to ordinal logistic
#The proportional odds (PO) ordinal logistic model is a generalization of the Wilcoxon-Mann-Whitney
# two-sample rank-sum test that allows for covariates. When there are no covariates,
# the Wilcoxon test and PO model are equivalent in two ways:
# 1 The numerator of the Rao efficient score test for comparing two groups in the unadjusted PO model,
# which tests for an odds ratio (OR) of 1.0, is identical to the Wilcoxon test statistic
# 2 Over a wide variety of datasets exhibiting both PO and non-PO, the
# R^2 for predicting the logit of c from the PO log(OR) is 0.996

# https://www.fharrell.com/post/powilcoxon/

dat$y <- as.integer(dat$r)
wilcox.test(y ~ D,data = dat,exact = FALSE)


# example out of:
## Zhao YD, Rahardja D, Qu Yongming.
## Sample size calculation for the Wilcoxon-Mann-Whitney test adjsuting for ties.
## Statistics in Medicine 2008; 27:462-468

mytable <- table(dat$r,dat$D)
prop.table(mytable,2)
table(dat$D)

library(samplesize)

n.wilcox.ord(power = 0.8, alpha = 0.05, t = 0.50, p = c(.31,.29,.21,.19), q = c(.48,.28,.14,.10))


Sn = 200
set.seed(9876) # make this repeatable 1234567

pval <- c()
repNum <- c()

for (i in 1:500) {
  sample <- dat[sample(nrow(dat), Sn, replace = F), ] # sample
    wcx <- wilcox.test(y ~ D,
                       data = sample,
                       exact = FALSE)$p.value
    pval <- c(pval,wcx)

  repNum <- c(repNum,i) # get p-value from this bs sample
  print(cbind("Iteration:", i)) # comment this out to stop printing to log
}


rpt <- data.frame(pval)

rm(pval,i,sample,repNum) # cleanup

sum(rpt$pval<.05)/nrow(rpt)


# results are similar in this case

	# *-----------------------------------------------------------------
	# \| PROGRAM NAME: power simulation ordinal logistic.R
	# \| DATE: 2/3/24
	# \| CREATED BY: MATT BOGARD
	# \| PROJECT FILE: R Training
	# *----------------------------------------------------------------
	# \| PURPOSE: power calculations and simulations
	# *----------------------------------------------------------------

	rm(list=ls()) # get rid of any existing data
	cat("\f") # clear console
	dev.off() # clear plots
	options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation


	# reference: https://kgoldfeld.github.io/simstudy/articles/ordinal.html

	library(simstudy)
	library(ordinal)
	library(Hmisc)


	#
	# define and simulate the population
	#


	baseprobs <- c(0.31, 0.29, .20, 0.20) # select basline category/level probabilities

	# define binary predictor
	defA<- defData(varname = "D", formula = 0.5, dist = "binary")
	defA <- defData(defA, varname = "z", formula = "-0.7*D", dist = "nonrandom")

	# generate data
	set.seed(130)
	dT_1_cat <- genData(25000, defA)
	dat <- genOrdCat(dT_1_cat, adjVar = "z", baseprobs, catVar = "r")

	# fit population level model
	clmFit <- clm(r ~ D, data = dat)
	summary(clmFit)
	exp(-.7275) # OR = .4831

	#
	# power simulation
	#

	set.seed(9876) # make this repeatable
	Sn = 200
	pval <- c()
	repNum <- c()
	for (i in 1:500) {
	sample <- dat[sample(nrow(dat), Sn, replace = T), ] # sample from treatment group
	summ <- summary(clm(r ~ D, data = sample))
	p <- summ$coefficients[16] # identify p-val associated with estimate we are interested in
	pval<- c(pval, p)
	repNum <- c(repNum,i)
	print(cbind("Iteration:", i)) # comment this out to stop printing to log
	}

	rpt <- data.frame(repNum,pval)


	sum(rpt$pval<.05)/nrow(rpt) # calculate power


	#
	# how does this compare to the sample size function in the Hmisc package
	#

	p <- c(0.31, 0.29, .20, 0.20)
	posamsize(p, .48, alpha = .05, power = .80) # 188


	# the simulation and 'Hmisc' sample size packages give similar results


	# does a power analysis using the wilcoxon test provide a conservative estimate

	# Wilcoxon test is equivalent to ordinal logistic
	#The proportional odds (PO) ordinal logistic model is a generalization of the Wilcoxon-Mann-Whitney
	# two-sample rank-sum test that allows for covariates. When there are no covariates,
	# the Wilcoxon test and PO model are equivalent in two ways:
	# 1 The numerator of the Rao efficient score test for comparing two groups in the unadjusted PO model,
	# which tests for an odds ratio (OR) of 1.0, is identical to the Wilcoxon test statistic
	# 2 Over a wide variety of datasets exhibiting both PO and non-PO, the
	# R^2 for predicting the logit of c from the PO log(OR) is 0.996

	# https://www.fharrell.com/post/powilcoxon/

	dat$y <- as.integer(dat$r)
	wilcox.test(y ~ D,data = dat,exact = FALSE)



	# example out of:
	## Zhao YD, Rahardja D, Qu Yongming.
	## Sample size calculation for the Wilcoxon-Mann-Whitney test adjsuting for ties.
	## Statistics in Medicine 2008; 27:462-468

	mytable <- table(dat$r,dat$D)
	prop.table(mytable,2)
	table(dat$D)

	library(samplesize)

	n.wilcox.ord(power = 0.8, alpha = 0.05, t = 0.50, p = c(.31,.29,.21,.19), q = c(.48,.28,.14,.10))


	Sn = 200
	set.seed(9876) # make this repeatable 1234567

	pval <- c()
	repNum <- c()

	for (i in 1:500) {
	sample <- dat[sample(nrow(dat), Sn, replace = F), ] # sample
	wcx <- wilcox.test(y ~ D,
	data = sample,
	exact = FALSE)$p.value
	pval <- c(pval,wcx)

	repNum <- c(repNum,i) # get p-value from this bs sample
	print(cbind("Iteration:", i)) # comment this out to stop printing to log
	}


	rpt <- data.frame(pval)

	rm(pval,i,sample,repNum) # cleanup

	sum(rpt$pval<.05)/nrow(rpt)


	# results are similar in this case