Power calculation showing why ordinal outcomes are more efficient than binary (dichotomized) outcomes
# Ordinal versus binary outcome power calculation
# Robert Kubinec
# New York University Abu Dhabi
# January 31, 2022

library(dplyr)
library(tidyr)    # needed for gather() below
library(ordinal)
library(ggplot2)
N <- 500
# Three cutpoints define the four ordinal categories used below. The middle
# cutpoint of 0 is an assumed value, chosen so that the binary design splits
# the latent scale at the same zero point.
exist_cut <- c(-2, 0, 2)
effects <- seq(0.25, 1.25, by=0.01)
iter <- 1000
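
# Hedged sanity check (illustrative, not part of the simulation itself):
# baseline category probabilities for a control unit (linear predictor of
# zero) implied by the cutpoints; they should be positive and sum to one.
base_pr <- diff(c(0, plogis(exist_cut), 1))
stopifnot(all(base_pr > 0), isTRUE(all.equal(sum(base_pr), 1)))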
# Ordinal design: simulate a four-category ordered outcome from a cumulative
# logit model and fit a proportional-odds model with ordinal::clm().
ord_samp <- parallel::mclapply(effects, function(e) {

  # 30 treated units with latent effect e, 470 controls (N = 500 total)
  treat_vec <- c(rep(e, 30), rep(0, 470))

  over_iter <- lapply(1:iter, function(i) {

    # Category probabilities from the cumulative logistic model:
    # plogis(eta - cut_k) is the probability the latent response
    # exceeds cutpoint k
    pr_beg <- 1 - plogis(treat_vec - exist_cut[1])
    pr_mid1 <- plogis(treat_vec - exist_cut[1]) - plogis(treat_vec - exist_cut[2])
    pr_mid2 <- plogis(treat_vec - exist_cut[2]) - plogis(treat_vec - exist_cut[3])
    pr_end <- plogis(treat_vec - exist_cut[3])
    pr_mat <- cbind(pr_beg, pr_mid1, pr_mid2, pr_end)

    # Draw one ordinal category per subject
    out_samp <- apply(pr_mat, 1, function(p) {
      sample(1:4, size=1, prob=p)
    })

    this_data <- tibble(outcome=ordered(out_samp),
                        treat_vec=as.numeric(treat_vec>0))

    est_mod <- summary(ordinal::clm(outcome ~ treat_vec, data=this_data))

    # Row 4 of the coefficient table is the treatment effect
    # (rows 1-3 are the estimated thresholds)
    tibble(treat_est=est_mod$coefficients[4,1],
           p_val=est_mod$coefficients[4,4],
           real_treat=e)
  })

  # Return one combined data frame per effect size
  bind_rows(over_iter)
}, mc.cores=parallel::detectCores()) %>%
  bind_rows
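
# ord_samp now has one row per iteration per effect size: the estimated
# treatment effect (treat_est), its p-value (p_val), and the true effect
# size used to generate the data (real_treat).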
# Binary design: the same latent effect, but the outcome is dichotomized at
# the latent scale's zero point, i.e. prob = plogis(treat_vec - 0).
bin_samp <- parallel::mclapply(effects, function(e) {

  treat_vec <- c(rep(e, 30), rep(0, 470))

  over_iter <- lapply(1:iter, function(i) {

    this_data <- tibble(outcome=rbinom(n=N, size=1,
                                       prob=plogis(treat_vec)),
                        treat_vec=as.numeric(treat_vec>0))

    est_mod <- summary(glm(outcome ~ treat_vec, data=this_data,
                           family="binomial"))

    # Row 2 is the treatment effect (row 1 is the intercept)
    tibble(treat_est=est_mod$coefficients[2,1],
           p_val=est_mod$coefficients[2,4],
           real_treat=e)
  })

  bind_rows(over_iter)
}, mc.cores=parallel::detectCores()) %>%
  bind_rows
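
# Hedged spot check (column names as defined above): empirical power of each
# design at a single grid point, previewing the Power panel of the plot.
# dplyr::near() avoids floating-point equality issues with the seq() grid.
bind_rows(mutate(ord_samp, type = "Ordinal"),
          mutate(bin_samp, type = "Binary")) %>%
  filter(near(real_treat, 0.75)) %>%
  group_by(type) %>%
  summarize(power = mean(p_val < 0.05))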
# Reference lines for the facetted diagnostics. The NA for S_error suppresses
# the horizontal line in that panel; the dotted vertical lines mark two
# example effect sizes (0.5 and 0.75) in every panel.
horiz_lines <- tibble(Diagnostic=c('M_error','S_error',"Power"),
                      yintercept=c(1,NA,.8))
vert_lines <- tibble(Diagnostic=c('M_error','M_error',
                                  'S_error','S_error',
                                  "Power","Power"),
                     xintercept=rep(c(.5,.75),3))
# Combine both designs and compute three diagnostics per iteration: Power
# (significant at p < .05), M-error (exaggeration ratio of estimated to true
# effect), and S-error (wrong-sign estimate), then average over iterations
# within each true effect size.
bind_rows(mutate(ord_samp, type="Ordinal"),
          mutate(bin_samp, type="Binary")) %>%
  mutate(Power=p_val<0.05,
         M_error=abs(treat_est)/abs(real_treat),
         S_error=as.numeric(sign(real_treat)!=sign(treat_est))) %>%
  gather(key="Diagnostic",value="estimate",-real_treat,
         -treat_est,-p_val,-type) %>%
  group_by(type,Diagnostic,real_treat) %>%
  summarize(mean_est=mean(estimate)) %>%
  ggplot(aes(y=mean_est,x=real_treat)) +
  geom_line(aes(colour=type)) +
  geom_hline(data=horiz_lines, aes(yintercept=yintercept),
             linetype=2) +
  geom_vline(data=vert_lines, aes(xintercept=xintercept),
             linetype=3) +
  theme(panel.background = element_blank(),
        plot.caption = element_text(size=7)) +
  facet_wrap(~Diagnostic, scales="free_y",nrow=3) +
  labs(x="True Effect Size",y="",
       caption=stringr::str_wrap("M-errors are the ratio of estimated to true effect sizes and S-errors are the probability of obtaining an effect of the wrong sign from the true effect. Power is the probability of detecting a significant effect. Statistics calculated by Monte Carlo simulation.",width = 65)) +
  ggtitle("Power Calculation for Varying Effect Sizes") +
  scale_color_viridis_d()
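
# Optional: write the figure to disk. The file name and dimensions below are
# assumptions, not part of the original gist.
# ggsave("ordinal_vs_binary_power.png", width = 6, height = 8)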