Power calculation showing why ordinal outcomes are more efficient than binary (dichotomized) outcomes
# Ordinal versus binary outcome power calculation
# Robert Kubinec
# New York University Abu Dhabi
# January 31, 2022

library(dplyr)
library(tidyr)    # needed for gather() below
library(ordinal)
library(ggplot2)
N <- 500
# Three cutpoints define the four ordinal categories used below. The middle
# cutpoint of 0 is an assumed value, chosen so that the binary design splits
# the latent scale at the same zero point.
exist_cut <- c(-2, 0, 2)
effects <- seq(0.25, 1.25, by=0.01)
iter <- 1000
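
# Hedged sanity check (illustrative, not part of the simulation itself):
# baseline category probabilities for a control unit (linear predictor of
# zero) implied by the cutpoints; they should be positive and sum to one.
base_pr <- diff(c(0, plogis(exist_cut), 1))
stopifnot(all(base_pr > 0), isTRUE(all.equal(sum(base_pr), 1)))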
# Ordinal design: simulate a four-category ordered outcome from a cumulative
# logit model and fit a proportional-odds model with ordinal::clm().
ord_samp <- parallel::mclapply(effects, function(e) {

  # 30 treated units with latent effect e, 470 controls (N = 500 total)
  treat_vec <- c(rep(e, 30), rep(0, 470))

  over_iter <- lapply(1:iter, function(i) {

    # Category probabilities from the cumulative logistic model:
    # plogis(eta - cut_k) is the probability the latent response
    # exceeds cutpoint k
    pr_beg <- 1 - plogis(treat_vec - exist_cut[1])
    pr_mid1 <- plogis(treat_vec - exist_cut[1]) - plogis(treat_vec - exist_cut[2])
    pr_mid2 <- plogis(treat_vec - exist_cut[2]) - plogis(treat_vec - exist_cut[3])
    pr_end <- plogis(treat_vec - exist_cut[3])
    pr_mat <- cbind(pr_beg, pr_mid1, pr_mid2, pr_end)

    # Draw one ordinal category per subject
    out_samp <- apply(pr_mat, 1, function(p) {
      sample(1:4, size=1, prob=p)
    })

    this_data <- tibble(outcome=ordered(out_samp),
                        treat_vec=as.numeric(treat_vec>0))

    est_mod <- summary(ordinal::clm(outcome ~ treat_vec, data=this_data))

    # Row 4 of the coefficient table is the treatment effect
    # (rows 1-3 are the estimated thresholds)
    tibble(treat_est=est_mod$coefficients[4,1],
           p_val=est_mod$coefficients[4,4],
           real_treat=e)
  })

  # Return one combined data frame per effect size
  bind_rows(over_iter)
}, mc.cores=parallel::detectCores()) %>%
  bind_rows
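
# ord_samp now has one row per iteration per effect size: the estimated
# treatment effect (treat_est), its p-value (p_val), and the true effect
# size used to generate the data (real_treat).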
# Binary design: the same latent effect, but the outcome is dichotomized at
# the latent scale's zero point, i.e. prob = plogis(treat_vec - 0).
bin_samp <- parallel::mclapply(effects, function(e) {

  treat_vec <- c(rep(e, 30), rep(0, 470))

  over_iter <- lapply(1:iter, function(i) {

    this_data <- tibble(outcome=rbinom(n=N, size=1,
                                       prob=plogis(treat_vec)),
                        treat_vec=as.numeric(treat_vec>0))

    est_mod <- summary(glm(outcome ~ treat_vec, data=this_data,
                           family="binomial"))

    # Row 2 is the treatment effect (row 1 is the intercept)
    tibble(treat_est=est_mod$coefficients[2,1],
           p_val=est_mod$coefficients[2,4],
           real_treat=e)
  })

  bind_rows(over_iter)
}, mc.cores=parallel::detectCores()) %>%
  bind_rows
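
# Hedged spot check (column names as defined above): empirical power of each
# design at a single grid point, previewing the Power panel of the plot.
# dplyr::near() avoids floating-point equality issues with the seq() grid.
bind_rows(mutate(ord_samp, type = "Ordinal"),
          mutate(bin_samp, type = "Binary")) %>%
  filter(near(real_treat, 0.75)) %>%
  group_by(type) %>%
  summarize(power = mean(p_val < 0.05))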
# Reference lines for the facetted diagnostics. The NA for S_error suppresses
# the horizontal line in that panel; the dotted vertical lines mark two
# example effect sizes (0.5 and 0.75) in every panel.
horiz_lines <- tibble(Diagnostic=c('M_error','S_error',"Power"),
                      yintercept=c(1,NA,.8))
vert_lines <- tibble(Diagnostic=c('M_error','M_error',
                                  'S_error','S_error',
                                  "Power","Power"),
                     xintercept=rep(c(.5,.75),3))
# Combine both designs and compute three diagnostics per iteration: Power
# (significant at p < .05), M-error (exaggeration ratio of estimated to true
# effect), and S-error (wrong-sign estimate), then average over iterations
# within each true effect size.
bind_rows(mutate(ord_samp, type="Ordinal"),
          mutate(bin_samp, type="Binary")) %>%
  mutate(Power=p_val<0.05,
         M_error=abs(treat_est)/abs(real_treat),
         S_error=as.numeric(sign(real_treat)!=sign(treat_est))) %>%
  gather(key="Diagnostic",value="estimate",-real_treat,
         -treat_est,-p_val,-type) %>%
  group_by(type,Diagnostic,real_treat) %>%
  summarize(mean_est=mean(estimate)) %>%
  ggplot(aes(y=mean_est,x=real_treat)) +
  geom_line(aes(colour=type)) +
  geom_hline(data=horiz_lines, aes(yintercept=yintercept),
             linetype=2) +
  geom_vline(data=vert_lines, aes(xintercept=xintercept),
             linetype=3) +
  theme(panel.background = element_blank(),
        plot.caption = element_text(size=7)) +
  facet_wrap(~Diagnostic, scales="free_y",nrow=3) +
  labs(x="True Effect Size",y="",
       caption=stringr::str_wrap("M-errors are the ratio of estimated to true effect sizes and S-errors are the probability of obtaining an effect of the wrong sign from the true effect. Power is the probability of detecting a significant effect. Statistics calculated by Monte Carlo simulation.",width = 65)) +
  ggtitle("Power Calculation for Varying Effect Sizes") +
  scale_color_viridis_d()
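
# Optional: write the figure to disk. The file name and dimensions below are
# assumptions, not part of the original gist.
# ggsave("ordinal_vs_binary_power.png", width = 6, height = 8)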