Created
January 31, 2022 07:25
-
-
Save saudiwin/255ba3f1a297616968797cfd9dc533c6 to your computer and use it in GitHub Desktop.
Power calculation showing why ordinal outcomes are more efficient than binary (dichotomized) outcomes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ordinal versus binary outcome power calculation | |
# Robert Kubinec | |
# New York University Abu Dhabi | |
# January 31, 2022 | |
library(dplyr) | |
library(ordinal) | |
library(ggplot2) | |
N <- 500 | |
exist_cut <- c(-2,2) | |
effects <- seq(0.25, 1.25, by=0.01) | |
iter <- 1000 | |
ord_samp <- parallel::mclapply(effects, function(e) { | |
treat_vec <- c(rep(e, 30), rep(0, 470)) | |
over_iter <- lapply(1:iter, function(i) { | |
pr_mid1 <- plogis(treat_vec - exist_cut[1]) - plogis(treat_vec - exist_cut[2]) | |
pr_mid2 <- plogis(treat_vec - exist_cut[2]) - plogis(treat_vec - exist_cut[3]) | |
pr_end <- plogis(treat_vec - exist_cut[3]) | |
pr_beg <- 1 - plogis(treat_vec - exist_cut[1]) | |
pr_mat <- cbind(pr_beg, pr_mid1, pr_mid2, pr_end) | |
out_samp <- apply(pr_mat, 1, function(c) { | |
sample(1:4, size=1, prob=c) | |
}) | |
this_data <- tibble(outcome=ordered(out_samp), | |
treat_vec=as.numeric(treat_vec>0)) | |
est_mod <- summary(ordinal::clm(outcome ~ treat_vec,data=this_data)) | |
tibble(treat_est=est_mod$coefficients[4,1], | |
p_val=est_mod$coefficients[4,4], | |
real_treat=e) | |
}) | |
},mc.cores=parallel::detectCores()) %>% | |
bind_rows | |
bin_samp <- parallel::mclapply(effects, function(e) { | |
treat_vec <- c(rep(e, 30), rep(0, 470)) | |
over_iter <- lapply(1:iter, function(i) { | |
this_data <- tibble(outcome=rbinom(n=N,size=1, | |
prob=plogis(treat_vec)), | |
treat_vec=as.numeric(treat_vec>0)) | |
est_mod <- summary(glm(outcome ~ treat_vec,data=this_data, | |
family="binomial")) | |
tibble(treat_est=est_mod$coefficients[2,1], | |
p_val=est_mod$coefficients[2,4], | |
real_treat=e) | |
}) | |
},mc.cores=parallel::detectCores()) %>% | |
bind_rows | |
# annotations, etc | |
horiz_lines <- tibble(Diagnostic=c('M_error','S_error',"Power"), | |
yintercept=c(1,NA,.8)) | |
vert_lines <- tibble(Diagnostic=c('M_error','M_error', | |
'S_error','S_error', | |
"Power","Power"), | |
xintercept=rep(c(.5,.75),3)) | |
bind_rows(mutate(ord_samp, type="Ordinal"), | |
mutate(bin_samp, type="Binary")) %>% | |
mutate(Power=p_val<0.05, | |
M_error=abs(treat_est)/abs(real_treat), | |
S_error=as.numeric(sign(real_treat)!=sign(treat_est))) %>% | |
gather(key="Diagnostic",value="estimate",-real_treat, | |
-treat_est,-p_val,-type) %>% | |
group_by(type,Diagnostic,real_treat) %>% | |
summarize(med_est=mean(estimate)) %>% | |
ggplot(aes(y=med_est,x=real_treat)) + | |
geom_line(aes(colour=type)) + | |
geom_hline(data=horiz_lines, aes(yintercept=yintercept), | |
linetype=2) + | |
geom_vline(data=vert_lines, aes(xintercept=xintercept), | |
linetype=3) + | |
theme(panel.background = element_blank(), | |
plot.caption = element_text(size=7)) + | |
facet_wrap(~Diagnostic, scales="free_y",nrow=3) + | |
labs(x="True Effect Size",y="", | |
caption=stringr::str_wrap("M-errors are the ratio of estimated to true effect sizes and S-errors are the probability of obtaining an effect of the wrong sign from the true effect. Power is the probability of detecting a significant effect. Statistics calculated by Monte Carlo simulation.",width = 65)) + | |
ggtitle("Power Calculation for Varying Effect Sizes") + | |
scale_color_viridis_d() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment