Last active
July 2, 2024 19:58
-
-
Save steveharoz/4540028b0f82dd60581e04b7725ee982 to your computer and use it in GitHub Desktop.
t-test vs Wilcox test vs ordinal regression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# t-test vs wilcox vs ordinal | |
library(tidyverse) | |
library(multidplyr) # parallelize | |
library(rms) # ordinal regression | |
sample_size = 100 # N | |
# genarate paired sets and calculate p-values with different techniques | |
# reduce the iterations number to reduce time | |
simulate = function(iterations, seed = NULL) { | |
set.seed(seed) | |
replicate(iterations, (function() { | |
#### data with skewed distribution that changes after treatment | |
data = tibble( | |
subject = paste0("S", 1:sample_size), | |
# sample ordinal values with a made-up distribution | |
before = sample(1:7, sample_size, replace = TRUE, prob = c(0.2, 0.25, 0.18, 0.12, 0.1, 0.08, 0.07)), | |
# change after treatment is noisy and not consistent across range | |
after = before + ifelse(before>=5, 0, sample(c(-1, 0, 1), sample_size, replace = TRUE, prob = c(0.3, 0.3, 0.4))), | |
) %>% | |
# clamp `after` within bounds | |
mutate(after = pmin(pmax(1, after), 7)) | |
#### NULL effect | |
data_null = tibble( | |
subject = paste0("S", 1:sample_size), | |
# sample ordinal values with a uniform distribution | |
before = sample(1:7, sample_size, replace = TRUE), | |
# change after treatment is noisy | |
after = before + sample(c(-1, 0, 1), sample_size, replace = TRUE) | |
) %>% | |
# clamp `after` within bounds | |
mutate(after = pmin(pmax(1, after), 7)) | |
data_long = pivot_longer(data, before:after, names_to = "time", values_to = "measurement") | |
data_long_null = pivot_longer(data_null, before:after, names_to = "time", values_to = "measurement") | |
tibble( | |
p_t = t.test(data$before, data$after, paired = TRUE)$p.value, | |
p_w = wilcox.test(data$before, data$after, paired = TRUE)$p.value, | |
p_o = rms::orm(measurement ~ time, data_long, x=TRUE, y=TRUE) %>% | |
rms::robcov(cluster = data_long$subject) %>% | |
anova() %>% | |
as_tibble() %>% mutate(p = as.numeric(P)) %>% pull(p) %>% pluck(1), | |
p_t_null = t.test(data_null$before, data_null$after, paired = TRUE)$p.value, | |
p_w_null = wilcox.test(data_null$before, data_null$after, paired = TRUE)$p.value, | |
p_o_null = rms::orm(measurement ~ time, data_long_null, x=TRUE, y=TRUE) %>% | |
rms::robcov(cluster = data_long_null$subject) %>% | |
anova() %>% | |
as_tibble() %>% mutate(p = as.numeric(P)) %>% pull(p) %>% pluck(1) | |
) | |
})(), simplify = FALSE) %>% bind_rows() | |
} | |
# setup for parallel run | |
my_cluster = multidplyr::new_cluster(parallel::detectCores() - 1) | |
cluster_library(my_cluster, "rms") | |
cluster_library(my_cluster, "magrittr") | |
cluster_library(my_cluster, "dplyr") | |
cluster_library(my_cluster, "tidyr") | |
cluster_library(my_cluster, "purrr") | |
# copy variables to every cluster and run in parallel | |
start_time = Sys.time() | |
cluster_copy(my_cluster, "sample_size") | |
cluster_copy(my_cluster, "simulate") | |
cluster_assign(my_cluster, iterations = round(10000 / length(my_cluster))) | |
cluster_assign_each(my_cluster, seed = 8675309 + 1:length(my_cluster)) | |
ps = cluster_call(my_cluster, simulate(iterations, seed)) | |
ps = ps %>% bind_rows() | |
Sys.time() - start_time | |
beepr::beep(2) | |
##### results ##### | |
# what percent of p-values are below 0.05 | |
ps %>% | |
summarise( | |
t_test = mean(p_t<0.05) %>% scales::label_percent(.1)(), | |
wilcox_test = mean(p_w<0.05) %>% scales::label_percent(.1)(), | |
ordinal_regression = mean(p_o<0.05) %>% scales::label_percent(.1)(), | |
) | |
# t_test wilcox_test ordinal_regression | |
# 50.9% 50.9% 37.6% | |
# t-test vs ordinal | |
ps %>% | |
mutate(t_yes_o_no = (p_t<0.05) & (p_o>0.05)) %>% | |
mutate(t_no_o_yes = (p_t>0.05) & (p_o<0.05)) %>% | |
summarise( | |
t_yes_o_no = mean(t_yes_o_no) %>% scales::label_percent(.1)(), | |
t_no_o_yes = mean(t_no_o_yes) %>% scales::label_percent(.1)() | |
) | |
# t_yes_o_no t_no_o_yes | |
# 13.3% 0.0% | |
# t-test vs wilcox | |
ps %>% | |
mutate(t_yes_w_no = (p_t<0.05) & (p_w>0.05)) %>% | |
mutate(t_no_w_yes = (p_t>0.05) & (p_w<0.05)) %>% | |
summarise( | |
t_yes_w_no = mean(t_yes_w_no) %>% scales::label_percent(.1)(), | |
t_no_w_yes = mean(t_no_w_yes) %>% scales::label_percent(.1)() | |
) | |
# t_yes_w_no t_no_w_yes | |
# 0.0% 0.0% | |
# wilcox vs ordinal | |
ps %>% | |
mutate(w_yes_o_no = (p_w<0.05) & (p_o>0.05)) %>% | |
mutate(w_no_o_yes = (p_w>0.05) & (p_o<0.05)) %>% | |
summarise( | |
w_yes_o_no = mean(w_yes_o_no) %>% scales::label_percent(.1)(), | |
w_no_o_yes = mean(w_no_o_yes) %>% scales::label_percent(.1)() | |
) | |
# w_yes_o_no w_no_o_yes | |
# 13.3% 0.0% | |
# t-test vs ordinal EXTREME | |
ps %>% | |
mutate(t_yes_o_no = (p_t<0.01) & (p_o>0.05)) %>% | |
mutate(t_no_o_yes = (p_t>0.05) & (p_o<0.01)) %>% | |
summarise( | |
t_yes_o_no = mean(t_yes_o_no) %>% scales::label_percent(.1)(), | |
t_no_o_yes = mean(t_no_o_yes) %>% scales::label_percent(.1)() | |
) | |
# t_yes_o_no t_no_o_yes | |
# 0.3% 0.0% | |
##### results for uniform distribution with null effect | |
# NULL: what percent of p-values are below 0.05 | |
ps %>% | |
summarise( | |
t_test = mean(p_t_null<0.05) %>% scales::label_percent(.1)(), | |
wilcox_test = mean(p_w_null<0.05) %>% scales::label_percent(.1)(), | |
ordinal_regression = mean(p_o_null<0.05) %>% scales::label_percent(.1)(), | |
) | |
# t_test wilcox_test ordinal_regression | |
# 4.9% 4.9% 4.6% | |
# NULL: t-test vs ordinal | |
ps %>% | |
mutate(t_yes_o_no = (p_t_null<0.05) & (p_o_null>0.05)) %>% | |
mutate(t_no_o_yes = (p_t_null>0.05) & (p_o_null<0.05)) %>% | |
summarise( | |
t_yes_o_no = mean(t_yes_o_no) %>% scales::label_percent(.1)(), | |
t_no_o_yes = mean(t_no_o_yes) %>% scales::label_percent(.1)() | |
) | |
# t_yes_o_no t_no_o_yes | |
# 0.7% 0.4% |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment