# t-test vs wilcox vs ordinal
library(tidyverse)
library(multidplyr) # parallelize
library(rms) # ordinal regression
sample_size = 500 # N
# generate paired (before/after) datasets and compute p-values with the three techniques
# reduce the number of iterations to reduce run time
simulate = function(iterations, seed = NULL) {
  set.seed(seed)
  replicate(iterations, (function() {
    #### data with a skewed distribution that changes after treatment
    data = tibble(
      subject = paste0("S", 1:sample_size),
      # before: continuous standard-normal scores, cut into ordinal categories below
      before = rnorm(sample_size),
      # after: correlated with before, plus a skew-inducing before^2 term and noise
      after = sqrt(0.7)*before + sqrt(.1)*scale(before^2, center = T) + sqrt(0.2)*rnorm(sample_size)
    ) %>%
      # cut into ordinal categories (asymmetric breaks give a skewed ordinal scale):
      mutate(after = as.numeric(cut(after, breaks = c(-5, -.5, 0.25, 0.75, 1.5, 2, 2.5, 5))),
             before = as.numeric(cut(before, breaks = c(-5, -.5, 0.25, 0.75, 1.5, 2, 2.5, 5))))
    #### NULL effect
    data_null = tibble(
      subject = paste0("S", 1:sample_size),
      # before: continuous standard-normal scores, cut into ordinal categories below
      before = rnorm(sample_size),
      # after: same generative model; the change after treatment is noisy
      after = sqrt(0.7)*before + sqrt(.1)*scale(before^2, center = T) + sqrt(0.2)*rnorm(sample_size)
    ) %>%
      # cut into a symmetric ordinal scale:
      mutate(after = as.numeric(cut(after, breaks = c(-5, -2.25, -1.5, -.5, .5, 1.5, 2.25, 5))),
             before = as.numeric(cut(before, breaks = c(-5, -2.25, -1.5, -.5, .5, 1.5, 2.25, 5))))
    data_long = pivot_longer(data, before:after, names_to = "time", values_to = "measurement")
    data_long_null = pivot_longer(data_null, before:after, names_to = "time", values_to = "measurement")
    tibble(
      p_t = t.test(data$before, data$after, paired = TRUE)$p.value,
      p_w = wilcox.test(data$before, data$after, paired = TRUE)$p.value,
      # ordinal (proportional-odds) regression with cluster-robust SEs to account for the paired design
      p_o = rms::orm(measurement ~ time, data_long, x = TRUE, y = TRUE) %>%
        rms::robcov(cluster = data_long$subject) %>%
        anova() %>%
        as_tibble() %>% mutate(p = as.numeric(P)) %>% pull(p) %>% pluck(1),
      p_t_null = t.test(data_null$before, data_null$after, paired = TRUE)$p.value,
      p_w_null = wilcox.test(data_null$before, data_null$after, paired = TRUE)$p.value,
      p_o_null = rms::orm(measurement ~ time, data_long_null, x = TRUE, y = TRUE) %>%
        rms::robcov(cluster = data_long_null$subject) %>%
        anova() %>%
        as_tibble() %>% mutate(p = as.numeric(P)) %>% pull(p) %>% pluck(1)
    )
  })(), simplify = FALSE) %>% bind_rows()
}
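# Optional sanity check (an addition, not part of the original gist): a tiny
# single-core run to confirm simulate() returns one row of six p-values per
# iteration before committing to the full parallel run below. The object name
# `sanity` is made up for this check.
sanity = simulate(3, seed = 1)
stopifnot(nrow(sanity) == 3, ncol(sanity) == 6)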
# setup for parallel run
my_cluster = multidplyr::new_cluster(parallel::detectCores() - 1)
cluster_library(my_cluster, "rms")
cluster_library(my_cluster, "magrittr")
cluster_library(my_cluster, "dplyr")
cluster_library(my_cluster, "tidyr")
cluster_library(my_cluster, "purrr")
# copy objects to every worker and run the simulation in parallel
start_time = Sys.time()
cluster_copy(my_cluster, "sample_size")
cluster_copy(my_cluster, "simulate")
cluster_assign(my_cluster, iterations = round(10000 / length(my_cluster)))
cluster_assign_each(my_cluster, seed = 8675309 + 1:length(my_cluster))
ps = cluster_call(my_cluster, simulate(iterations, seed))
ps = ps %>% bind_rows()
Sys.time() - start_time
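# Optional clean-up (an addition, not part of the original gist): once `ps` has
# been collected, the workers are no longer needed. My understanding is that
# multidplyr shuts its worker processes down when the cluster object is
# garbage collected, so dropping the reference and forcing a gc() releases them.
rm(my_cluster)
gc()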
##### results #####
# what percent of p-values are below 0.05
ps %>%
  summarise(
    t_test = mean(p_t < 0.05) %>% scales::label_percent(.1)(),
    wilcox_test = mean(p_w < 0.05) %>% scales::label_percent(.1)(),
    ordinal_regression = mean(p_o < 0.05) %>% scales::label_percent(.1)()
  )
# t_test wilcox_test ordinal_regression
# t-test vs ordinal
ps %>%
  mutate(t_yes_o_no = (p_t < 0.05) & (p_o > 0.05)) %>%
  mutate(t_no_o_yes = (p_t > 0.05) & (p_o < 0.05)) %>%
  summarise(
    t_yes_o_no = mean(t_yes_o_no) %>% scales::label_percent(.1)(),
    t_no_o_yes = mean(t_no_o_yes) %>% scales::label_percent(.1)()
  )
# t_yes_o_no t_no_o_yes
# t-test vs wilcox
ps %>%
  mutate(t_yes_w_no = (p_t < 0.05) & (p_w > 0.05)) %>%
  mutate(t_no_w_yes = (p_t > 0.05) & (p_w < 0.05)) %>%
  summarise(
    t_yes_w_no = mean(t_yes_w_no) %>% scales::label_percent(.1)(),
    t_no_w_yes = mean(t_no_w_yes) %>% scales::label_percent(.1)()
  )
# t_yes_w_no t_no_w_yes
# wilcox vs ordinal
ps %>%
  mutate(w_yes_o_no = (p_w < 0.05) & (p_o > 0.05)) %>%
  mutate(w_no_o_yes = (p_w > 0.05) & (p_o < 0.05)) %>%
  summarise(
    w_yes_o_no = mean(w_yes_o_no) %>% scales::label_percent(.1)(),
    w_no_o_yes = mean(w_no_o_yes) %>% scales::label_percent(.1)()
  )
# w_yes_o_no w_no_o_yes
# t-test vs ordinal, extreme disagreements (p < 0.01 on one test, p > 0.05 on the other)
ps %>%
  mutate(t_yes_o_no = (p_t < 0.01) & (p_o > 0.05)) %>%
  mutate(t_no_o_yes = (p_t > 0.05) & (p_o < 0.01)) %>%
  summarise(
    t_yes_o_no = mean(t_yes_o_no) %>% scales::label_percent(.1)(),
    t_no_o_yes = mean(t_no_o_yes) %>% scales::label_percent(.1)()
  )
# t_yes_o_no t_no_o_yes
##### results for the symmetrically cut data with a null effect
# NULL: what percent of p-values are below 0.05
ps %>%
  summarise(
    t_test = mean(p_t_null < 0.05) %>% scales::label_percent(.1)(),
    wilcox_test = mean(p_w_null < 0.05) %>% scales::label_percent(.1)(),
    ordinal_regression = mean(p_o_null < 0.05) %>% scales::label_percent(.1)()
  )
# t_test wilcox_test ordinal_regression
# NULL: t-test vs ordinal
ps %>%
  mutate(t_yes_o_no = (p_t_null < 0.05) & (p_o_null > 0.05)) %>%
  mutate(t_no_o_yes = (p_t_null > 0.05) & (p_o_null < 0.05)) %>%
  summarise(
    t_yes_o_no = mean(t_yes_o_no) %>% scales::label_percent(.1)(),
    t_no_o_yes = mean(t_no_o_yes) %>% scales::label_percent(.1)()
  )
# t_yes_o_no t_no_o_yes