Last active
April 3, 2024 16:54
-
-
Save steveharoz/a8955fd91a8c9b7822dc3990c03b3283 to your computer and use it in GitHub Desktop.
Replication rate by effect size
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
# Simulate one two-group experiment and summarise it.
#
# Draws two independent normal samples with unit standard deviation: a
# control group `a` with mean 0 and a treatment group `b` with mean
# `effect_size`. Each group receives half of `subject_count`; any fractional
# half is dropped.
#
# Arguments:
#   subject_count  total number of subjects across both groups; must be at
#                  least 4 so each group has the 2 observations t.test() needs
#   effect_size    true difference between the group means, in SD units
#
# Returns a one-row tibble with columns
#   p  p-value of the one-sided t-test of mean(a) < mean(b)
#   d  observed Cohen's d: (mean(b) - mean(a)) / pooled SD
simulate = function(subject_count = 20, effect_size = 0) {
  group_size = subject_count %/% 2
  stopifnot(group_size >= 2)

  a = rnorm(group_size)
  b = rnorm(group_size, effect_size)

  # Pooled SD with n_a + n_b - 2 degrees of freedom: one degree of freedom is
  # lost for each estimated group mean. Taking sd() of the concatenated
  # residuals would divide by n_a + n_b - 1, understating the SD and therefore
  # overstating d.
  pooled_sd = sqrt(
    ((length(a) - 1) * var(a) + (length(b) - 1) * var(b)) /
      (length(a) + length(b) - 2)
  )

  tibble(
    p = t.test(a, b, alternative = "less")$p.value,
    d = (mean(b) - mean(a)) / pooled_sd
  )
}
# Sample sizes (total subjects across both groups). The replication is run
# with 2.5 times as many subjects as the original experiment.
N_original = 20
N_replication = N_original * 2.5

# Fix the RNG seed so that repeated runs of the script draw the same samples
# and therefore reproduce the same figures.
set.seed(20240403)

# Build one row per (true effect size, simulation index) and run an original
# experiment and a replication for each row.
data = expand_grid(
  effect_size = seq(0, 1, 0.1),
  index = 1:2000 # simulations per effect size
) %>%
  # rowwise() so that simulate() is called once per row with scalar arguments
  rowwise() %>%
  mutate(
    original = simulate(N_original, effect_size),
    replication = simulate(N_replication, effect_size)
  ) %>%
  ungroup() %>%
  # get the p-value and Cohen's d from original and replication as the flat
  # columns original_p, original_d, replication_p, replication_d
  unnest(original, names_sep = "_") %>%
  unnest(replication, names_sep = "_")

# done! Play a notification sound, but only when the optional beepr package is
# installed, so a missing package cannot abort the script after the simulation.
if (requireNamespace("beepr", quietly = TRUE)) {
  beepr::beep(2)
}
# Classify each simulated original/replication pair.
data = mutate(
  data,
  # a result counts as significant at the 5% level
  significant_original = original_p < 0.05,
  significant_replication = replication_p < 0.05,
  # "replicated" means both experiments agree: both significant or both not
  has_replicated = !xor(significant_original, significant_replication),
  # the agreed-upon conclusion contradicts the truth: significant although the
  # true effect is 0, or not significant although the true effect is positive
  replicated_and_wrong =
    has_replicated & (sign(effect_size) != as.numeric(significant_original))
)
# Plot 1: probability that the replication agrees with the original, as a
# function of the Cohen's d observed in the original experiment, with one
# curve per true effect size.
ggplot(
  data,
  aes(
    x = original_d,
    y = as.numeric(has_replicated),
    color = factor(effect_size),
    fill = factor(effect_size)
  )
) +
  # logistic regression of the 0/1 outcome on the observed d
  geom_smooth(
    linewidth = 1,
    fullrange = TRUE,
    method = glm,
    method.args = list(family = "binomial")
  ) +
  # NOTE(review): scale limits discard rows with original_d outside [0, 1]
  # before the model is fitted; coord_cartesian(xlim = c(0, 1)) would only
  # zoom instead. Confirm which of the two is intended.
  scale_x_continuous(limits = c(0, 1), expand = c(0, 0)) +
  scale_y_continuous(labels = scales::label_percent()) +
  theme_classic(12) +
  labs(
    x = "Cohen's d measured from original experiment",
    y = NULL,
    color = "Actual effect size",
    fill = "Actual effect size",
    title = "Replication rate by effect size",
    subtitle = "Replication: p-values are both <0.05 or both >0.05",
    # built from the variables so the caption cannot drift from the
    # sample sizes that were actually simulated
    caption = paste0(
      "Original N = ", N_original,
      ". Replication N = ", N_replication, "."
    )
  )
# Plot 2: probability that original and replication agree with each other but
# contradict the true effect, as a function of the Cohen's d observed in the
# original experiment, with one curve per true effect size.
ggplot(
  data,
  aes(
    x = original_d,
    y = as.numeric(replicated_and_wrong),
    color = factor(effect_size),
    fill = factor(effect_size)
  )
) +
  # logistic regression of the 0/1 outcome on the observed d
  geom_smooth(
    linewidth = 1,
    method = glm,
    method.args = list(family = "binomial")
  ) +
  # NOTE(review): scale limits discard rows with original_d outside [0, 1]
  # before the model is fitted; coord_cartesian(xlim = c(0, 1)) would only
  # zoom instead. Confirm which of the two is intended.
  scale_x_continuous(limits = c(0, 1), expand = c(0, 0)) +
  scale_y_continuous(labels = scales::label_percent()) +
  # list the zero-effect entry last in the legend, after 0.1 ... 1
  scale_color_hue(
    aesthetics = c("color", "fill"),
    breaks = c(seq(.1, 1, .1), 0)
  ) +
  theme_classic(12) +
  labs(
    x = "Cohen's d measured from original experiment",
    y = NULL,
    color = "Actual effect size",
    fill = "Actual effect size",
    title = "Rate that both original and replication results mislead",
    subtitle = "\"Mislead\" means p<0.05 for effect size 0,\n and p>0.05 for effect size > 0",
    # built from the variables so the caption cannot drift from the
    # sample sizes that were actually simulated
    caption = paste0(
      "Original N = ", N_original,
      ". Replication N = ", N_replication, "."
    )
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment