Last active
April 26, 2024 09:50
-
-
Save steveharoz/452929e367fc8717e9e742d81ab5e195 to your computer and use it in GitHub Desktop.
Uncanny Mountain simulations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "How likely is the uncanny mountain?" | |
author: "by Steve Haroz" | |
output: | |
github_document: | |
params: | |
use_cache: TRUE | |
--- | |
```{r setup, warning = FALSE, message = FALSE} | |
library(tidyverse) | |
library(patchwork) | |
library(multidplyr) | |
``` | |
```{r parameters} | |
# significant level | |
P_HI = 0.05 | |
P_LO = 0.01 | |
``` | |
```{r initialize} | |
simulations = expand.grid( | |
effect_size = seq(0, 1, 0.05), | |
N = c(seq(20, 100, 5), seq(120, 200, 10), 250, 500, 1000, 5000, 10000), # actually N/2 | |
rep = 1:10000 | |
) | |
``` | |
```{r parallel} | |
core_count = parallel::detectCores() - 1 | |
cluster = new_cluster(core_count) | |
``` | |
```{r simulate, cache=TRUE} | |
start = Sys.time() | |
simulations = simulations %>% | |
mutate(row = 1:n()) %>% # rowwise workaround | |
partition(cluster) %>% | |
group_by(row) %>% # rowwise workaround | |
mutate(p = t.test(rnorm(N), rnorm(N, effect_size))$p.value) %>% | |
ungroup() %>% | |
collect() | |
Sys.time() - start | |
``` | |
```{r summarize} | |
simulations = simulations %>% | |
mutate(significant = p < P_HI) %>% | |
mutate(uncanny = (p > P_LO) & (p < P_HI)) | |
simulations_summary = simulations %>% | |
summarise( | |
.by = c(effect_size, N), | |
uncanny = mean(uncanny), | |
significant = mean(significant), | |
p_lo = mean(p < P_LO) | |
) | |
``` | |
```{r line_graphs} | |
ggplot(simulations_summary) + | |
aes(x = N*2, y = uncanny, color = factor(effect_size)) + | |
geom_line(linewidth = 1) + | |
theme_minimal(12) + | |
labs(title = "", color = "Cohen's D") | |
ggplot(simulations_summary) + | |
aes(x = effect_size, y = uncanny, color = factor(N*2)) + | |
geom_line(linewidth = 1) + | |
theme_minimal(12) + | |
labs(title = "", x = "Cohen's D", color = "N") | |
``` | |
```{r heatmap} | |
pretty_percent = function(x) { | |
ifelse (x < 0.005, "<1%", scales::label_percent(1)(x)) | |
} | |
# labels | |
Ns = simulations$N %>% unique() %>% sort() | |
Ns = Ns * 2 | |
Ns_labels = as.character(Ns) | |
Ns_labels[seq(2, length(Ns_labels)-3, 2)] = "" | |
Ns_labels[Ns >= 1000] = as.character(paste0(Ns[Ns >= 1000]/1000, "k")) | |
ESs = simulations$effect_size %>% unique() %>% sort() | |
ESs_labels = as.character(ESs) | |
ESs_labels[seq(2, length(ESs)-1, 2)] = "" | |
# graph | |
plot_simulation = ggplot(simulations_summary) + | |
aes(x = factor(N*2), y = factor(effect_size), fill = uncanny) + | |
geom_raster() + | |
geom_text(aes(label = pretty_percent(uncanny))) + | |
scale_fill_gradientn( | |
limits = c(0,.25), | |
na.value = "#9E0142", | |
labels = scales::label_percent(), | |
guide = "none", | |
colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) + | |
scale_x_discrete(labels = Ns_labels) + | |
scale_y_discrete(labels = ESs_labels) + | |
theme_minimal(12) + | |
labs(title = 'Probability of getting a p-value in the "Uncanny Mountain"', | |
subtitle = "What proportion of 10k between-subject simulations have 0.01 < p < 0.05?", | |
x = "N", y = "Effect size (Cohen's D)") | |
plot_simulation | |
``` | |
---------- | |
```{r multiple} | |
multiple = function(total_ps, uncanny_ps, probability_of_uncanny) { | |
choose(total_ps, uncanny_ps) * | |
probability_of_uncanny^uncanny_ps * | |
(1-probability_of_uncanny)^(total_ps-uncanny_ps) | |
} | |
simulations_summary = simulations_summary %>% | |
rowwise() %>% | |
mutate(uncanny_multiple = sapply(3:5, function(k) multiple(5, k, uncanny)) %>% sum()) %>% | |
ungroup() | |
plot_simulation3_5 = ggplot(simulations_summary) + | |
aes(x = factor(N*2), y = factor(effect_size), fill = uncanny_multiple) + | |
geom_raster() + | |
geom_text(aes(label = pretty_percent(uncanny_multiple))) + | |
scale_fill_gradientn( | |
limits = c(0,.25), | |
na.value = "#9E0142", | |
labels = scales::label_percent(), | |
guide = "none", | |
colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) + | |
scale_x_discrete(labels = Ns_labels) + | |
scale_y_discrete(labels = ESs_labels) + | |
theme_minimal(12) + | |
labs(title = 'Probability of getting at least 3 "Uncanny" p-values from 5 results', | |
subtitle = "What proportion of 10k between-subject simulations of 5 p-values have 3 p-values in (0.01 < p < 0.05)?", | |
x = "N", y = "Effect size (Cohen's D)") | |
plot_simulation3_5 | |
``` | |
```{r} | |
simulations_summary = simulations_summary %>% | |
rowwise() %>% | |
mutate(uncanny_multiple = sapply(4:5, function(k) multiple(5, k, uncanny)) %>% sum()) %>% | |
ungroup() | |
plot_simulation4_5 = ggplot(simulations_summary) + | |
aes(x = factor(N*2), y = factor(effect_size), fill = uncanny_multiple) + | |
geom_raster() + | |
geom_text(aes(label = pretty_percent(uncanny_multiple))) + | |
scale_fill_gradientn( | |
limits = c(0,.25), | |
na.value = "#9E0142", | |
labels = scales::label_percent(), | |
guide = "none", | |
colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) + | |
scale_x_discrete(labels = Ns_labels) + | |
scale_y_discrete(labels = ESs_labels) + | |
theme_minimal(12) + | |
labs(title = 'Probability of getting at least 4 "Uncanny" p-values from 5 results', | |
subtitle = "What proportion of 10k between-subject simulations of 5 p-values have 4 p-values in (0.01 < p < 0.05)?", | |
x = "N", y = "Effect size (Cohen's D)") | |
plot_simulation4_5 | |
``` | |
```{r} | |
plot_simulation / plot_simulation3_5 / plot_simulation4_5 | |
ggsave("~/statistics/multiple uncanny.png", width = 1000, height = 2000, units = "px", scale = 3) | |
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment