steveharoz/readme.MD

## readme.MD

      
    Raw
  

              readme.MD
            
          
    Original idea
BlueSky thread


## uncanny.rmd
---
title: "How likely is the uncanny mountain?"
author: "by Steve Haroz"
output:
  github_document:
params:
  use_cache: TRUE
---


```{r setup, warning = FALSE, message = FALSE}
library(tidyverse)
library(patchwork)
library(multidplyr)
```

```{r parameters}
# significant level
P_HI = 0.05
P_LO = 0.01
```

```{r initialize}
simulations = expand.grid(
  effect_size = seq(0, 1, 0.05),
  N = c(seq(20, 100, 5), seq(120, 200, 10), 250, 500, 1000, 5000, 10000), # actually N/2
  rep = 1:10000
)
```

```{r parallel}
core_count = parallel::detectCores() - 1
cluster = new_cluster(core_count)
```


```{r simulate, cache=TRUE}
start = Sys.time()
simulations = simulations %>%
  mutate(row = 1:n()) %>%  # rowwise workaround
  partition(cluster) %>%
  group_by(row) %>%  # rowwise workaround
  mutate(p = t.test(rnorm(N), rnorm(N, effect_size))$p.value) %>%
  ungroup() %>%
  collect()
Sys.time() - start
```

```{r summarize}
simulations = simulations %>%
  mutate(significant = p < P_HI) %>%
  mutate(uncanny = (p > P_LO) & (p < P_HI))

simulations_summary = simulations %>%
  summarise(
    .by = c(effect_size, N),
    uncanny = mean(uncanny),
    significant = mean(significant),
    p_lo = mean(p < P_LO)
  )
```


```{r line_graphs}
ggplot(simulations_summary) +
  aes(x = N*2, y = uncanny, color = factor(effect_size)) +
  geom_line(linewidth = 1) +
  theme_minimal(12) +
  labs(title = "", color = "Cohen's D")

ggplot(simulations_summary) +
  aes(x = effect_size, y = uncanny, color = factor(N*2)) +
  geom_line(linewidth = 1) +
  theme_minimal(12) +
  labs(title = "", x = "Cohen's D", color = "N")
```

```{r heatmap}
pretty_percent = function(x) {
  ifelse (x < 0.005, "<1%", scales::label_percent(1)(x))
}

# labels
Ns = simulations$N %>% unique() %>% sort()
Ns = Ns * 2
Ns_labels = as.character(Ns)
Ns_labels[seq(2, length(Ns_labels)-3, 2)] = ""
Ns_labels[Ns >= 1000] = as.character(paste0(Ns[Ns >= 1000]/1000, "k"))

ESs = simulations$effect_size %>% unique() %>% sort()
ESs_labels = as.character(ESs)
ESs_labels[seq(2, length(ESs)-1, 2)] = ""

# graph
plot_simulation = ggplot(simulations_summary) +
  aes(x = factor(N*2), y = factor(effect_size), fill = uncanny) +
  geom_raster() +
  geom_text(aes(label = pretty_percent(uncanny))) +
  scale_fill_gradientn(
    limits = c(0,.25),
    na.value = "#9E0142",
    labels = scales::label_percent(),
    guide = "none",
    colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
  scale_x_discrete(labels = Ns_labels) +
  scale_y_discrete(labels = ESs_labels) +
  theme_minimal(12) +
  labs(title = 'Probability of getting a p-value in the "Uncanny Mountain"',
       subtitle = "What proportion of 10k between-subject simulations have 0.01 < p < 0.05?",
       x = "N", y = "Effect size (Cohen's D)")
plot_simulation
```


----------


```{r multiple}

multiple = function(total_ps, uncanny_ps, probability_of_uncanny) {
  choose(total_ps, uncanny_ps) *
    probability_of_uncanny^uncanny_ps *
    (1-probability_of_uncanny)^(total_ps-uncanny_ps)
}

simulations_summary = simulations_summary %>%
  rowwise() %>%
  mutate(uncanny_multiple = sapply(3:5, function(k) multiple(5, k, uncanny)) %>% sum()) %>%
  ungroup()


plot_simulation3_5 = ggplot(simulations_summary) +
  aes(x = factor(N*2), y = factor(effect_size), fill = uncanny_multiple) +
  geom_raster() +
  geom_text(aes(label = pretty_percent(uncanny_multiple))) +
  scale_fill_gradientn(
    limits = c(0,.25),
    na.value = "#9E0142",
    labels = scales::label_percent(),
    guide = "none",
    colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
  scale_x_discrete(labels = Ns_labels) +
  scale_y_discrete(labels = ESs_labels) +
  theme_minimal(12) +
  labs(title = 'Probability of getting at least 3 "Uncanny" p-values from 5 results',
       subtitle = "What proportion of 10k between-subject simulations of 5 p-values have 3 p-values in (0.01 < p < 0.05)?",
       x = "N", y = "Effect size (Cohen's D)")
plot_simulation3_5
```
```{r}

simulations_summary = simulations_summary %>%
  rowwise() %>%
  mutate(uncanny_multiple = sapply(4:5, function(k) multiple(5, k, uncanny)) %>% sum()) %>%
  ungroup()


plot_simulation4_5 = ggplot(simulations_summary) +
  aes(x = factor(N*2), y = factor(effect_size), fill = uncanny_multiple) +
  geom_raster() +
  geom_text(aes(label = pretty_percent(uncanny_multiple))) +
  scale_fill_gradientn(
    limits = c(0,.25),
    na.value = "#9E0142",
    labels = scales::label_percent(),
    guide = "none",
    colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
  scale_x_discrete(labels = Ns_labels) +
  scale_y_discrete(labels = ESs_labels) +
  theme_minimal(12) +
  labs(title = 'Probability of getting at least 4 "Uncanny" p-values from 5 results',
       subtitle = "What proportion of 10k between-subject simulations of 5 p-values have 4 p-values in (0.01 < p < 0.05)?",
       x = "N", y = "Effect size (Cohen's D)")
plot_simulation4_5
```

```{r}
plot_simulation / plot_simulation3_5 / plot_simulation4_5
ggsave("~/statistics/multiple uncanny.png", width = 1000, height = 2000, units = "px", scale = 3)
```
	---
	title: "How likely is the uncanny mountain?"
	author: "by Steve Haroz"
	output:
	github_document:
	params:
	use_cache: TRUE
	---


	```{r setup, warning = FALSE, message = FALSE}
	library(tidyverse)
	library(patchwork)
	library(multidplyr)
	```

	```{r parameters}
	# significant level
	P_HI = 0.05
	P_LO = 0.01
	```

	```{r initialize}
	simulations = expand.grid(
	effect_size = seq(0, 1, 0.05),
	N = c(seq(20, 100, 5), seq(120, 200, 10), 250, 500, 1000, 5000, 10000), # actually N/2
	rep = 1:10000
	)
	```

	```{r parallel}
	core_count = parallel::detectCores() - 1
	cluster = new_cluster(core_count)
	```


	```{r simulate, cache=TRUE}
	start = Sys.time()
	simulations = simulations %>%
	mutate(row = 1:n()) %>% # rowwise workaround
	partition(cluster) %>%
	group_by(row) %>% # rowwise workaround
	mutate(p = t.test(rnorm(N), rnorm(N, effect_size))$p.value) %>%
	ungroup() %>%
	collect()
	Sys.time() - start
	```

	```{r summarize}
	simulations = simulations %>%
	mutate(significant = p < P_HI) %>%
	mutate(uncanny = (p > P_LO) & (p < P_HI))

	simulations_summary = simulations %>%
	summarise(
	.by = c(effect_size, N),
	uncanny = mean(uncanny),
	significant = mean(significant),
	p_lo = mean(p < P_LO)
	)
	```


	```{r line_graphs}
	ggplot(simulations_summary) +
	aes(x = N*2, y = uncanny, color = factor(effect_size)) +
	geom_line(linewidth = 1) +
	theme_minimal(12) +
	labs(title = "", color = "Cohen's D")

	ggplot(simulations_summary) +
	aes(x = effect_size, y = uncanny, color = factor(N*2)) +
	geom_line(linewidth = 1) +
	theme_minimal(12) +
	labs(title = "", x = "Cohen's D", color = "N")
	```

	```{r heatmap}
	pretty_percent = function(x) {
	ifelse (x < 0.005, "<1%", scales::label_percent(1)(x))
	}

	# labels
	Ns = simulations$N %>% unique() %>% sort()
	Ns = Ns * 2
	Ns_labels = as.character(Ns)
	Ns_labels[seq(2, length(Ns_labels)-3, 2)] = ""
	Ns_labels[Ns >= 1000] = as.character(paste0(Ns[Ns >= 1000]/1000, "k"))

	ESs = simulations$effect_size %>% unique() %>% sort()
	ESs_labels = as.character(ESs)
	ESs_labels[seq(2, length(ESs)-1, 2)] = ""

	# graph
	plot_simulation = ggplot(simulations_summary) +
	aes(x = factor(N*2), y = factor(effect_size), fill = uncanny) +
	geom_raster() +
	geom_text(aes(label = pretty_percent(uncanny))) +
	scale_fill_gradientn(
	limits = c(0,.25),
	na.value = "#9E0142",
	labels = scales::label_percent(),
	guide = "none",
	colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
	scale_x_discrete(labels = Ns_labels) +
	scale_y_discrete(labels = ESs_labels) +
	theme_minimal(12) +
	labs(title = 'Probability of getting a p-value in the "Uncanny Mountain"',
	subtitle = "What proportion of 10k between-subject simulations have 0.01 < p < 0.05?",
	x = "N", y = "Effect size (Cohen's D)")
	plot_simulation
	```


	----------



	```{r multiple}

	multiple = function(total_ps, uncanny_ps, probability_of_uncanny) {
	choose(total_ps, uncanny_ps) *
	probability_of_uncanny^uncanny_ps *
	(1-probability_of_uncanny)^(total_ps-uncanny_ps)
	}

	simulations_summary = simulations_summary %>%
	rowwise() %>%
	mutate(uncanny_multiple = sapply(3:5, function(k) multiple(5, k, uncanny)) %>% sum()) %>%
	ungroup()


	plot_simulation3_5 = ggplot(simulations_summary) +
	aes(x = factor(N*2), y = factor(effect_size), fill = uncanny_multiple) +
	geom_raster() +
	geom_text(aes(label = pretty_percent(uncanny_multiple))) +
	scale_fill_gradientn(
	limits = c(0,.25),
	na.value = "#9E0142",
	labels = scales::label_percent(),
	guide = "none",
	colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
	scale_x_discrete(labels = Ns_labels) +
	scale_y_discrete(labels = ESs_labels) +
	theme_minimal(12) +
	labs(title = 'Probability of getting at least 3 "Uncanny" p-values from 5 results',
	subtitle = "What proportion of 10k between-subject simulations of 5 p-values have 3 p-values in (0.01 < p < 0.05)?",
	x = "N", y = "Effect size (Cohen's D)")
	plot_simulation3_5
	```
	```{r}

	simulations_summary = simulations_summary %>%
	rowwise() %>%
	mutate(uncanny_multiple = sapply(4:5, function(k) multiple(5, k, uncanny)) %>% sum()) %>%
	ungroup()


	plot_simulation4_5 = ggplot(simulations_summary) +
	aes(x = factor(N*2), y = factor(effect_size), fill = uncanny_multiple) +
	geom_raster() +
	geom_text(aes(label = pretty_percent(uncanny_multiple))) +
	scale_fill_gradientn(
	limits = c(0,.25),
	na.value = "#9E0142",
	labels = scales::label_percent(),
	guide = "none",
	colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
	scale_x_discrete(labels = Ns_labels) +
	scale_y_discrete(labels = ESs_labels) +
	theme_minimal(12) +
	labs(title = 'Probability of getting at least 4 "Uncanny" p-values from 5 results',
	subtitle = "What proportion of 10k between-subject simulations of 5 p-values have 4 p-values in (0.01 < p < 0.05)?",
	x = "N", y = "Effect size (Cohen's D)")
	plot_simulation4_5
	```

	```{r}
	plot_simulation / plot_simulation3_5 / plot_simulation4_5
	ggsave("~/statistics/multiple uncanny.png", width = 1000, height = 2000, units = "px", scale = 3)
	```