ajstewartlang/gist:b272364c0d5bfea5beb28ab3c0a66e15

## gistfile1.txt
library(tidyverse)
library(broom)
library(Hmisc)

total_samples <- 10000
sample_size <- 128
participant <- rep(1:sample_size)
condition <- c(rep("fast", times = sample_size/2), rep("slow", times = sample_size/2))
all_data <- NULL

for  (i in 1:total_samples) {
  sample <- i
  set.seed(1233 + i)
  dv <- c(rnorm(sample_size/2, 1000, 50), rnorm(sample_size/2, 1025, 50))
  data <- as.tibble(cbind(participant, condition, dv, sample))
  all_data <- rbind(data, all_data)
}

all_data$condition <- as.factor(all_data$condition)

all_data$dv <- as.integer(all_data$dv)

ggplot(all_data, aes(x = condition, y = dv, fill = condition)) +
  geom_violin() +
  geom_jitter(alpha = .3, width = .05) +
  guides(fill = FALSE) +
  facet_wrap(~ sample, ncol = 5, nrow = 2)

str(all_data)

all_data %>% group_by(condition, sample) %>%
  summarise(average = mean(dv), sd(dv)) %>%
  ggplot(aes(x = condition, y = average, group = condition, label = sample)) +
  geom_jitter(width = .1, alpha = .5) +
  stat_summary(fun.data = "mean_cl_boot", colour = "blue") +
  geom_text(check_overlap = TRUE, nudge_x = .2, nudge_y = 0, colour = "black") +
  ylab("Reaction Time (ms.)")

all_data %>% group_by(condition, sample) %>% summarise(mean(dv), sd(dv))

# Saving the p-values for t-tests for each of the samples in one new tibble called "result".

result <- NULL
for  (i in 1:total_samples) {
  result <- rbind(tidy(t.test(filter(all_data, condition == "fast" & sample == i)$dv,
                              filter(all_data, condition == "slow" & sample == i)$dv, paired = FALSE)), result)
}

# All the p-values for each of the tests are stored in the column labelled "p.value" in the
# tibble "result".

# We can plot a histogram of these p-values.

ggplot(result, aes(x = p.value)) + geom_histogram(bins = 50)

# and also work out how many are < .05

count(filter(result, p.value <= .05))

ggplot(filter(result, p.value < .05), aes(x = p.value)) + geom_histogram(bins = 50)

# Relationship between p-values and Cohen's d (tl;dr there isn't one) ####
# The following very messy code works out Cohen's d for each of the sample where
# the t-test is significant.

result_data <- as.tibble(filter(cbind(seq(1:10000), result), result$p.value < .05))

colnames(result_data)[1] <- "sample"

temp <- all_data %>%
  group_by(sample, condition) %>%
  summarise(mean = mean(dv))

temp <- spread(temp, "condition", "mean", c("fast", "slow"))

temp1 <- all_data %>%
  group_by(sample, condition) %>%
  summarise(sd = sd(dv))

temp1 <- spread(temp1, "condition", "sd", c("fast", "slow"))

temp2 <- inner_join(temp, temp1, by = "sample")
colnames(temp2) <- c("sample", "fast_mean", "slow_mean", "fast_sd", "slow_sd")

temp2$sample <- as.integer(temp2$sample)
temp2$fast_mean <- as.integer(temp2$fast_mean)
temp2$slow_mean <- as.integer(temp2$slow_mean)
temp2$fast_sd <- as.integer(temp2$fast_sd)
temp2$slow_sd <- as.integer(temp2$slow_sd)

temp3 <- left_join(result_data, temp2, by = "sample")

temp4 <- temp3 %>%
  mutate(d = (slow_mean-fast_mean)/sqrt(mean(c((slow_sd*slow_sd), (fast_sd*fast_sd)))))

ggplot(temp4, aes (x = d)) +
  geom_histogram(bins = 30) +
  xlab("Cohen's d estimate") +
  geom_vline(xintercept = .5, colour = "red") +
  xlim(-.2, 1.1)

ggplot(temp4, aes (x = p.value, y = d)) +
  geom_point(alpha = .2) +
  geom_smooth(method = "lm") +
  labs(y = "Cohen's d")

rcorr(temp4$p.value, temp4$d)
	library(tidyverse)
	library(broom)
	library(Hmisc)

	total_samples <- 10000
	sample_size <- 128
	participant <- rep(1:sample_size)
	condition <- c(rep("fast", times = sample_size/2), rep("slow", times = sample_size/2))
	all_data <- NULL

	for (i in 1:total_samples) {
	sample <- i
	set.seed(1233 + i)
	dv <- c(rnorm(sample_size/2, 1000, 50), rnorm(sample_size/2, 1025, 50))
	data <- as.tibble(cbind(participant, condition, dv, sample))
	all_data <- rbind(data, all_data)
	}

	all_data$condition <- as.factor(all_data$condition)

	all_data$dv <- as.integer(all_data$dv)

	ggplot(all_data, aes(x = condition, y = dv, fill = condition)) +
	geom_violin() +
	geom_jitter(alpha = .3, width = .05) +
	guides(fill = FALSE) +
	facet_wrap(~ sample, ncol = 5, nrow = 2)

	str(all_data)

	all_data %>% group_by(condition, sample) %>%
	summarise(average = mean(dv), sd(dv)) %>%
	ggplot(aes(x = condition, y = average, group = condition, label = sample)) +
	geom_jitter(width = .1, alpha = .5) +
	stat_summary(fun.data = "mean_cl_boot", colour = "blue") +
	geom_text(check_overlap = TRUE, nudge_x = .2, nudge_y = 0, colour = "black") +
	ylab("Reaction Time (ms.)")

	all_data %>% group_by(condition, sample) %>% summarise(mean(dv), sd(dv))

	# Saving the p-values for t-tests for each of the samples in one new tibble called "result".

	result <- NULL
	for (i in 1:total_samples) {
	result <- rbind(tidy(t.test(filter(all_data, condition == "fast" & sample == i)$dv,
	filter(all_data, condition == "slow" & sample == i)$dv, paired = FALSE)), result)
	}

	# All the p-values for each of the tests are stored in the column labelled "p.value" in the
	# tibble "result".

	# We can plot a histogram of these p-values.

	ggplot(result, aes(x = p.value)) + geom_histogram(bins = 50)

	# and also work out how many are < .05

	count(filter(result, p.value <= .05))

	ggplot(filter(result, p.value < .05), aes(x = p.value)) + geom_histogram(bins = 50)

	# Relationship between p-values and Cohen's d (tl;dr there isn't one) ####
	# The following very messy code works out Cohen's d for each of the sample where
	# the t-test is significant.

	result_data <- as.tibble(filter(cbind(seq(1:10000), result), result$p.value < .05))

	colnames(result_data)[1] <- "sample"

	temp <- all_data %>%
	group_by(sample, condition) %>%
	summarise(mean = mean(dv))

	temp <- spread(temp, "condition", "mean", c("fast", "slow"))

	temp1 <- all_data %>%
	group_by(sample, condition) %>%
	summarise(sd = sd(dv))

	temp1 <- spread(temp1, "condition", "sd", c("fast", "slow"))

	temp2 <- inner_join(temp, temp1, by = "sample")
	colnames(temp2) <- c("sample", "fast_mean", "slow_mean", "fast_sd", "slow_sd")

	temp2$sample <- as.integer(temp2$sample)
	temp2$fast_mean <- as.integer(temp2$fast_mean)
	temp2$slow_mean <- as.integer(temp2$slow_mean)
	temp2$fast_sd <- as.integer(temp2$fast_sd)
	temp2$slow_sd <- as.integer(temp2$slow_sd)

	temp3 <- left_join(result_data, temp2, by = "sample")

	temp4 <- temp3 %>%
	mutate(d = (slow_mean-fast_mean)/sqrt(mean(c((slow_sdslow_sd), (fast_sdfast_sd)))))

	ggplot(temp4, aes (x = d)) +
	geom_histogram(bins = 30) +
	xlab("Cohen's d estimate") +
	geom_vline(xintercept = .5, colour = "red") +
	xlim(-.2, 1.1)

	ggplot(temp4, aes (x = p.value, y = d)) +
	geom_point(alpha = .2) +
	geom_smooth(method = "lm") +
	labs(y = "Cohen's d")

	rcorr(temp4$p.value, temp4$d)