Skip to content

Instantly share code, notes, and snippets.

@ajstewartlang
Last active February 8, 2019 11:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ajstewartlang/b272364c0d5bfea5beb28ab3c0a66e15 to your computer and use it in GitHub Desktop.
Save ajstewartlang/b272364c0d5bfea5beb28ab3c0a66e15 to your computer and use it in GitHub Desktop.
Simulation_script_10000_expts_cohens_d_.5
library(tidyverse)
library(broom)
library(Hmisc)
total_samples <- 10000
sample_size <- 128
participant <- rep(1:sample_size)
condition <- c(rep("fast", times = sample_size/2), rep("slow", times = sample_size/2))
all_data <- NULL
for (i in 1:total_samples) {
sample <- i
set.seed(1233 + i)
dv <- c(rnorm(sample_size/2, 1000, 50), rnorm(sample_size/2, 1025, 50))
data <- as.tibble(cbind(participant, condition, dv, sample))
all_data <- rbind(data, all_data)
}
all_data$condition <- as.factor(all_data$condition)
all_data$dv <- as.integer(all_data$dv)
ggplot(all_data, aes(x = condition, y = dv, fill = condition)) +
geom_violin() +
geom_jitter(alpha = .3, width = .05) +
guides(fill = FALSE) +
facet_wrap(~ sample, ncol = 5, nrow = 2)
str(all_data)
all_data %>% group_by(condition, sample) %>%
summarise(average = mean(dv), sd(dv)) %>%
ggplot(aes(x = condition, y = average, group = condition, label = sample)) +
geom_jitter(width = .1, alpha = .5) +
stat_summary(fun.data = "mean_cl_boot", colour = "blue") +
geom_text(check_overlap = TRUE, nudge_x = .2, nudge_y = 0, colour = "black") +
ylab("Reaction Time (ms.)")
all_data %>% group_by(condition, sample) %>% summarise(mean(dv), sd(dv))
# Saving the p-values for t-tests for each of the samples in one new tibble called "result".
result <- NULL
for (i in 1:total_samples) {
result <- rbind(tidy(t.test(filter(all_data, condition == "fast" & sample == i)$dv,
filter(all_data, condition == "slow" & sample == i)$dv, paired = FALSE)), result)
}
# All the p-values for each of the tests are stored in the column labelled "p.value" in the
# tibble "result".
# We can plot a histogram of these p-values.
ggplot(result, aes(x = p.value)) + geom_histogram(bins = 50)
# and also work out how many are < .05
count(filter(result, p.value <= .05))
ggplot(filter(result, p.value < .05), aes(x = p.value)) + geom_histogram(bins = 50)
# Relationship between p-values and Cohen's d (tl;dr there isn't one) ####
# The following very messy code works out Cohen's d for each of the sample where
# the t-test is significant.
result_data <- as.tibble(filter(cbind(seq(1:10000), result), result$p.value < .05))
colnames(result_data)[1] <- "sample"
temp <- all_data %>%
group_by(sample, condition) %>%
summarise(mean = mean(dv))
temp <- spread(temp, "condition", "mean", c("fast", "slow"))
temp1 <- all_data %>%
group_by(sample, condition) %>%
summarise(sd = sd(dv))
temp1 <- spread(temp1, "condition", "sd", c("fast", "slow"))
temp2 <- inner_join(temp, temp1, by = "sample")
colnames(temp2) <- c("sample", "fast_mean", "slow_mean", "fast_sd", "slow_sd")
temp2$sample <- as.integer(temp2$sample)
temp2$fast_mean <- as.integer(temp2$fast_mean)
temp2$slow_mean <- as.integer(temp2$slow_mean)
temp2$fast_sd <- as.integer(temp2$fast_sd)
temp2$slow_sd <- as.integer(temp2$slow_sd)
temp3 <- left_join(result_data, temp2, by = "sample")
temp4 <- temp3 %>%
mutate(d = (slow_mean-fast_mean)/sqrt(mean(c((slow_sd*slow_sd), (fast_sd*fast_sd)))))
ggplot(temp4, aes (x = d)) +
geom_histogram(bins = 30) +
xlab("Cohen's d estimate") +
geom_vline(xintercept = .5, colour = "red") +
xlim(-.2, 1.1)
ggplot(temp4, aes (x = p.value, y = d)) +
geom_point(alpha = .2) +
geom_smooth(method = "lm") +
labs(y = "Cohen's d")
rcorr(temp4$p.value, temp4$d)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment