steveharoz/cohens d replicate simulation.R

## readme.md

      
    Raw
  

              readme.md
            
          
## cohens d replicate simulation.R
# Simulate different experiment designs and approaches to calculating Cohen's D

# References
# https://journalofcognition.org/articles/10.5334/joc.10   (h/t Aaron Caldwell @arcstats.bsky.social)
# https://jakewestfall.org/blog/index.php/2016/03/25/five-different-cohens-d-statistics-for-within-subject-designs/


library(tidyverse)
library(lmerTest)
library(multidplyr) # not necessary, but helps performance
library(ggtext) # markdown labels
library(glue)

# reproducible
set.seed(8675309)

BASELINE = 0
BASELINE_SD = 0.2
RESPONSE_SD = .5
DIFFERENCE = .1

translation = tribble(
  ~d_type, ~d_type_label,
  "da_between", "Between-subject Cohen's d<sub>a</sub>",
  "dr_between", "Between-subject Cohen's d<sub>r</sub>",
  "dz", "Within-subject Cohen's d<sub>z</sub>",
  "dr", "Within-subject Cohen's d<sub>r</sub>"
)

#### Simulations ####

# simulate one between-subject experiment
simulate_between = function(count_a = ceiling(10/2), count_b = ceiling(10/2), replicate_count = 1) {

  count_total = count_a + count_b

  # each subject does one condition [replicate_count] times
  data = tibble(
    subjectID = paste0("S", 1:count_total),
    baseline = BASELINE + rnorm(count_total, 0, BASELINE_SD),
    condition = c(rep("A", count_a), rep("B", count_b))
  ) %>%
    expand_grid(rep = 1:replicate_count)

  # simulate responses
  data %>% mutate(response = rnorm(
    n = n(),
    mean = baseline + ifelse(condition=="A", 0, DIFFERENCE),
    sd = RESPONSE_SD)
  )
}

# simulate one within-subject experiment
simulate_within = function(count = 10, replicate_count = 1) {

  # each subject does each condition [replicate_count] times
  data = tibble(
    subjectID = paste0("S", 1:count),
    baseline = BASELINE + rnorm(count, 0, BASELINE_SD)
  ) %>%
    expand_grid(
      condition = c("A", "B"),
      rep = 1:replicate_count
    )

  # simulate responses
  data %>% mutate(response = rnorm(
    n = n(),
    mean = baseline + ifelse(condition=="A", 0, DIFFERENCE),
    sd = RESPONSE_SD)
  )
}


#### Cohen's d calculations ####

# d_a
# aggregate replicates, ignore within
cohens_d_subject_means = function(data) {

  # collapse replicates to get mean per subject*condition
  subject_means = data %>%
    summarise(.by = c(subjectID, condition), response = mean(response))

  numerator = subject_means %>%
    summarise(.by = c(condition), response = mean(response)) %>%
    pull(response)
  numerator = sum(numerator * c(-1, 1))

  denominator = subject_means %>%
    summarise(.by = c(condition), variance = var(response)) %>%
    pull(variance)
  denominator = sqrt(mean(denominator))


  numerator / denominator # mean diff over sd
}

# d_z
# aggregate replicates, use per-subject diffs
cohens_d_subject_diffs = function(data) {

  # collapse replicates to get mean per subject*condition
  subject_means = data %>%
    summarise(.by = c(subjectID, condition), response = mean(response))

  data = subject_means %>%
    pivot_wider(names_from = condition, values_from = response) %>%
    mutate(diff = B - A)

  mean(data$diff) / sd(data$diff)
}

# d_r
# include residual variance in the denominator
cohens_d_residual_variance = function(data) {
  if(data %>% count(subjectID) %>% pull(n) %>% max() < 2)
    return(cohens_d_subject_means_between(data))

  fit = lmer(response ~ condition + (1|subjectID), data) %>% summary()

  # cohen's d from fit
  denominator = fit$varcor %>% as_tibble() %>% pull(vcov) %>% sum() %>% sqrt()
  numerator = data %>% group_by(condition) %>% summarise(response = mean(response)) %>% pull(response)
  numerator = sum(numerator * c(-1, 1))
  numerator / denominator # mean diff over sd
}

# d_a (for between-subject)
# aggregate replicates
cohens_d_subject_means_between = function(data) {

  # collapse replicates to get mean per subject*condition
  subject_means = data %>%
    summarise(.by = c(subjectID, condition), response = mean(response))

  numerator = subject_means %>% summarise(.by = condition, mu=mean(response)) %>% pull(mu)
  denominator = sqrt( subject_means %>% summarise(.by = condition, var=var(response)) %>% pull(var) %>% mean() )
  sum(numerator * c(-1, 1)) / denominator
}


#### plot ###########

plot_aggregated = function(data) {
  # collapse replicates to get mean per subject*condition
  subject_means = data %>%
    summarise(.by = c(subjectID, condition), response = mean(response))

  subject_means = subject_means %>% left_join(
    subject_means %>% summarize(.by=subjectID, baseline = mean(response))
  ) %>%
    mutate(response_relative = response - baseline)

  ggplot(subject_means) +
    aes(x=condition, y=response_relative, group=subjectID) +
    geom_point(size = 4) +
    geom_line()
}

plot_aggregated_between = function(data) {
  # collapse replicates to get mean per subject*condition
  subject_means = data %>%
    summarise(.by = c(subjectID, condition), response = mean(response))

  subject_means = subject_means %>%
    mutate(.by = condition, response_relative = response-mean(response))

  ggplot(subject_means) +
    aes(x=condition, y=response_relative) +
    geom_point(size = 4)
}


#### test ####

(simulate_within(10, 2) %>% plot_aggregated()) +
(simulate_within(10, 20) %>% plot_aggregated()) +
(simulate_within(10, 200) %>% plot_aggregated())


simulate_between() %>% plot_aggregated_between()
simulate_between(10, 10, 1000) %>% cohens_d_subject_means_between()

simulate_within(10, 30) %>% plot_aggregated()
simulate_within(10, 30) %>% cohens_d_residual_variance()
simulate_within(10, 30) %>% cohens_d_subject_means_between()


#### simulate ####

run_simulations = function(subject_count, replicates) {
  within_data = simulate_within(subject_count, replicates)
  between_data = simulate_between(subject_count/2, subject_count/2, replicates)
  list(
    dz = within_data %>% cohens_d_subject_diffs(),
    dr = within_data %>% cohens_d_residual_variance(),
    #da_within = within_data %>% cohens_d_subject_means_between(),
    da_between = between_data %>% cohens_d_subject_means_between(),
    dr_between = between_data %>% cohens_d_residual_variance()
  )
}

cluster = new_cluster(parallel::detectCores() - 1)
cluster_library(cluster, c("dplyr", "tidyr", "magrittr", "lmerTest"))
cluster_copy(cluster, c(
  "BASELINE", "BASELINE_SD", "RESPONSE_SD", "DIFFERENCE",
  "simulate_between", "simulate_within",
  "cohens_d_subject_diffs", "cohens_d_residual_variance", "cohens_d_subject_means_between",
  "run_simulations"))

SIMULATION_COUNT = 5000

data2 = expand_grid(
  index = 1:SIMULATION_COUNT,
  subject_count = c(10, 40),
  replicates = 5^(0:4)
) %>%
  rowwise() %>%
  partition(cluster) %>%  # improves performance, but not necessary
  mutate(ds = list(run_simulations(subject_count, replicates))) %>%
  collect() %>% # improves performance, but not necessary
  ungroup() %>%
  unnest_longer(ds, "d", "d_type")

# save it
#data %>% write_csv("~/statistics/cohens_d_simulations.csv")
beepr::beep(5)


#### plot ####

# confidence intervals
quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) {
  tibble(
    val = quantile(x, probs, na.rm = TRUE),
    quant = probs
  )
}

data %>%
  reframe(.by = c(replicates, d_type, subject_count), quantile_df(d, c(0.025, 0.5, 0.975))) %>%
  pivot_wider(names_from = quant, values_from = val) %>%
  mutate(d_type = factor(d_type, levels=c("dz", "dr", "da_between", "dr_between"), labels=translation$d_type_label)) %>%
ggplot() +
  aes(xmin = `0.025`, x = `0.5`, xmax = `0.975`, color = d_type, linetype = factor(subject_count),
      y = as.numeric(factor(replicates)) - 0.15*scale(as.numeric(subject_count))) +
  geom_vline(xintercept = 0) +
  geom_pointrange(linewidth = 1, size = .5, key_glyph = draw_key_linerange) +
  scale_x_continuous(breaks = -1:12, minor_breaks = FALSE) +
  scale_y_continuous( minor_breaks = FALSE,
    breaks = data$replicates %>% unique() %>% seq_along(),
    labels = data$replicates %>% unique() %>% sort()) +
  scale_color_brewer(palette = "Set1") +
  guides(color = "none") +
  facet_wrap(facets = vars(d_type), ncol = 1) +
  theme_minimal(12) + theme(
    strip.text = ggtext::element_markdown(),
    legend.text = ggtext::element_markdown(),
  ) +
  labs(x = "Cohen's d (95% CI)", y = "Number of replicate trials")


# dot histogram
library(ggdist)

translation %>%
  expand_grid(subject_count = data$subject_count %>% unique()) %>%
  mutate(design = glue("{dtype} (N = {subject_count})"))

p = data %>%
  left_join(
    translation %>%
      expand_grid(subject_count = data$subject_count %>% unique()) %>%
      mutate(design = glue("{d_type_label} (N = {subject_count})")) %>%
      mutate(design = factor(design, levels = design))
  ) %>%
  mutate(d_type = factor(d_type, levels=translation$d_type, labels=translation$d_type_label)) %>%
  #filter(index < 50) %>%
ggplot() +
  aes(x = d, y = factor(replicates), fill = d_type) +
  #geom_vline(xintercept = DIFFERENCE/sqrt(RESPONSE_SD^2+BASELINE_SD^2), color = "grey92") +
  geom_vline(xintercept = 0, color = "grey30") +
  ggdist::geom_dots(color = NA, slab_linewidth = 0) +
  scale_x_continuous(breaks=seq(-12,12,by=2), limits=c(-3, 5), expand=c(0,0), minor_breaks=FALSE) +
  scale_fill_manual(values = c("#267CBF", "#335E7F", "#E41A1C", "#993D3E")) +
  guides(fill = "none") +
  facet_wrap(facets = vars(design), ncol=2, scales = "free_x") +
  theme_minimal(12) + theme(
    axis.title = element_text(hjust = 0, size = rel(0.8), color = "gray30"),
    plot.caption = ggtext::element_markdown(color = "gray50", lineheight = 1.2, margin = margin(20,0,0,0)),
    plot.subtitle = ggtext::element_markdown(size = 10, lineheight = 1.2),
    strip.text = ggtext::element_markdown(hjust = 0, size = rel(1), margin = margin(20,0,0,0)),
    plot.margin = margin(20, 20, 20, 20, "pt"),
    panel.spacing.x = unit(10, "pt")) +
  labs(x = "Cohen's d 🠚", y = "Replicate trial count 🠚",
       title = "Within/between-subject, replicate trial count, and Cohen's d variant impact effect size and consistency",
       subtitle = glue(
       "Each scenario was simulated {SIMULATION_COUNT} times: <br>
       •  2 conditions (A and B) and a continuous response<br>
       •  Individual differences in baseline: 𝑁(0, 0.2) <br>
       •  Response: subject_baseline + 0.1*(condition == B) + 𝑁(0, 0.5)"),
       caption =
       'Cohen\'s d definitions from "Five Different Cohen\'s d Statistics for Within-subject Designs" by Jake Westfall<br>
       Simulation and chart by Steve Haroz')
# •  Subjects: 10 <br>

ragg::agg_png("~/cohens d and replicate counts subj.png", 7200, 6000, scaling = 9)
p
dev.off()
	# Simulate different experiment designs and approaches to calculating Cohen's D

	# References
	# https://journalofcognition.org/articles/10.5334/joc.10 (h/t Aaron Caldwell @arcstats.bsky.social)
	# https://jakewestfall.org/blog/index.php/2016/03/25/five-different-cohens-d-statistics-for-within-subject-designs/


	library(tidyverse)
	library(lmerTest)
	library(multidplyr) # not necessary, but helps performance
	library(ggtext) # markdown labels
	library(glue)

	# reproducible
	set.seed(8675309)

	BASELINE = 0
	BASELINE_SD = 0.2
	RESPONSE_SD = .5
	DIFFERENCE = .1

	translation = tribble(
	~d_type, ~d_type_label,
	"da_between", "Between-subject Cohen's d<sub>a</sub>",
	"dr_between", "Between-subject Cohen's d<sub>r</sub>",
	"dz", "Within-subject Cohen's d<sub>z</sub>",
	"dr", "Within-subject Cohen's d<sub>r</sub>"
	)

	#### Simulations ####

	# simulate one between-subject experiment
	simulate_between = function(count_a = ceiling(10/2), count_b = ceiling(10/2), replicate_count = 1) {

	count_total = count_a + count_b

	# each subject does one condition [replicate_count] times
	data = tibble(
	subjectID = paste0("S", 1:count_total),
	baseline = BASELINE + rnorm(count_total, 0, BASELINE_SD),
	condition = c(rep("A", count_a), rep("B", count_b))
	) %>%
	expand_grid(rep = 1:replicate_count)

	# simulate responses
	data %>% mutate(response = rnorm(
	n = n(),
	mean = baseline + ifelse(condition=="A", 0, DIFFERENCE),
	sd = RESPONSE_SD)
	)
	}

	# simulate one within-subject experiment
	simulate_within = function(count = 10, replicate_count = 1) {

	# each subject does each condition [replicate_count] times
	data = tibble(
	subjectID = paste0("S", 1:count),
	baseline = BASELINE + rnorm(count, 0, BASELINE_SD)
	) %>%
	expand_grid(
	condition = c("A", "B"),
	rep = 1:replicate_count
	)

	# simulate responses
	data %>% mutate(response = rnorm(
	n = n(),
	mean = baseline + ifelse(condition=="A", 0, DIFFERENCE),
	sd = RESPONSE_SD)
	)
	}


	#### Cohen's d calculations ####

	# d_a
	# aggregate replicates, ignore within
	cohens_d_subject_means = function(data) {

	# collapse replicates to get mean per subject*condition
	subject_means = data %>%
	summarise(.by = c(subjectID, condition), response = mean(response))

	numerator = subject_means %>%
	summarise(.by = c(condition), response = mean(response)) %>%
	pull(response)
	numerator = sum(numerator * c(-1, 1))

	denominator = subject_means %>%
	summarise(.by = c(condition), variance = var(response)) %>%
	pull(variance)
	denominator = sqrt(mean(denominator))


	numerator / denominator # mean diff over sd
	}

	# d_z
	# aggregate replicates, use per-subject diffs
	cohens_d_subject_diffs = function(data) {

	# collapse replicates to get mean per subject*condition
	subject_means = data %>%
	summarise(.by = c(subjectID, condition), response = mean(response))

	data = subject_means %>%
	pivot_wider(names_from = condition, values_from = response) %>%
	mutate(diff = B - A)

	mean(data$diff) / sd(data$diff)
	}

	# d_r
	# include residual variance in the denominator
	cohens_d_residual_variance = function(data) {
	if(data %>% count(subjectID) %>% pull(n) %>% max() < 2)
	return(cohens_d_subject_means_between(data))

	fit = lmer(response ~ condition + (1\|subjectID), data) %>% summary()

	# cohen's d from fit
	denominator = fit$varcor %>% as_tibble() %>% pull(vcov) %>% sum() %>% sqrt()
	numerator = data %>% group_by(condition) %>% summarise(response = mean(response)) %>% pull(response)
	numerator = sum(numerator * c(-1, 1))
	numerator / denominator # mean diff over sd
	}

	# d_a (for between-subject)
	# aggregate replicates
	cohens_d_subject_means_between = function(data) {

	# collapse replicates to get mean per subject*condition
	subject_means = data %>%
	summarise(.by = c(subjectID, condition), response = mean(response))

	numerator = subject_means %>% summarise(.by = condition, mu=mean(response)) %>% pull(mu)
	denominator = sqrt( subject_means %>% summarise(.by = condition, var=var(response)) %>% pull(var) %>% mean() )
	sum(numerator * c(-1, 1)) / denominator
	}


	#### plot ###########

	plot_aggregated = function(data) {
	# collapse replicates to get mean per subject*condition
	subject_means = data %>%
	summarise(.by = c(subjectID, condition), response = mean(response))

	subject_means = subject_means %>% left_join(
	subject_means %>% summarize(.by=subjectID, baseline = mean(response))
	) %>%
	mutate(response_relative = response - baseline)

	ggplot(subject_means) +
	aes(x=condition, y=response_relative, group=subjectID) +
	geom_point(size = 4) +
	geom_line()
	}

	plot_aggregated_between = function(data) {
	# collapse replicates to get mean per subject*condition
	subject_means = data %>%
	summarise(.by = c(subjectID, condition), response = mean(response))

	subject_means = subject_means %>%
	mutate(.by = condition, response_relative = response-mean(response))

	ggplot(subject_means) +
	aes(x=condition, y=response_relative) +
	geom_point(size = 4)
	}




	#### test ####

	(simulate_within(10, 2) %>% plot_aggregated()) +
	(simulate_within(10, 20) %>% plot_aggregated()) +
	(simulate_within(10, 200) %>% plot_aggregated())


	simulate_between() %>% plot_aggregated_between()
	simulate_between(10, 10, 1000) %>% cohens_d_subject_means_between()

	simulate_within(10, 30) %>% plot_aggregated()
	simulate_within(10, 30) %>% cohens_d_residual_variance()
	simulate_within(10, 30) %>% cohens_d_subject_means_between()


	#### simulate ####

	run_simulations = function(subject_count, replicates) {
	within_data = simulate_within(subject_count, replicates)
	between_data = simulate_between(subject_count/2, subject_count/2, replicates)
	list(
	dz = within_data %>% cohens_d_subject_diffs(),
	dr = within_data %>% cohens_d_residual_variance(),
	#da_within = within_data %>% cohens_d_subject_means_between(),
	da_between = between_data %>% cohens_d_subject_means_between(),
	dr_between = between_data %>% cohens_d_residual_variance()
	)
	}

	cluster = new_cluster(parallel::detectCores() - 1)
	cluster_library(cluster, c("dplyr", "tidyr", "magrittr", "lmerTest"))
	cluster_copy(cluster, c(
	"BASELINE", "BASELINE_SD", "RESPONSE_SD", "DIFFERENCE",
	"simulate_between", "simulate_within",
	"cohens_d_subject_diffs", "cohens_d_residual_variance", "cohens_d_subject_means_between",
	"run_simulations"))

	SIMULATION_COUNT = 5000

	data2 = expand_grid(
	index = 1:SIMULATION_COUNT,
	subject_count = c(10, 40),
	replicates = 5^(0:4)
	) %>%
	rowwise() %>%
	partition(cluster) %>% # improves performance, but not necessary
	mutate(ds = list(run_simulations(subject_count, replicates))) %>%
	collect() %>% # improves performance, but not necessary
	ungroup() %>%
	unnest_longer(ds, "d", "d_type")

	# save it
	#data %>% write_csv("~/statistics/cohens_d_simulations.csv")
	beepr::beep(5)


	#### plot ####

	# confidence intervals
	quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) {
	tibble(
	val = quantile(x, probs, na.rm = TRUE),
	quant = probs
	)
	}

	data %>%
	reframe(.by = c(replicates, d_type, subject_count), quantile_df(d, c(0.025, 0.5, 0.975))) %>%
	pivot_wider(names_from = quant, values_from = val) %>%
	mutate(d_type = factor(d_type, levels=c("dz", "dr", "da_between", "dr_between"), labels=translation$d_type_label)) %>%
	ggplot() +
	aes(xmin = `0.025`, x = `0.5`, xmax = `0.975`, color = d_type, linetype = factor(subject_count),
	y = as.numeric(factor(replicates)) - 0.15*scale(as.numeric(subject_count))) +
	geom_vline(xintercept = 0) +
	geom_pointrange(linewidth = 1, size = .5, key_glyph = draw_key_linerange) +
	scale_x_continuous(breaks = -1:12, minor_breaks = FALSE) +
	scale_y_continuous( minor_breaks = FALSE,
	breaks = data$replicates %>% unique() %>% seq_along(),
	labels = data$replicates %>% unique() %>% sort()) +
	scale_color_brewer(palette = "Set1") +
	guides(color = "none") +
	facet_wrap(facets = vars(d_type), ncol = 1) +
	theme_minimal(12) + theme(
	strip.text = ggtext::element_markdown(),
	legend.text = ggtext::element_markdown(),
	) +
	labs(x = "Cohen's d (95% CI)", y = "Number of replicate trials")



	# dot histogram
	library(ggdist)

	translation %>%
	expand_grid(subject_count = data$subject_count %>% unique()) %>%
	mutate(design = glue("{dtype} (N = {subject_count})"))

	p = data %>%
	left_join(
	translation %>%
	expand_grid(subject_count = data$subject_count %>% unique()) %>%
	mutate(design = glue("{d_type_label} (N = {subject_count})")) %>%
	mutate(design = factor(design, levels = design))
	) %>%
	mutate(d_type = factor(d_type, levels=translation$d_type, labels=translation$d_type_label)) %>%
	#filter(index < 50) %>%
	ggplot() +
	aes(x = d, y = factor(replicates), fill = d_type) +
	#geom_vline(xintercept = DIFFERENCE/sqrt(RESPONSE_SD^2+BASELINE_SD^2), color = "grey92") +
	geom_vline(xintercept = 0, color = "grey30") +
	ggdist::geom_dots(color = NA, slab_linewidth = 0) +
	scale_x_continuous(breaks=seq(-12,12,by=2), limits=c(-3, 5), expand=c(0,0), minor_breaks=FALSE) +
	scale_fill_manual(values = c("#267CBF", "#335E7F", "#E41A1C", "#993D3E")) +
	guides(fill = "none") +
	facet_wrap(facets = vars(design), ncol=2, scales = "free_x") +
	theme_minimal(12) + theme(
	axis.title = element_text(hjust = 0, size = rel(0.8), color = "gray30"),
	plot.caption = ggtext::element_markdown(color = "gray50", lineheight = 1.2, margin = margin(20,0,0,0)),
	plot.subtitle = ggtext::element_markdown(size = 10, lineheight = 1.2),
	strip.text = ggtext::element_markdown(hjust = 0, size = rel(1), margin = margin(20,0,0,0)),
	plot.margin = margin(20, 20, 20, 20, "pt"),
	panel.spacing.x = unit(10, "pt")) +
	labs(x = "Cohen's d 🠚", y = "Replicate trial count 🠚",
	title = "Within/between-subject, replicate trial count, and Cohen's d variant impact effect size and consistency",
	subtitle = glue(
	"Each scenario was simulated {SIMULATION_COUNT} times: <br>
	• 2 conditions (A and B) and a continuous response<br>
	• Individual differences in baseline: 𝑁(0, 0.2) <br>
	• Response: subject_baseline + 0.1*(condition == B) + 𝑁(0, 0.5)"),
	caption =
	'Cohen\'s d definitions from "Five Different Cohen\'s d Statistics for Within-subject Designs" by Jake Westfall<br>
	Simulation and chart by Steve Haroz')
	# • Subjects: 10 <br>

	ragg::agg_png("~/cohens d and replicate counts subj.png", 7200, 6000, scaling = 9)
	p
	dev.off()