steveharoz/simulate Mazar and Zhong (2010) exp3.R

## simulate Mazar and Zhong (2010) exp3.R
library(tidyverse)
library(effectsize) # cohens_d()

# reproducible
set.seed(8675309)

# simulate one experiment
simulate = function(count_a, count_b, replicate_count) {

  count_total = count_a + count_b
  higher_proportion_for_b = 0.1

  # generate the subjects and conditions
  data = tibble(
    subjectID = paste0("S", 1:count_total),
    condition = c(rep("A", count_a), rep("B", count_b))
  )

  # simulate responses
  data = data %>%
    rowwise() %>%
    mutate(response = rbinom(1, replicate_count, ifelse(condition=="A", 0.4, 0.4 + higher_proportion_for_b))) %>%
    ungroup()

  effectsize::cohens_d(response ~ condition, data = data)$Cohens_d
}

# cohen's d from many experiments:
cohens_d = replicate(10000, simulate(38, 43, 90))
hist(abs(cohens_d))

## zzz better approach for calculating cohens d.R
# Alterantive approach to calculating Cohen's D that is not impacted by the number of replicate trials
# https://journalofcognition.org/articles/10.5334/joc.10
# h/t Aaron Caldwell @arcstats.bsky.social


library(tidyverse)
library(lmerTest)

# reproducible
set.seed(8675309)

# simulate one experiment
simulate = function(count_a, count_b, replicate_count) {

  count_total = count_a + count_b
  higher_proportion_for_b = 0.1

  # 81 subjects each perform 90 replicate trials
  data = tibble(
    subjectID = paste0("S", 1:count_total),
    condition = c(rep("A", count_a), rep("B", count_b))
  ) %>%
    expand_grid(rep = 1:replicate_count)

  # simulate responses
  data = data %>%
    mutate(response = rbinom(n(), 1, ifelse(condition=="A", 0.4, 0.4 + higher_proportion_for_b)))


  fit = lmer(response ~ condition + (1|subjectID), data) %>% summary()

  # cohen's d from fit
  denominator = fit$varcor %>% as_tibble() %>% pull(vcov) %>% sum() %>% sqrt()
  numerator = data %>% group_by(condition) %>% summarise(response = mean(response)) %>% pull(response)
  numerator = sum(numerator * c(-1, 1))
  numerator / denominator # mean diff over sd
}

# cohen's d from many experiments:
cohens_d = replicate(1000, simulate(38, 43, 90))
hist(abs(cohens_d), breaks=seq(0.15,0.25,0.01))
	library(tidyverse)
	library(effectsize) # cohens_d()

	# reproducible
	set.seed(8675309)

	# simulate one experiment
	simulate = function(count_a, count_b, replicate_count) {

	count_total = count_a + count_b
	higher_proportion_for_b = 0.1

	# generate the subjects and conditions
	data = tibble(
	subjectID = paste0("S", 1:count_total),
	condition = c(rep("A", count_a), rep("B", count_b))
	)

	# simulate responses
	data = data %>%
	rowwise() %>%
	mutate(response = rbinom(1, replicate_count, ifelse(condition=="A", 0.4, 0.4 + higher_proportion_for_b))) %>%
	ungroup()

	effectsize::cohens_d(response ~ condition, data = data)$Cohens_d
	}

	# cohen's d from many experiments:
	cohens_d = replicate(10000, simulate(38, 43, 90))
	hist(abs(cohens_d))
	# Alterantive approach to calculating Cohen's D that is not impacted by the number of replicate trials
	# https://journalofcognition.org/articles/10.5334/joc.10
	# h/t Aaron Caldwell @arcstats.bsky.social


	library(tidyverse)
	library(lmerTest)

	# reproducible
	set.seed(8675309)

	# simulate one experiment
	simulate = function(count_a, count_b, replicate_count) {

	count_total = count_a + count_b
	higher_proportion_for_b = 0.1

	# 81 subjects each perform 90 replicate trials
	data = tibble(
	subjectID = paste0("S", 1:count_total),
	condition = c(rep("A", count_a), rep("B", count_b))
	) %>%
	expand_grid(rep = 1:replicate_count)

	# simulate responses
	data = data %>%
	mutate(response = rbinom(n(), 1, ifelse(condition=="A", 0.4, 0.4 + higher_proportion_for_b)))


	fit = lmer(response ~ condition + (1\|subjectID), data) %>% summary()

	# cohen's d from fit
	denominator = fit$varcor %>% as_tibble() %>% pull(vcov) %>% sum() %>% sqrt()
	numerator = data %>% group_by(condition) %>% summarise(response = mean(response)) %>% pull(response)
	numerator = sum(numerator * c(-1, 1))
	numerator / denominator # mean diff over sd
	}

	# cohen's d from many experiments:
	cohens_d = replicate(1000, simulate(38, 43, 90))
	hist(abs(cohens_d), breaks=seq(0.15,0.25,0.01))