carlislerainey/list-exit-pwr-analysis. R

## list-exit-pwr-analysis. R

# load packages
library(tidyverse)
library(marginaleffects)
library(patchwork)


# clear workspace
rm(list = ls())

# create respondents
n_resp <- 3000       # number of total respondents
x1 <- rnorm(n_resp)  # a covariate to induce some correlation in the responses for list experiment

# the designs to consider
# - 0.5 = each condition of list experiment is half the size as direct and prime;
#         or, the whole list experiment is the same size as direct and prime
# - 1 = each condition of list experiment is same size as direct and prime
# - 3 = each condition of list experiment is 3x as large as direct and prime
times <- seq(0.5, 5, by = 0.01)

res_list <- pred_list <- NULL # containers for estimates
for (i in 1:length(times)) {  # loop over each value of times

  # list experiment items
  b0 <- 0.0                                                     # <- logit intercept for "other" items
  b1 <- -2.0                                                    # <- logit slope for "other" items
  item1 <- rbinom(n_resp, size = 1, prob = plogis(-1 + 2*x1))   # <- item of interest; -1 intercept ~~> 25% agreement
  item2 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1))
  item3 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1))
  item4 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1))
  item5 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1))

  # direct question with prime; created by falsifying item1 responses above
  prime_pr_lies <- 0.15                                         # <- the fraction of true 1s (item1) that are falsified
  prime1 <- ifelse(item1 == 1,
                   rbinom(n_resp, size = 1, prob = 1 - prime_pr_lies), # change some 1s to 0s
                   0) # keep all 0s as 0s

  # direct question (without prime); created by falsifying even more responses above
  dq_pr_lies <- 0.3                                             # <- the fraction of true 1s (item1) that are falsified
  dq1 <- ifelse(item1 == 1,
                rbinom(n_resp, size = 1, prob = 1 - dq_pr_lies), # change some 1s to 0s
                0) # keep all 0s as 0s

  # combine outcomes into a data frame
  potential_outcomes <- data.frame(item1, item2, item3, item4, item5, dq1, prime1) %>%
    mutate(list5 = item1 + item2 + item3 + item4 + item5,
           list4 =         item2 + item3 + item4 + item5)

  # the four conditions to which we assign respondents
  conditions <- c("List Experiment: 4 Items",
                  "List Experiment: 5 Items",
                  "Direct Question",
                  "Direct Question with Prime")

  # the unnormalized probabilities of assignment to each condition
  condition_rel_weights <- c(times[i], times[i], 1, 1)
  condition_weights <- condition_rel_weights/sum(condition_rel_weights)

  # place respondents into conditions
  sample <- potential_outcomes |>
    mutate(condition = sample(conditions, size = n(),
                              replace = TRUE,
                              prob = condition_weights)) |>
    # put responses in the same outcome variable for linear regression
    mutate(y = case_when(condition == "List Experiment: 4 Items" ~ list4,
                         condition == "List Experiment: 5 Items" ~ list5,
                         condition =="Direct Question" ~ dq1,
                         condition == "Direct Question with Prime" ~ prime1))

  # linear regression
  fit <- lm(y ~ -1 + condition, data = sample)
  #arm::display(fit)

  # compare to truth -----

  # list
  p <- predictions(fit,
              newdata = datagrid(condition = c("List Experiment: 4 Items",
                                               "List Experiment: 5 Items")),
              vcov = "HC3",
              hypothesis = "b2 - b1 = 0")
  mean(item1); p$estimate

  # direct
  p <- predictions(fit,
                   newdata = datagrid(condition = c("Direct Question")),
                   vcov = "HC3")
  mean(item1); p$estimate

  # direct with prime
  p <- predictions(fit,
                   newdata = datagrid(condition = c("Direct Question with Prime")),
                   vcov = "HC3")
  mean(item1); p$estimate

  # prediction of percent agreement with item1 -----

  # list
  p <- predictions(fit,
                   newdata = datagrid(condition = c("List Experiment: 4 Items",
                                                    "List Experiment: 5 Items")),
                   vcov = "HC3",
                   hypothesis = "b2 - b1 = 0")
  pred1 <- tibble(name = "List", se = p$std.error, est = p$estimate)

  # direct
  p <- predictions(fit,
                   newdata = datagrid(condition = c("Direct Question")),
                   vcov = "HC3")
  pred2 <- tibble(name = "Direct", se = p$std.error, est = p$estimate)

  # direct with prime
  p <- predictions(fit,
                   newdata = datagrid(condition = c("Direct Question with Prime")),
                   vcov = "HC3")
  pred3 <- tibble(name = "Prime", se = p$std.error, est = p$estimate)

  pred_list[[i]] <- bind_rows(list(pred1, pred2, pred3)) %>%
    mutate(times = times[i])

  # hypothesis tests ------

  # is direct different than direct with prime?
  p <- predictions(fit,
                   newdata = datagrid(condition = unique),
                   vcov = "HC3",
                   hypothesis = "b2 = b1")
  mde1 <- tibble(name = "D v. P", mde = p$std.error*2.48)  # minimal detectable effect (w/ 80% power)

  # is list different than direct?
  p <- predictions(fit,
                   newdata = datagrid(condition = unique),
                   vcov = "HC3",
                   hypothesis = "b4 - b3 = b1")
  mde2 <- tibble(name = "L v. D", mde = p$std.error*2.48)  # mde


  # is list different than direct with prime?
  p <- predictions(fit,
                   newdata = datagrid(condition = unique),
                   vcov = "HC3",
                   hypothesis = "b4 - b3 = b2")
  mde3 <- tibble(name = "L v. P", mde = p$std.error*2.48)  # mde

  res_list[[i]] <- bind_rows(mde1, mde2, mde3) %>%
    mutate(times = times[i])
}

# plot mde
res <- bind_rows(res_list)
gg1 <- ggplot(res, aes(x = times, y = mde, color = name)) +
  geom_line(alpha = 0.4) +
  geom_smooth(se = FALSE)+
  labs(title = "MDE in the Three Conditions",
       y = "Minimum Detectable Effect",
       x = "Size of *Each* List Experiment Condition Relative to Direct and Prime")

# plot estimates
pred <- bind_rows(pred_list)
gg2 <- ggplot(pred, aes(x = times, y = est, color = name)) +
  geom_hline(yintercept = mean(item1)) +
  geom_line(alpha = 0.4) +
  geom_smooth(se = FALSE) +
  labs(title = "Estimates in the Three Conditions",
       y = "Estimate",
       x = "Size of *Each* List Experiment Condition Relative to Direct and Prime\n(1 = Same Size; 3 = Three Times as Large)")

# plot se
gg3 <- ggplot(pred, aes(x = times, y = se, color = name)) +
  geom_line(alpha = 0.4) +
  geom_smooth(se = FALSE) +
  labs(title = "Standard Errors in the Three Conditions",
       y = "Stanard Error",
       x = "Size of *Each* List Experiment Condition Relative to Direct and Prime\n(1 = Same Size; 3 = Three Times as Large)")

gg1 / gg2 / gg3

# recommendation is the following assignments
times <- 3
tibble(condition = conditions,
       frac_respondents = c(times, times, 1, 1)/sum(c(times, times, 1, 1))) |>
  mutate(n_respondents = 3000*frac_respondents) |>
  mutate(Condition = condition,
         `% of Respondents` = scales::percent(frac_respondents, accuracy = 0.1),
         `Number of Respondents` = scales::number(n_respondents, big.mark = ",")) %>%
  select(Condition = condition, `% of Respondents`, `Number of Respondents`) %>%
  gt::gt()

	# load packages
	library(tidyverse)
	library(marginaleffects)
	library(patchwork)


	# clear workspace
	rm(list = ls())

	# create respondents
	n_resp <- 3000 # number of total respondents
	x1 <- rnorm(n_resp) # a covariate to induce some correlation in the responses for list experiment

	# the designs to consider
	# - 0.5 = each condition of list experiment is half the size as direct and prime;
	# or, the whole list experiment is the same size as direct and prime
	# - 1 = each condition of list experiment is same size as direct and prime
	# - 3 = each condition of list experiment is 3x as large as direct and prime
	times <- seq(0.5, 5, by = 0.01)

	res_list <- pred_list <- NULL # containers for estimates
	for (i in 1:length(times)) { # loop over each value of times

	# list experiment items
	b0 <- 0.0 # <- logit intercept for "other" items
	b1 <- -2.0 # <- logit slope for "other" items
	item1 <- rbinom(n_resp, size = 1, prob = plogis(-1 + 2*x1)) # <- item of interest; -1 intercept ~~> 25% agreement
	item2 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1))
	item3 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1))
	item4 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1))
	item5 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1))

	# direct question with prime; created by falsifying item1 responses above
	prime_pr_lies <- 0.15 # <- the fraction of true 1s (item1) that are falsified
	prime1 <- ifelse(item1 == 1,
	rbinom(n_resp, size = 1, prob = 1 - prime_pr_lies), # change some 1s to 0s
	0) # keep all 0s as 0s

	# direct question (without prime); created by falsifying even more responses above
	dq_pr_lies <- 0.3 # <- the fraction of true 1s (item1) that are falsified
	dq1 <- ifelse(item1 == 1,
	rbinom(n_resp, size = 1, prob = 1 - dq_pr_lies), # change some 1s to 0s
	0) # keep all 0s as 0s

	# combine outcomes into a data frame
	potential_outcomes <- data.frame(item1, item2, item3, item4, item5, dq1, prime1) %>%
	mutate(list5 = item1 + item2 + item3 + item4 + item5,
	list4 = item2 + item3 + item4 + item5)

	# the four conditions to which we assign respondents
	conditions <- c("List Experiment: 4 Items",
	"List Experiment: 5 Items",
	"Direct Question",
	"Direct Question with Prime")

	# the unnormalized probabilities of assignment to each condition
	condition_rel_weights <- c(times[i], times[i], 1, 1)
	condition_weights <- condition_rel_weights/sum(condition_rel_weights)

	# place respondents into conditions
	sample <- potential_outcomes \|>
	mutate(condition = sample(conditions, size = n(),
	replace = TRUE,
	prob = condition_weights)) \|>
	# put responses in the same outcome variable for linear regression
	mutate(y = case_when(condition == "List Experiment: 4 Items" ~ list4,
	condition == "List Experiment: 5 Items" ~ list5,
	condition =="Direct Question" ~ dq1,
	condition == "Direct Question with Prime" ~ prime1))

	# linear regression
	fit <- lm(y ~ -1 + condition, data = sample)
	#arm::display(fit)

	# compare to truth -----

	# list
	p <- predictions(fit,
	newdata = datagrid(condition = c("List Experiment: 4 Items",
	"List Experiment: 5 Items")),
	vcov = "HC3",
	hypothesis = "b2 - b1 = 0")
	mean(item1); p$estimate

	# direct
	p <- predictions(fit,
	newdata = datagrid(condition = c("Direct Question")),
	vcov = "HC3")
	mean(item1); p$estimate

	# direct with prime
	p <- predictions(fit,
	newdata = datagrid(condition = c("Direct Question with Prime")),
	vcov = "HC3")
	mean(item1); p$estimate

	# prediction of percent agreement with item1 -----

	# list
	p <- predictions(fit,
	newdata = datagrid(condition = c("List Experiment: 4 Items",
	"List Experiment: 5 Items")),
	vcov = "HC3",
	hypothesis = "b2 - b1 = 0")
	pred1 <- tibble(name = "List", se = p$std.error, est = p$estimate)

	# direct
	p <- predictions(fit,
	newdata = datagrid(condition = c("Direct Question")),
	vcov = "HC3")
	pred2 <- tibble(name = "Direct", se = p$std.error, est = p$estimate)

	# direct with prime
	p <- predictions(fit,
	newdata = datagrid(condition = c("Direct Question with Prime")),
	vcov = "HC3")
	pred3 <- tibble(name = "Prime", se = p$std.error, est = p$estimate)

	pred_list[[i]] <- bind_rows(list(pred1, pred2, pred3)) %>%
	mutate(times = times[i])

	# hypothesis tests ------

	# is direct different than direct with prime?
	p <- predictions(fit,
	newdata = datagrid(condition = unique),
	vcov = "HC3",
	hypothesis = "b2 = b1")
	mde1 <- tibble(name = "D v. P", mde = p$std.error*2.48) # minimal detectable effect (w/ 80% power)

	# is list different than direct?
	p <- predictions(fit,
	newdata = datagrid(condition = unique),
	vcov = "HC3",
	hypothesis = "b4 - b3 = b1")
	mde2 <- tibble(name = "L v. D", mde = p$std.error*2.48) # mde


	# is list different than direct with prime?
	p <- predictions(fit,
	newdata = datagrid(condition = unique),
	vcov = "HC3",
	hypothesis = "b4 - b3 = b2")
	mde3 <- tibble(name = "L v. P", mde = p$std.error*2.48) # mde

	res_list[[i]] <- bind_rows(mde1, mde2, mde3) %>%
	mutate(times = times[i])
	}

	# plot mde
	res <- bind_rows(res_list)
	gg1 <- ggplot(res, aes(x = times, y = mde, color = name)) +
	geom_line(alpha = 0.4) +
	geom_smooth(se = FALSE)+
	labs(title = "MDE in the Three Conditions",
	y = "Minimum Detectable Effect",
	x = "Size of Each List Experiment Condition Relative to Direct and Prime")

	# plot estimates
	pred <- bind_rows(pred_list)
	gg2 <- ggplot(pred, aes(x = times, y = est, color = name)) +
	geom_hline(yintercept = mean(item1)) +
	geom_line(alpha = 0.4) +
	geom_smooth(se = FALSE) +
	labs(title = "Estimates in the Three Conditions",
	y = "Estimate",
	x = "Size of Each List Experiment Condition Relative to Direct and Prime\n(1 = Same Size; 3 = Three Times as Large)")

	# plot se
	gg3 <- ggplot(pred, aes(x = times, y = se, color = name)) +
	geom_line(alpha = 0.4) +
	geom_smooth(se = FALSE) +
	labs(title = "Standard Errors in the Three Conditions",
	y = "Stanard Error",
	x = "Size of Each List Experiment Condition Relative to Direct and Prime\n(1 = Same Size; 3 = Three Times as Large)")

	gg1 / gg2 / gg3

	# recommendation is the following assignments
	times <- 3
	tibble(condition = conditions,
	frac_respondents = c(times, times, 1, 1)/sum(c(times, times, 1, 1))) \|>
	mutate(n_respondents = 3000*frac_respondents) \|>
	mutate(Condition = condition,
	`% of Respondents` = scales::percent(frac_respondents, accuracy = 0.1),
	`Number of Respondents` = scales::number(n_respondents, big.mark = ",")) %>%
	select(Condition = condition, `% of Respondents`, `Number of Respondents`) %>%
	gt::gt()