Skip to content

Instantly share code, notes, and snippets.

@carlislerainey
Created November 16, 2023 13:33
Show Gist options
  • Save carlislerainey/56ed4449fd8a0e8f896fb1458fad1c4e to your computer and use it in GitHub Desktop.
Save carlislerainey/56ed4449fd8a0e8f896fb1458fad1c4e to your computer and use it in GitHub Desktop.
Power analysis for list experiment
# load packages
library(tidyverse)
library(marginaleffects)
library(patchwork)
# clear workspace
rm(list = ls())
# create respondents
n_resp <- 3000 # number of total respondents
x1 <- rnorm(n_resp) # a covariate to induce some correlation in the responses for list experiment
# the designs to consider
# - 0.5 = each condition of list experiment is half the size as direct and prime;
# or, the whole list experiment is the same size as direct and prime
# - 1 = each condition of list experiment is same size as direct and prime
# - 3 = each condition of list experiment is 3x as large as direct and prime
times <- seq(0.5, 5, by = 0.01)
res_list <- pred_list <- NULL # containers for estimates
for (i in 1:length(times)) { # loop over each value of times
# list experiment items
b0 <- 0.0 # <- logit intercept for "other" items
b1 <- -2.0 # <- logit slope for "other" items
item1 <- rbinom(n_resp, size = 1, prob = plogis(-1 + 2*x1)) # <- item of interest; -1 intercept ~~> 25% agreement
item2 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1))
item3 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1))
item4 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1))
item5 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1))
# direct question with prime; created by falsifying item1 responses above
prime_pr_lies <- 0.15 # <- the fraction of true 1s (item1) that are falsified
prime1 <- ifelse(item1 == 1,
rbinom(n_resp, size = 1, prob = 1 - prime_pr_lies), # change some 1s to 0s
0) # keep all 0s as 0s
# direct question (without prime); created by falsifying even more responses above
dq_pr_lies <- 0.3 # <- the fraction of true 1s (item1) that are falsified
dq1 <- ifelse(item1 == 1,
rbinom(n_resp, size = 1, prob = 1 - dq_pr_lies), # change some 1s to 0s
0) # keep all 0s as 0s
# combine outcomes into a data frame
potential_outcomes <- data.frame(item1, item2, item3, item4, item5, dq1, prime1) %>%
mutate(list5 = item1 + item2 + item3 + item4 + item5,
list4 = item2 + item3 + item4 + item5)
# the four conditions to which we assign respondents
conditions <- c("List Experiment: 4 Items",
"List Experiment: 5 Items",
"Direct Question",
"Direct Question with Prime")
# the unnormalized probabilities of assignment to each condition
condition_rel_weights <- c(times[i], times[i], 1, 1)
condition_weights <- condition_rel_weights/sum(condition_rel_weights)
# place respondents into conditions
sample <- potential_outcomes |>
mutate(condition = sample(conditions, size = n(),
replace = TRUE,
prob = condition_weights)) |>
# put responses in the same outcome variable for linear regression
mutate(y = case_when(condition == "List Experiment: 4 Items" ~ list4,
condition == "List Experiment: 5 Items" ~ list5,
condition =="Direct Question" ~ dq1,
condition == "Direct Question with Prime" ~ prime1))
# linear regression
fit <- lm(y ~ -1 + condition, data = sample)
#arm::display(fit)
# compare to truth -----
# list
p <- predictions(fit,
newdata = datagrid(condition = c("List Experiment: 4 Items",
"List Experiment: 5 Items")),
vcov = "HC3",
hypothesis = "b2 - b1 = 0")
mean(item1); p$estimate
# direct
p <- predictions(fit,
newdata = datagrid(condition = c("Direct Question")),
vcov = "HC3")
mean(item1); p$estimate
# direct with prime
p <- predictions(fit,
newdata = datagrid(condition = c("Direct Question with Prime")),
vcov = "HC3")
mean(item1); p$estimate
# prediction of percent agreement with item1 -----
# list
p <- predictions(fit,
newdata = datagrid(condition = c("List Experiment: 4 Items",
"List Experiment: 5 Items")),
vcov = "HC3",
hypothesis = "b2 - b1 = 0")
pred1 <- tibble(name = "List", se = p$std.error, est = p$estimate)
# direct
p <- predictions(fit,
newdata = datagrid(condition = c("Direct Question")),
vcov = "HC3")
pred2 <- tibble(name = "Direct", se = p$std.error, est = p$estimate)
# direct with prime
p <- predictions(fit,
newdata = datagrid(condition = c("Direct Question with Prime")),
vcov = "HC3")
pred3 <- tibble(name = "Prime", se = p$std.error, est = p$estimate)
pred_list[[i]] <- bind_rows(list(pred1, pred2, pred3)) %>%
mutate(times = times[i])
# hypothesis tests ------
# is direct different than direct with prime?
p <- predictions(fit,
newdata = datagrid(condition = unique),
vcov = "HC3",
hypothesis = "b2 = b1")
mde1 <- tibble(name = "D v. P", mde = p$std.error*2.48) # minimal detectable effect (w/ 80% power)
# is list different than direct?
p <- predictions(fit,
newdata = datagrid(condition = unique),
vcov = "HC3",
hypothesis = "b4 - b3 = b1")
mde2 <- tibble(name = "L v. D", mde = p$std.error*2.48) # mde
# is list different than direct with prime?
p <- predictions(fit,
newdata = datagrid(condition = unique),
vcov = "HC3",
hypothesis = "b4 - b3 = b2")
mde3 <- tibble(name = "L v. P", mde = p$std.error*2.48) # mde
res_list[[i]] <- bind_rows(mde1, mde2, mde3) %>%
mutate(times = times[i])
}
# plot mde
res <- bind_rows(res_list)
gg1 <- ggplot(res, aes(x = times, y = mde, color = name)) +
geom_line(alpha = 0.4) +
geom_smooth(se = FALSE)+
labs(title = "MDE in the Three Conditions",
y = "Minimum Detectable Effect",
x = "Size of *Each* List Experiment Condition Relative to Direct and Prime")
# plot estimates
pred <- bind_rows(pred_list)
gg2 <- ggplot(pred, aes(x = times, y = est, color = name)) +
geom_hline(yintercept = mean(item1)) +
geom_line(alpha = 0.4) +
geom_smooth(se = FALSE) +
labs(title = "Estimates in the Three Conditions",
y = "Estimate",
x = "Size of *Each* List Experiment Condition Relative to Direct and Prime\n(1 = Same Size; 3 = Three Times as Large)")
# plot se
gg3 <- ggplot(pred, aes(x = times, y = se, color = name)) +
geom_line(alpha = 0.4) +
geom_smooth(se = FALSE) +
labs(title = "Standard Errors in the Three Conditions",
y = "Stanard Error",
x = "Size of *Each* List Experiment Condition Relative to Direct and Prime\n(1 = Same Size; 3 = Three Times as Large)")
gg1 / gg2 / gg3
# recommendation is the following assignments
times <- 3
tibble(condition = conditions,
frac_respondents = c(times, times, 1, 1)/sum(c(times, times, 1, 1))) |>
mutate(n_respondents = 3000*frac_respondents) |>
mutate(Condition = condition,
`% of Respondents` = scales::percent(frac_respondents, accuracy = 0.1),
`Number of Respondents` = scales::number(n_respondents, big.mark = ",")) %>%
select(Condition = condition, `% of Respondents`, `Number of Respondents`) %>%
gt::gt()
@carlislerainey
Copy link
Author

image

@carlislerainey
Copy link
Author

Based on the simulations above, something like this seems reasonable.

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment