Created
November 16, 2023 13:33
-
-
Save carlislerainey/56ed4449fd8a0e8f896fb1458fad1c4e to your computer and use it in GitHub Desktop.
Power analysis for list experiment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load packages | |
library(tidyverse) | |
library(marginaleffects) | |
library(patchwork) | |
# clear workspace | |
rm(list = ls()) | |
# create respondents | |
n_resp <- 3000 # number of total respondents | |
x1 <- rnorm(n_resp) # a covariate to induce some correlation in the responses for list experiment | |
# the designs to consider | |
# - 0.5 = each condition of list experiment is half the size as direct and prime; | |
# or, the whole list experiment is the same size as direct and prime | |
# - 1 = each condition of list experiment is same size as direct and prime | |
# - 3 = each condition of list experiment is 3x as large as direct and prime | |
times <- seq(0.5, 5, by = 0.01) | |
res_list <- pred_list <- NULL # containers for estimates | |
for (i in 1:length(times)) { # loop over each value of times | |
# list experiment items | |
b0 <- 0.0 # <- logit intercept for "other" items | |
b1 <- -2.0 # <- logit slope for "other" items | |
item1 <- rbinom(n_resp, size = 1, prob = plogis(-1 + 2*x1)) # <- item of interest; -1 intercept ~~> 25% agreement | |
item2 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1)) | |
item3 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1)) | |
item4 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1)) | |
item5 <- rbinom(n_resp, size = 1, prob = plogis(b0 + b1*x1)) | |
# direct question with prime; created by falsifying item1 responses above | |
prime_pr_lies <- 0.15 # <- the fraction of true 1s (item1) that are falsified | |
prime1 <- ifelse(item1 == 1, | |
rbinom(n_resp, size = 1, prob = 1 - prime_pr_lies), # change some 1s to 0s | |
0) # keep all 0s as 0s | |
# direct question (without prime); created by falsifying even more responses above | |
dq_pr_lies <- 0.3 # <- the fraction of true 1s (item1) that are falsified | |
dq1 <- ifelse(item1 == 1, | |
rbinom(n_resp, size = 1, prob = 1 - dq_pr_lies), # change some 1s to 0s | |
0) # keep all 0s as 0s | |
# combine outcomes into a data frame | |
potential_outcomes <- data.frame(item1, item2, item3, item4, item5, dq1, prime1) %>% | |
mutate(list5 = item1 + item2 + item3 + item4 + item5, | |
list4 = item2 + item3 + item4 + item5) | |
# the four conditions to which we assign respondents | |
conditions <- c("List Experiment: 4 Items", | |
"List Experiment: 5 Items", | |
"Direct Question", | |
"Direct Question with Prime") | |
# the unnormalized probabilities of assignment to each condition | |
condition_rel_weights <- c(times[i], times[i], 1, 1) | |
condition_weights <- condition_rel_weights/sum(condition_rel_weights) | |
# place respondents into conditions | |
sample <- potential_outcomes |> | |
mutate(condition = sample(conditions, size = n(), | |
replace = TRUE, | |
prob = condition_weights)) |> | |
# put responses in the same outcome variable for linear regression | |
mutate(y = case_when(condition == "List Experiment: 4 Items" ~ list4, | |
condition == "List Experiment: 5 Items" ~ list5, | |
condition =="Direct Question" ~ dq1, | |
condition == "Direct Question with Prime" ~ prime1)) | |
# linear regression | |
fit <- lm(y ~ -1 + condition, data = sample) | |
#arm::display(fit) | |
# compare to truth ----- | |
# list | |
p <- predictions(fit, | |
newdata = datagrid(condition = c("List Experiment: 4 Items", | |
"List Experiment: 5 Items")), | |
vcov = "HC3", | |
hypothesis = "b2 - b1 = 0") | |
mean(item1); p$estimate | |
# direct | |
p <- predictions(fit, | |
newdata = datagrid(condition = c("Direct Question")), | |
vcov = "HC3") | |
mean(item1); p$estimate | |
# direct with prime | |
p <- predictions(fit, | |
newdata = datagrid(condition = c("Direct Question with Prime")), | |
vcov = "HC3") | |
mean(item1); p$estimate | |
# prediction of percent agreement with item1 ----- | |
# list | |
p <- predictions(fit, | |
newdata = datagrid(condition = c("List Experiment: 4 Items", | |
"List Experiment: 5 Items")), | |
vcov = "HC3", | |
hypothesis = "b2 - b1 = 0") | |
pred1 <- tibble(name = "List", se = p$std.error, est = p$estimate) | |
# direct | |
p <- predictions(fit, | |
newdata = datagrid(condition = c("Direct Question")), | |
vcov = "HC3") | |
pred2 <- tibble(name = "Direct", se = p$std.error, est = p$estimate) | |
# direct with prime | |
p <- predictions(fit, | |
newdata = datagrid(condition = c("Direct Question with Prime")), | |
vcov = "HC3") | |
pred3 <- tibble(name = "Prime", se = p$std.error, est = p$estimate) | |
pred_list[[i]] <- bind_rows(list(pred1, pred2, pred3)) %>% | |
mutate(times = times[i]) | |
# hypothesis tests ------ | |
# is direct different than direct with prime? | |
p <- predictions(fit, | |
newdata = datagrid(condition = unique), | |
vcov = "HC3", | |
hypothesis = "b2 = b1") | |
mde1 <- tibble(name = "D v. P", mde = p$std.error*2.48) # minimal detectable effect (w/ 80% power) | |
# is list different than direct? | |
p <- predictions(fit, | |
newdata = datagrid(condition = unique), | |
vcov = "HC3", | |
hypothesis = "b4 - b3 = b1") | |
mde2 <- tibble(name = "L v. D", mde = p$std.error*2.48) # mde | |
# is list different than direct with prime? | |
p <- predictions(fit, | |
newdata = datagrid(condition = unique), | |
vcov = "HC3", | |
hypothesis = "b4 - b3 = b2") | |
mde3 <- tibble(name = "L v. P", mde = p$std.error*2.48) # mde | |
res_list[[i]] <- bind_rows(mde1, mde2, mde3) %>% | |
mutate(times = times[i]) | |
} | |
# plot mde | |
res <- bind_rows(res_list) | |
gg1 <- ggplot(res, aes(x = times, y = mde, color = name)) + | |
geom_line(alpha = 0.4) + | |
geom_smooth(se = FALSE)+ | |
labs(title = "MDE in the Three Conditions", | |
y = "Minimum Detectable Effect", | |
x = "Size of *Each* List Experiment Condition Relative to Direct and Prime") | |
# plot estimates | |
pred <- bind_rows(pred_list) | |
gg2 <- ggplot(pred, aes(x = times, y = est, color = name)) + | |
geom_hline(yintercept = mean(item1)) + | |
geom_line(alpha = 0.4) + | |
geom_smooth(se = FALSE) + | |
labs(title = "Estimates in the Three Conditions", | |
y = "Estimate", | |
x = "Size of *Each* List Experiment Condition Relative to Direct and Prime\n(1 = Same Size; 3 = Three Times as Large)") | |
# plot se | |
gg3 <- ggplot(pred, aes(x = times, y = se, color = name)) + | |
geom_line(alpha = 0.4) + | |
geom_smooth(se = FALSE) + | |
labs(title = "Standard Errors in the Three Conditions", | |
y = "Stanard Error", | |
x = "Size of *Each* List Experiment Condition Relative to Direct and Prime\n(1 = Same Size; 3 = Three Times as Large)") | |
gg1 / gg2 / gg3 | |
# recommendation is the following assignments | |
times <- 3 | |
tibble(condition = conditions, | |
frac_respondents = c(times, times, 1, 1)/sum(c(times, times, 1, 1))) |> | |
mutate(n_respondents = 3000*frac_respondents) |> | |
mutate(Condition = condition, | |
`% of Respondents` = scales::percent(frac_respondents, accuracy = 0.1), | |
`Number of Respondents` = scales::number(n_respondents, big.mark = ",")) %>% | |
select(Condition = condition, `% of Respondents`, `Number of Respondents`) %>% | |
gt::gt() |
Author
carlislerainey
commented
Nov 16, 2023
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment