Created
August 8, 2023 19:35
-
-
Save jwbowers/b381101666f04c7eb5d287fa16f72135 to your computer and use it in GitHub Desktop.
Bias in Estimating "Any Treatment" versus Control
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(DeclareDesign) | |
my_designer <- function(N, tau1, tau2) { | |
# N - total sample size | |
# tau1 - difference in proportion control versus "easy" message | |
# tau2 - difference in proportion control versus "right" message | |
fixed_pop <- fabricate( | |
N = N, | |
tau1 = tau1, | |
tau2 = tau2, | |
tau1_0 = pmax(0, rnorm(N, mean = tau1, sd = .005)), | |
tau2_0 = pmax(0, rnorm(N, mean = mean(tau1_0) + tau2, sd = .005)), | |
x1 = draw_binary(prob = .5, N = N) | |
) | |
pop <- declare_population(fixed_pop) | |
pot <- declare_potential_outcomes(Y ~ rbinom( | |
n = N, size = 1, | |
prob = 0.1 + tau1_0 * (Z == 1) + tau2_0 * (Z == 2) + x1 * .05 | |
), conditions = c(0, 1, 2)) | |
assignment <- declare_assignment(Z = complete_ra(N, num_arms = 3, conditions = c(0, 1, 2))) | |
reveal <- declare_measurement(Y = reveal_outcomes(Y ~ Z)) | |
ate_estimand <- declare_inquiries( | |
ate_Y_1_0 = mean(Y_Z_1 - Y_Z_0), | |
ate_Y_2_0 = mean(Y_Z_2 - Y_Z_0), | |
ate_Y_2_1 = mean(Y_Z_2 - Y_Z_1), | |
ate_Y_any = mean((Y_Z_1 + Y_Z_2 > 0)) - mean(Y_Z_0) | |
) | |
des_base <- pop + pot + assignment + reveal + ate_estimand | |
return(des_base) | |
} | |
des <- my_designer(N = 7500*3, tau1 = .01, tau2 = .01) | |
estimation1_fn <- function(data) { | |
estimates <- rbind.data.frame( | |
ate_Y_1_0 = difference_in_means(formula = Y ~ Z, data = data, condition1 = 0, condition2 = 1), | |
ate_Y_2_0 = difference_in_means(formula = Y ~ Z, data = data, condition1 = 0, condition2 = 2), | |
ate_Y_2_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = 1, condition2 = 2) | |
) | |
estimates$estimator <- c("DIM (Z_1 - Z_0)", "DIM (Z_2 - Z_0)", "DIM (Z_2 - Z_1)") | |
estimates$inquiry <- rownames(estimates) | |
estimates$estimate <- estimates$coefficients | |
estimates$term <- NULL | |
return(estimates) | |
} | |
estimator1 <- declare_estimator(handler = estimation1_fn, label = "est1") | |
estimation2_fn <- function(data) { | |
# Effect of assignment to **either** of the two messages | |
data$Zany <- ifelse(data$Z %in% c(1, 2), 1, 0) | |
res0 <- tidy(lm_robust(Y ~ Zany, data = data)) %>% dplyr::filter(term == "Zany") | |
res0$estimator <- "DIM (Any)" | |
res0$inquiry <- "ate_Y_any" | |
res0$term <- NULL | |
return(res0) | |
} | |
estimator2 <- declare_estimator(handler = estimation2_fn, label = "DIM Any") | |
test1_fn <- function(data) { | |
res0 <- coin::cmh_test(factor(Y) ~ factor(Z), data = data) | |
res <- data.frame(estimator = "CMH Test", p.value = coin::pvalue(res0)[1]) | |
return(res) | |
} | |
test1 <- declare_test(handler = test1_fn, label = "Indep Test") | |
des_plus_test <- des + estimator1 + estimator2 + test1 | |
diagnosis <- diagnose_design(des_plus_test, bootstrap_sims = 0, sims = 1000) | |
diagnosis | |
## Notice bias in the "DIM Any" estimator | |
# Research design diagnosis based on 1000 simulations. Diagnosis completed in 1 mins. | |
# Design Inquiry Estimator Outcome Mean Estimand Mean Estimate | |
#des_plus_test ate_Y_1_0 DIM (Z_1 - Z_0) Y 0.01 0.01 | |
#des_plus_test ate_Y_2_0 DIM (Z_2 - Z_0) Y 0.02 0.02 | |
#des_plus_test ate_Y_2_1 DIM (Z_2 - Z_1) Y 0.01 0.01 | |
#des_plus_test ate_Y_any DIM (Any) Y 0.13 0.02 | |
#des_plus_test <NA> CMH Test <NA> NA NA | |
# Bias SD Estimate RMSE Power Coverage N Sims | |
# 0.00 0.01 0.00 0.45 0.98 1000 | |
# 0.00 0.01 0.00 0.95 0.98 1000 | |
#-0.00 0.01 0.00 0.42 0.99 1000 | |
#-0.12 0.00 0.12 0.87 0.00 1000 | |
# NA NA NA 0.90 NA 1000 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment