Skip to content

Instantly share code, notes, and snippets.

@jwbowers
Created August 8, 2023 19:35
Show Gist options
  • Save jwbowers/b381101666f04c7eb5d287fa16f72135 to your computer and use it in GitHub Desktop.
Save jwbowers/b381101666f04c7eb5d287fa16f72135 to your computer and use it in GitHub Desktop.
Bias in Estimating "Any Treatment" versus Control
library(tidyverse)
library(DeclareDesign)
my_designer <- function(N, tau1, tau2) {
# N - total sample size
# tau1 - difference in proportion control versus "easy" message
# tau2 - difference in proportion control versus "right" message
fixed_pop <- fabricate(
N = N,
tau1 = tau1,
tau2 = tau2,
tau1_0 = pmax(0, rnorm(N, mean = tau1, sd = .005)),
tau2_0 = pmax(0, rnorm(N, mean = mean(tau1_0) + tau2, sd = .005)),
x1 = draw_binary(prob = .5, N = N)
)
pop <- declare_population(fixed_pop)
pot <- declare_potential_outcomes(Y ~ rbinom(
n = N, size = 1,
prob = 0.1 + tau1_0 * (Z == 1) + tau2_0 * (Z == 2) + x1 * .05
), conditions = c(0, 1, 2))
assignment <- declare_assignment(Z = complete_ra(N, num_arms = 3, conditions = c(0, 1, 2)))
reveal <- declare_measurement(Y = reveal_outcomes(Y ~ Z))
ate_estimand <- declare_inquiries(
ate_Y_1_0 = mean(Y_Z_1 - Y_Z_0),
ate_Y_2_0 = mean(Y_Z_2 - Y_Z_0),
ate_Y_2_1 = mean(Y_Z_2 - Y_Z_1),
ate_Y_any = mean((Y_Z_1 + Y_Z_2 > 0)) - mean(Y_Z_0)
)
des_base <- pop + pot + assignment + reveal + ate_estimand
return(des_base)
}
des <- my_designer(N = 7500*3, tau1 = .01, tau2 = .01)
estimation1_fn <- function(data) {
estimates <- rbind.data.frame(
ate_Y_1_0 = difference_in_means(formula = Y ~ Z, data = data, condition1 = 0, condition2 = 1),
ate_Y_2_0 = difference_in_means(formula = Y ~ Z, data = data, condition1 = 0, condition2 = 2),
ate_Y_2_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = 1, condition2 = 2)
)
estimates$estimator <- c("DIM (Z_1 - Z_0)", "DIM (Z_2 - Z_0)", "DIM (Z_2 - Z_1)")
estimates$inquiry <- rownames(estimates)
estimates$estimate <- estimates$coefficients
estimates$term <- NULL
return(estimates)
}
estimator1 <- declare_estimator(handler = estimation1_fn, label = "est1")
estimation2_fn <- function(data) {
# Effect of assignment to **either** of the two messages
data$Zany <- ifelse(data$Z %in% c(1, 2), 1, 0)
res0 <- tidy(lm_robust(Y ~ Zany, data = data)) %>% dplyr::filter(term == "Zany")
res0$estimator <- "DIM (Any)"
res0$inquiry <- "ate_Y_any"
res0$term <- NULL
return(res0)
}
estimator2 <- declare_estimator(handler = estimation2_fn, label = "DIM Any")
test1_fn <- function(data) {
res0 <- coin::cmh_test(factor(Y) ~ factor(Z), data = data)
res <- data.frame(estimator = "CMH Test", p.value = coin::pvalue(res0)[1])
return(res)
}
test1 <- declare_test(handler = test1_fn, label = "Indep Test")
des_plus_test <- des + estimator1 + estimator2 + test1
diagnosis <- diagnose_design(des_plus_test, bootstrap_sims = 0, sims = 1000)
diagnosis
## Notice bias in the "DIM Any" estimator
# Research design diagnosis based on 1000 simulations. Diagnosis completed in 1 mins.
# Design Inquiry Estimator Outcome Mean Estimand Mean Estimate
#des_plus_test ate_Y_1_0 DIM (Z_1 - Z_0) Y 0.01 0.01
#des_plus_test ate_Y_2_0 DIM (Z_2 - Z_0) Y 0.02 0.02
#des_plus_test ate_Y_2_1 DIM (Z_2 - Z_1) Y 0.01 0.01
#des_plus_test ate_Y_any DIM (Any) Y 0.13 0.02
#des_plus_test <NA> CMH Test <NA> NA NA
# Bias SD Estimate RMSE Power Coverage N Sims
# 0.00 0.01 0.00 0.45 0.98 1000
# 0.00 0.01 0.00 0.95 0.98 1000
#-0.00 0.01 0.00 0.42 0.99 1000
#-0.12 0.00 0.12 0.87 0.00 1000
# NA NA NA 0.90 NA 1000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment