jwbowers/anytreatment_vs_control.R

## anytreatment_vs_control.R
library(tidyverse)
library(DeclareDesign)

my_designer <- function(N, tau1, tau2) {
  # N - total sample size
  # tau1 - difference in proportion control versus "easy" message
  # tau2 - difference in proportion control versus "right" message

  fixed_pop <- fabricate(
    N = N,
    tau1 = tau1,
    tau2 = tau2,
    tau1_0 = pmax(0, rnorm(N, mean = tau1, sd = .005)),
    tau2_0 = pmax(0, rnorm(N, mean = mean(tau1_0) + tau2, sd = .005)),
    x1 = draw_binary(prob = .5, N = N)
  )

  pop <- declare_population(fixed_pop)

  pot <- declare_potential_outcomes(Y ~ rbinom(
    n = N, size = 1,
    prob = 0.1 + tau1_0 * (Z == 1) + tau2_0 * (Z == 2) + x1 * .05
  ), conditions = c(0, 1, 2))

  assignment <- declare_assignment(Z = complete_ra(N, num_arms = 3, conditions = c(0, 1, 2)))

  reveal <- declare_measurement(Y = reveal_outcomes(Y ~ Z))

  ate_estimand <- declare_inquiries(
    ate_Y_1_0 = mean(Y_Z_1 - Y_Z_0),
    ate_Y_2_0 = mean(Y_Z_2 - Y_Z_0),
    ate_Y_2_1 = mean(Y_Z_2 - Y_Z_1),
    ate_Y_any = mean((Y_Z_1 + Y_Z_2 > 0)) - mean(Y_Z_0)
  )

  des_base <- pop + pot + assignment + reveal + ate_estimand

  return(des_base)
}

des <- my_designer(N = 7500*3, tau1 = .01, tau2 = .01)

estimation1_fn <- function(data) {
  estimates <- rbind.data.frame(
    ate_Y_1_0 = difference_in_means(formula = Y ~ Z, data = data, condition1 = 0, condition2 = 1),
    ate_Y_2_0 = difference_in_means(formula = Y ~ Z, data = data, condition1 = 0, condition2 = 2),
    ate_Y_2_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = 1, condition2 = 2)
  )
  estimates$estimator <- c("DIM (Z_1 - Z_0)", "DIM (Z_2 - Z_0)", "DIM (Z_2 - Z_1)")
  estimates$inquiry <- rownames(estimates)
  estimates$estimate <- estimates$coefficients
  estimates$term <- NULL
  return(estimates)
}

estimator1 <- declare_estimator(handler = estimation1_fn, label = "est1")

estimation2_fn <- function(data) {
# Effect of assignment to **either** of the two messages
  data$Zany <- ifelse(data$Z %in% c(1, 2), 1, 0)
  res0 <- tidy(lm_robust(Y ~ Zany, data = data)) %>% dplyr::filter(term == "Zany")
  res0$estimator <- "DIM (Any)"
  res0$inquiry <- "ate_Y_any"
  res0$term <- NULL
  return(res0)
}

estimator2 <- declare_estimator(handler = estimation2_fn, label = "DIM Any")

test1_fn <- function(data) {
  res0 <- coin::cmh_test(factor(Y) ~ factor(Z), data = data)
  res <- data.frame(estimator = "CMH Test", p.value = coin::pvalue(res0)[1])
  return(res)
}

test1 <- declare_test(handler = test1_fn, label = "Indep Test")

des_plus_test <- des + estimator1 + estimator2 + test1

diagnosis <- diagnose_design(des_plus_test, bootstrap_sims = 0, sims = 1000)

diagnosis

## Notice bias in the "DIM Any" estimator
# Research design diagnosis based on 1000 simulations. Diagnosis completed in 1 mins.

 #       Design   Inquiry       Estimator Outcome Mean Estimand Mean Estimate
 #des_plus_test ate_Y_1_0 DIM (Z_1 - Z_0)       Y          0.01          0.01
 #des_plus_test ate_Y_2_0 DIM (Z_2 - Z_0)       Y          0.02          0.02
 #des_plus_test ate_Y_2_1 DIM (Z_2 - Z_1)       Y          0.01          0.01
 #des_plus_test ate_Y_any       DIM (Any)       Y          0.13          0.02
 #des_plus_test      <NA>        CMH Test    <NA>            NA            NA
 # Bias SD Estimate RMSE Power Coverage N Sims
 # 0.00        0.01 0.00  0.45     0.98   1000
 # 0.00        0.01 0.00  0.95     0.98   1000
 #-0.00        0.01 0.00  0.42     0.99   1000
 #-0.12        0.00 0.12  0.87     0.00   1000
 #   NA          NA   NA  0.90       NA   1000
	library(tidyverse)
	library(DeclareDesign)

	my_designer <- function(N, tau1, tau2) {
	# N - total sample size
	# tau1 - difference in proportion control versus "easy" message
	# tau2 - difference in proportion control versus "right" message

	fixed_pop <- fabricate(
	N = N,
	tau1 = tau1,
	tau2 = tau2,
	tau1_0 = pmax(0, rnorm(N, mean = tau1, sd = .005)),
	tau2_0 = pmax(0, rnorm(N, mean = mean(tau1_0) + tau2, sd = .005)),
	x1 = draw_binary(prob = .5, N = N)
	)

	pop <- declare_population(fixed_pop)

	pot <- declare_potential_outcomes(Y ~ rbinom(
	n = N, size = 1,
	prob = 0.1 + tau1_0 * (Z == 1) + tau2_0 * (Z == 2) + x1 * .05
	), conditions = c(0, 1, 2))

	assignment <- declare_assignment(Z = complete_ra(N, num_arms = 3, conditions = c(0, 1, 2)))

	reveal <- declare_measurement(Y = reveal_outcomes(Y ~ Z))

	ate_estimand <- declare_inquiries(
	ate_Y_1_0 = mean(Y_Z_1 - Y_Z_0),
	ate_Y_2_0 = mean(Y_Z_2 - Y_Z_0),
	ate_Y_2_1 = mean(Y_Z_2 - Y_Z_1),
	ate_Y_any = mean((Y_Z_1 + Y_Z_2 > 0)) - mean(Y_Z_0)
	)

	des_base <- pop + pot + assignment + reveal + ate_estimand

	return(des_base)
	}

	des <- my_designer(N = 7500*3, tau1 = .01, tau2 = .01)

	estimation1_fn <- function(data) {
	estimates <- rbind.data.frame(
	ate_Y_1_0 = difference_in_means(formula = Y ~ Z, data = data, condition1 = 0, condition2 = 1),
	ate_Y_2_0 = difference_in_means(formula = Y ~ Z, data = data, condition1 = 0, condition2 = 2),
	ate_Y_2_1 = difference_in_means(formula = Y ~ Z, data = data, condition1 = 1, condition2 = 2)
	)
	estimates$estimator <- c("DIM (Z_1 - Z_0)", "DIM (Z_2 - Z_0)", "DIM (Z_2 - Z_1)")
	estimates$inquiry <- rownames(estimates)
	estimates$estimate <- estimates$coefficients
	estimates$term <- NULL
	return(estimates)
	}

	estimator1 <- declare_estimator(handler = estimation1_fn, label = "est1")

	estimation2_fn <- function(data) {
	# Effect of assignment to either of the two messages
	data$Zany <- ifelse(data$Z %in% c(1, 2), 1, 0)
	res0 <- tidy(lm_robust(Y ~ Zany, data = data)) %>% dplyr::filter(term == "Zany")
	res0$estimator <- "DIM (Any)"
	res0$inquiry <- "ate_Y_any"
	res0$term <- NULL
	return(res0)
	}

	estimator2 <- declare_estimator(handler = estimation2_fn, label = "DIM Any")

	test1_fn <- function(data) {
	res0 <- coin::cmh_test(factor(Y) ~ factor(Z), data = data)
	res <- data.frame(estimator = "CMH Test", p.value = coin::pvalue(res0)[1])
	return(res)
	}

	test1 <- declare_test(handler = test1_fn, label = "Indep Test")

	des_plus_test <- des + estimator1 + estimator2 + test1

	diagnosis <- diagnose_design(des_plus_test, bootstrap_sims = 0, sims = 1000)

	diagnosis

	## Notice bias in the "DIM Any" estimator
	# Research design diagnosis based on 1000 simulations. Diagnosis completed in 1 mins.

	# Design Inquiry Estimator Outcome Mean Estimand Mean Estimate
	#des_plus_test ate_Y_1_0 DIM (Z_1 - Z_0) Y 0.01 0.01
	#des_plus_test ate_Y_2_0 DIM (Z_2 - Z_0) Y 0.02 0.02
	#des_plus_test ate_Y_2_1 DIM (Z_2 - Z_1) Y 0.01 0.01
	#des_plus_test ate_Y_any DIM (Any) Y 0.13 0.02
	#des_plus_test <NA> CMH Test <NA> NA NA
	# Bias SD Estimate RMSE Power Coverage N Sims
	# 0.00 0.01 0.00 0.45 0.98 1000
	# 0.00 0.01 0.00 0.95 0.98 1000
	#-0.00 0.01 0.00 0.42 0.99 1000
	#-0.12 0.00 0.12 0.87 0.00 1000
	# NA NA NA 0.90 NA 1000