macartan/structural_model_declared

## structural_model_declared
# Replication code for Wilke, Humphreys 2020, Field Experiments, Theory, and External Validity

# The analysis was run using
## R version 3.5.3 (2019-03-11)
## DeclareDesign version 0.18.0
## tidyverse version 1.2.1
## bbmle version 1.0.20
## knitr version 1.22

# Install and load packages -----------------------------------------------

packages <- c(
  "DeclareDesign",
  "tidyverse",
  "bbmle",
  "knitr")

invisible(lapply(packages, function(x) if (!require(x, character.only=T)){install.packages(x);library(x, character.only = T)}))

rm(packages)


# Set number of simulations -----------------------------------------------

sims = 1000

# Define structural estimator ---------------------------------------------

# Maximum Likelihood Estimator to  estimate kappa (k), delta (d) and q:
structural_estimator <- function(data,
                                 Y = "Y_2_obs",
                                 # function to calculate
                                 # predicted price for rational
                                 # customers given z_i and delta,
                                 # defaults to n=2:
                                 y = function(Z, d)
                                   Z * d + (1 - Z) * (1 - d))

{
  # Define negative log likelihood as a function of kappa, delta and q
  LL  <- function(k, d, q) {
    m <- with(data, y(Z, d))
    R <- q * dbeta(data[Y][[1]], k * 3 / 4, k * 1 / 4) +
      (1 - q) * dbeta(data[Y][[1]], k * m, k * (1 - m))
    - sum(log(R))

  }

  # Estimation
  M <- mle2(
    LL,
    method = "L-BFGS-B",
    start = list(k = 2, d = 0.50,  q = 0.50),
    lower = c(k = 1,    d = 0.01,  q = 0.01),
    upper = c(k = 1000, d = 0.99,  q = 0.99)
  )

  # Format output from estimation
  out <- data.frame(coef(summary(M)), outcome = Y)

  names(out) <- c("estimate", "std.error", "statistic",
                  "p.value", "outcome")

  # Use estimates of q and delta to predict average treatment effects (ATEs)


  # Predicted ATE for n=2
  out[4, 1] <- (1 - out["q", "estimate"]) * (2 * out["d", "estimate"] - 1)

  # Predicted ATE for n=infinity
  out[5, 1] <- (1 - out["q", "estimate"]) * (2 * out["d", "estimate"] /
                                               (1 + out["d", "estimate"]) - 1)

  out
}


# Declare the design ----------------------------------------------------------

# Define parameter values:

N = 500       # Sample size
d = 0.8       # True delta (unknown)
k = 6         # Nuisance parameter that affects variance (unknown)
q = 0.5       # Share of behavioral types in the population (unknown)
e = 3/4       # Price paid by behavioral customers (known)
random = TRUE # Switch to control whether
# first movers are randomly assigned

# Design declaration:

design <-

  # Define the population
  declare_population(N = N,
                     # indicator for behavioral type (norm = 1)
                     norm = rbinom(N, 1, q),
                     # probablity of going first without random assignment
                     p = ifelse(norm == 1, .8, .5)
  ) +

  # Define mean potential outcomes for n = 2
  declare_potential_outcomes(Y_2 ~ norm*e + (1-norm)*(Z*d + (1-Z)*(1-d))) +

  # Define mean potential outcomes for n = infinity
  declare_potential_outcomes(Y_inf ~ norm*e +
                               (1-norm)*(Z*d/(1+d) +
                                           (1-Z)*(1-d/(1+d)))) +

  # Define estimands (quantities we want to estimate)
  declare_estimand(ATE_2 = mean(Y_2_Z_1 - Y_2_Z_0),       # ATE n = 2
                   ATE_inf = mean(Y_inf_Z_1 - Y_inf_Z_0), # ATE n = infinity
                   k = k,                                 # kappa
                   d = d,                                 # delta
                   q = q) +                               # q

  # Declare assignment process (random assignment if random = TRUE)
  declare_assignment(prob_unit = if(random) rep(0.5, N) else p, simple = TRUE) +

  # Declare revealed potential outcomes
  declare_reveal(Y_2,   Z) +
  declare_reveal(Y_inf, Z) +

  # Get draws from beta distribution given means for n = 2 and n = infinity
  declare_step(fabricate, Y_2_obs   = rbeta(N, Y_2*k, (1-Y_2)*k),
               Y_inf_obs = rbeta(N, Y_inf*k, (1-Y_inf)*k)
  ) +

  # Declare estimators

  # Difference-in-means for n = 2
  declare_estimator(Y_2_obs ~ Z,
                    estimand = "ATE_2",
                    label = "DIM_2") +

  # Difference-in-means for n = infinity
  declare_estimator(Y_inf_obs ~ Z,
                    estimand = "ATE_inf",
                    label = "DIM_inf") +

  # MLE for n = 2
  declare_estimator(handler = tidy_estimator(structural_estimator),
                    estimand = c("k","d", "q", "ATE_2", "ATE_inf"),
                    label = "Struc_2") +

  # MLE for n = infinity
  declare_estimator(handler = tidy_estimator(structural_estimator),
                    Y = "Y_inf_obs",
                    y = function(Z, d) Z*d/(1+d) +  (1-Z)*(1-d/(1+d)),
                    estimand = c("k","d","q","ATE_2", "ATE_inf"),
                    label = "Struc_inf")


# Diagnose the design -----------------------------------------------------

diagnosis_1 <- diagnose_design(design, sims = sims)

# Change design to remove random assignment -------------------------------

design_2    <- redesign(design, random = FALSE)

# Diagnose design without random assignment -------------------------------

diagnosis_2 <- diagnose_design(design_2, sims = sims)


# Change design to assume q = 0 -------------------------------------------

# Turn random assignment on again
design_3    <- redesign(design_2, random = TRUE)

# New estimator that assumes q = 0
structural_estimator_2 <- function(data,
                                   Y = "Y_2_obs",
                                   # function to calculate
                                   # predicted price for rational
                                   # customers given z_i and delta,
                                   # defaults to n=2:
                                   y = function(Z, d)
                                     Z * d + (1 - Z) * (1 - d)) {
  # Define negative log likelihood as a function of kappa and delta
  LL  <- function(k, d) {
    m <- with(data, y(Z, d))
    R <- dbeta(data[Y][[1]], k * m, k * (1 - m))
    - sum(log(R))
  }
  # Estimation
  M   <- mle2(LL, start = list(k  = 5, d = 0.5))

  # Format output from estimation
  out <- data.frame(coef(summary(M)),  outcome = Y)
  names(out) <- c("estimate", "std.error", "statistic",
                  "p.value",  "outcome")

  # Use estimates of delta to predict average treatment effects (ATEs)

  # Predicted ATE for n=2
  out[3, "estimate"] <- 2 * out["d", "estimate"] - 1

  # Predicted ATE for n=infinity
  out[4, "estimate"] <- 2 * (out["d", "estimate"] / (1 + out["d", "estimate"])) - 1
  out
}

# Update MLE for n = 2
design_3 <- replace_step(
  design_3,
  11,
  declare_estimator(
    handler =
      tidy_estimator(structural_estimator_2),
    estimand =
      c("k", "d",  "ATE_2", "ATE_inf"),
    label = "Struc_2_norm"
  )
)

# Update MLE for n = infinity
design_3 <- replace_step(
  design_3,
  12,
  declare_estimator(
    handler =
      tidy_estimator(structural_estimator_2),
    Y = "Y_inf_obs",
    y = function(Z, d)
      Z * d / (1 + d) + (1 - Z) *
      (1 - d / (1 + d)),
    estimand = c("k", "d", "ATE_2", "ATE_inf"),
    label = "Struc_inf_norm"
  )
)


# Diagnose design with wrong model ----------------------------------------

diagnosis_3 <- diagnose_design(design_3 , sims = sims)

# Change data generating process so that behavioral customers pay 1/2 -----

design_4    <- redesign(design_3, e = 1/2)

# Diagnose design with wrong but observationally equivalent model ---------

diagnosis_4 <- diagnose_design(design_4 , sims = sims)


# Helper function to clean diagnoses --------------------------------------

cleanDiagn <- function(diagnosis) {
  out <- reshape_diagnosis(diagnosis)
  out[out=="NA"] <- ""
  out <- out[!out$`Design Label` == "",]
  names(out)[which(names(out) == "Estimator Label")] <- "Estimator"
  names(out)[which(names(out) == "Mean Estimand")]   <- "Estimand"
  return(out)
}


# Result Tables -----------------------------------------------------------

# Tables in the main text refer to maximum likelihood estimators as "MLE_n"
# The code here reproduces the tables in the appendix which refer to the same estimators as "Struc_n"
# Specifically:
# MLE_2 = Struc_2
# MLE_{\infty} = Struc_inf
# MLE'_2 = Struc_2_norm
# MLE'_{\infty} = Struc_inf_norm

# Diagnosis of design with correct model (tables 2, 3 and 4 in text and table 8 in appendix)

result1 <- cleanDiagn(diagnosis_1)
kable(result1[, - c(1, 4, 5, 8, 9, 11:13)], row.names = F, caption = "Detailed diagnosis of design with correct model")


# Diagnosis of design with correct model but no randomization (table 5 in text and table 9 in appendix)

result2 <- cleanDiagn(diagnosis_2)
kable(result2[, - c(1, 2, 5, 6, 9, 10, 12:14)], row.names = F,
      caption = "Detailed diagnosis of design with correct model but no randomization")

# Diagnosis of design with incorrect model (6 in text and table 10 in appendix)

diagnosis_3$diagnosands_df

result3 <- cleanDiagn(diagnosis_3)
kable(result3[, - c(1:2, 5, 6, 9, 10, 12:14)], row.names = F,
      caption = "Detailed diagnosis of design with incorrect model (assume $q=0$)")


# Diagnosis of design with incorrect yet observationally equivalent model (table 7 in text and table 11 in appendix)

result4 <- cleanDiagn(diagnosis_4)
kable(result4[, - c(1:2, 5, 6, 9, 10, 12:14)], row.names = F,
      caption = "Detailed Diagnosis of design with incorrect yet observationally equivalent model")
	# Replication code for Wilke, Humphreys 2020, Field Experiments, Theory, and External Validity

	# The analysis was run using
	## R version 3.5.3 (2019-03-11)
	## DeclareDesign version 0.18.0
	## tidyverse version 1.2.1
	## bbmle version 1.0.20
	## knitr version 1.22

	# Install and load packages -----------------------------------------------

	packages <- c(
	"DeclareDesign",
	"tidyverse",
	"bbmle",
	"knitr")

	invisible(lapply(packages, function(x) if (!require(x, character.only=T)){install.packages(x);library(x, character.only = T)}))

	rm(packages)



	# Set number of simulations -----------------------------------------------

	sims = 1000

	# Define structural estimator ---------------------------------------------

	# Maximum Likelihood Estimator to estimate kappa (k), delta (d) and q:
	structural_estimator <- function(data,
	Y = "Y_2_obs",
	# function to calculate
	# predicted price for rational
	# customers given z_i and delta,
	# defaults to n=2:
	y = function(Z, d)
	Z * d + (1 - Z) * (1 - d))

	{
	# Define negative log likelihood as a function of kappa, delta and q
	LL <- function(k, d, q) {
	m <- with(data, y(Z, d))
	R <- q * dbeta(data[Y][[1]], k * 3 / 4, k * 1 / 4) +
	(1 - q) * dbeta(data[Y][[1]], k * m, k * (1 - m))
	- sum(log(R))

	}

	# Estimation
	M <- mle2(
	LL,
	method = "L-BFGS-B",
	start = list(k = 2, d = 0.50, q = 0.50),
	lower = c(k = 1, d = 0.01, q = 0.01),
	upper = c(k = 1000, d = 0.99, q = 0.99)
	)

	# Format output from estimation
	out <- data.frame(coef(summary(M)), outcome = Y)

	names(out) <- c("estimate", "std.error", "statistic",
	"p.value", "outcome")

	# Use estimates of q and delta to predict average treatment effects (ATEs)


	# Predicted ATE for n=2
	out[4, 1] <- (1 - out["q", "estimate"]) * (2 * out["d", "estimate"] - 1)

	# Predicted ATE for n=infinity
	out[5, 1] <- (1 - out["q", "estimate"]) * (2 * out["d", "estimate"] /
	(1 + out["d", "estimate"]) - 1)

	out
	}


	# Declare the design ----------------------------------------------------------

	# Define parameter values:

	N = 500 # Sample size
	d = 0.8 # True delta (unknown)
	k = 6 # Nuisance parameter that affects variance (unknown)
	q = 0.5 # Share of behavioral types in the population (unknown)
	e = 3/4 # Price paid by behavioral customers (known)
	random = TRUE # Switch to control whether
	# first movers are randomly assigned

	# Design declaration:

	design <-

	# Define the population
	declare_population(N = N,
	# indicator for behavioral type (norm = 1)
	norm = rbinom(N, 1, q),
	# probablity of going first without random assignment
	p = ifelse(norm == 1, .8, .5)
	) +

	# Define mean potential outcomes for n = 2
	declare_potential_outcomes(Y_2 ~ norme + (1-norm)(Zd + (1-Z)(1-d))) +

	# Define mean potential outcomes for n = infinity
	declare_potential_outcomes(Y_inf ~ norm*e +
	(1-norm)(Zd/(1+d) +
	(1-Z)*(1-d/(1+d)))) +

	# Define estimands (quantities we want to estimate)
	declare_estimand(ATE_2 = mean(Y_2_Z_1 - Y_2_Z_0), # ATE n = 2
	ATE_inf = mean(Y_inf_Z_1 - Y_inf_Z_0), # ATE n = infinity
	k = k, # kappa
	d = d, # delta
	q = q) + # q

	# Declare assignment process (random assignment if random = TRUE)
	declare_assignment(prob_unit = if(random) rep(0.5, N) else p, simple = TRUE) +

	# Declare revealed potential outcomes
	declare_reveal(Y_2, Z) +
	declare_reveal(Y_inf, Z) +

	# Get draws from beta distribution given means for n = 2 and n = infinity
	declare_step(fabricate, Y_2_obs = rbeta(N, Y_2k, (1-Y_2)k),
	Y_inf_obs = rbeta(N, Y_infk, (1-Y_inf)k)
	) +

	# Declare estimators

	# Difference-in-means for n = 2
	declare_estimator(Y_2_obs ~ Z,
	estimand = "ATE_2",
	label = "DIM_2") +

	# Difference-in-means for n = infinity
	declare_estimator(Y_inf_obs ~ Z,
	estimand = "ATE_inf",
	label = "DIM_inf") +

	# MLE for n = 2
	declare_estimator(handler = tidy_estimator(structural_estimator),
	estimand = c("k","d", "q", "ATE_2", "ATE_inf"),
	label = "Struc_2") +

	# MLE for n = infinity
	declare_estimator(handler = tidy_estimator(structural_estimator),
	Y = "Y_inf_obs",
	y = function(Z, d) Zd/(1+d) + (1-Z)(1-d/(1+d)),
	estimand = c("k","d","q","ATE_2", "ATE_inf"),
	label = "Struc_inf")


	# Diagnose the design -----------------------------------------------------

	diagnosis_1 <- diagnose_design(design, sims = sims)

	# Change design to remove random assignment -------------------------------

	design_2 <- redesign(design, random = FALSE)

	# Diagnose design without random assignment -------------------------------

	diagnosis_2 <- diagnose_design(design_2, sims = sims)



	# Change design to assume q = 0 -------------------------------------------

	# Turn random assignment on again
	design_3 <- redesign(design_2, random = TRUE)

	# New estimator that assumes q = 0
	structural_estimator_2 <- function(data,
	Y = "Y_2_obs",
	# function to calculate
	# predicted price for rational
	# customers given z_i and delta,
	# defaults to n=2:
	y = function(Z, d)
	Z * d + (1 - Z) * (1 - d)) {
	# Define negative log likelihood as a function of kappa and delta
	LL <- function(k, d) {
	m <- with(data, y(Z, d))
	R <- dbeta(data[Y][[1]], k * m, k * (1 - m))
	- sum(log(R))
	}
	# Estimation
	M <- mle2(LL, start = list(k = 5, d = 0.5))

	# Format output from estimation
	out <- data.frame(coef(summary(M)), outcome = Y)
	names(out) <- c("estimate", "std.error", "statistic",
	"p.value", "outcome")

	# Use estimates of delta to predict average treatment effects (ATEs)

	# Predicted ATE for n=2
	out[3, "estimate"] <- 2 * out["d", "estimate"] - 1

	# Predicted ATE for n=infinity
	out[4, "estimate"] <- 2 * (out["d", "estimate"] / (1 + out["d", "estimate"])) - 1
	out
	}

	# Update MLE for n = 2
	design_3 <- replace_step(
	design_3,
	11,
	declare_estimator(
	handler =
	tidy_estimator(structural_estimator_2),
	estimand =
	c("k", "d", "ATE_2", "ATE_inf"),
	label = "Struc_2_norm"
	)
	)

	# Update MLE for n = infinity
	design_3 <- replace_step(
	design_3,
	12,
	declare_estimator(
	handler =
	tidy_estimator(structural_estimator_2),
	Y = "Y_inf_obs",
	y = function(Z, d)
	Z * d / (1 + d) + (1 - Z) *
	(1 - d / (1 + d)),
	estimand = c("k", "d", "ATE_2", "ATE_inf"),
	label = "Struc_inf_norm"
	)
	)


	# Diagnose design with wrong model ----------------------------------------

	diagnosis_3 <- diagnose_design(design_3 , sims = sims)

	# Change data generating process so that behavioral customers pay 1/2 -----

	design_4 <- redesign(design_3, e = 1/2)

	# Diagnose design with wrong but observationally equivalent model ---------

	diagnosis_4 <- diagnose_design(design_4 , sims = sims)



	# Helper function to clean diagnoses --------------------------------------

	cleanDiagn <- function(diagnosis) {
	out <- reshape_diagnosis(diagnosis)
	out[out=="NA"] <- ""
	out <- out[!out$`Design Label` == "",]
	names(out)[which(names(out) == "Estimator Label")] <- "Estimator"
	names(out)[which(names(out) == "Mean Estimand")] <- "Estimand"
	return(out)
	}


	# Result Tables -----------------------------------------------------------

	# Tables in the main text refer to maximum likelihood estimators as "MLE_n"
	# The code here reproduces the tables in the appendix which refer to the same estimators as "Struc_n"
	# Specifically:
	# MLE_2 = Struc_2
	# MLE_{\infty} = Struc_inf
	# MLE'_2 = Struc_2_norm
	# MLE'_{\infty} = Struc_inf_norm

	# Diagnosis of design with correct model (tables 2, 3 and 4 in text and table 8 in appendix)

	result1 <- cleanDiagn(diagnosis_1)
	kable(result1[, - c(1, 4, 5, 8, 9, 11:13)], row.names = F, caption = "Detailed diagnosis of design with correct model")


	# Diagnosis of design with correct model but no randomization (table 5 in text and table 9 in appendix)

	result2 <- cleanDiagn(diagnosis_2)
	kable(result2[, - c(1, 2, 5, 6, 9, 10, 12:14)], row.names = F,
	caption = "Detailed diagnosis of design with correct model but no randomization")

	# Diagnosis of design with incorrect model (6 in text and table 10 in appendix)

	diagnosis_3$diagnosands_df

	result3 <- cleanDiagn(diagnosis_3)
	kable(result3[, - c(1:2, 5, 6, 9, 10, 12:14)], row.names = F,
	caption = "Detailed diagnosis of design with incorrect model (assume $q=0$)")


	# Diagnosis of design with incorrect yet observationally equivalent model (table 7 in text and table 11 in appendix)

	result4 <- cleanDiagn(diagnosis_4)
	kable(result4[, - c(1:2, 5, 6, 9, 10, 12:14)], row.names = F,
	caption = "Detailed Diagnosis of design with incorrect yet observationally equivalent model")