# Replication code for Wilke, Humphreys 2020, Field Experiments, Theory, and External Validity # The analysis was run using ## R version 3.5.3 (2019-03-11) ## DeclareDesign version 0.18.0 ## tidyverse version 1.2.1 ## bbmle version 1.0.20 ## knitr version 1.22 # Install and load packages ----------------------------------------------- packages <- c( "DeclareDesign", "tidyverse", "bbmle", "knitr") invisible(lapply(packages, function(x) if (!require(x, character.only=T)){install.packages(x);library(x, character.only = T)})) rm(packages) # Set number of simulations ----------------------------------------------- sims = 1000 # Define structural estimator --------------------------------------------- # Maximum Likelihood Estimator to estimate kappa (k), delta (d) and q: structural_estimator <- function(data, Y = "Y_2_obs", # function to calculate # predicted price for rational # customers given z_i and delta, # defaults to n=2: y = function(Z, d) Z * d + (1 - Z) * (1 - d)) { # Define negative log likelihood as a function of kappa, delta and q LL <- function(k, d, q) { m <- with(data, y(Z, d)) R <- q * dbeta(data[Y][], k * 3 / 4, k * 1 / 4) + (1 - q) * dbeta(data[Y][], k * m, k * (1 - m)) - sum(log(R)) } # Estimation M <- mle2( LL, method = "L-BFGS-B", start = list(k = 2, d = 0.50, q = 0.50), lower = c(k = 1, d = 0.01, q = 0.01), upper = c(k = 1000, d = 0.99, q = 0.99) ) # Format output from estimation out <- data.frame(coef(summary(M)), outcome = Y) names(out) <- c("estimate", "std.error", "statistic", "p.value", "outcome") # Use estimates of q and delta to predict average treatment effects (ATEs) # Predicted ATE for n=2 out[4, 1] <- (1 - out["q", "estimate"]) * (2 * out["d", "estimate"] - 1) # Predicted ATE for n=infinity out[5, 1] <- (1 - out["q", "estimate"]) * (2 * out["d", "estimate"] / (1 + out["d", "estimate"]) - 1) out } # Declare the design ---------------------------------------------------------- # Define parameter values: N = 500 # Sample size d = 0.8 # True delta (unknown) k = 6 # Nuisance parameter that affects variance (unknown) q = 0.5 # Share of behavioral types in the population (unknown) e = 3/4 # Price paid by behavioral customers (known) random = TRUE # Switch to control whether # first movers are randomly assigned # Design declaration: design <- # Define the population declare_population(N = N, # indicator for behavioral type (norm = 1) norm = rbinom(N, 1, q), # probablity of going first without random assignment p = ifelse(norm == 1, .8, .5) ) + # Define mean potential outcomes for n = 2 declare_potential_outcomes(Y_2 ~ norm*e + (1-norm)*(Z*d + (1-Z)*(1-d))) + # Define mean potential outcomes for n = infinity declare_potential_outcomes(Y_inf ~ norm*e + (1-norm)*(Z*d/(1+d) + (1-Z)*(1-d/(1+d)))) + # Define estimands (quantities we want to estimate) declare_estimand(ATE_2 = mean(Y_2_Z_1 - Y_2_Z_0), # ATE n = 2 ATE_inf = mean(Y_inf_Z_1 - Y_inf_Z_0), # ATE n = infinity k = k, # kappa d = d, # delta q = q) + # q # Declare assignment process (random assignment if random = TRUE) declare_assignment(prob_unit = if(random) rep(0.5, N) else p, simple = TRUE) + # Declare revealed potential outcomes declare_reveal(Y_2, Z) + declare_reveal(Y_inf, Z) + # Get draws from beta distribution given means for n = 2 and n = infinity declare_step(fabricate, Y_2_obs = rbeta(N, Y_2*k, (1-Y_2)*k), Y_inf_obs = rbeta(N, Y_inf*k, (1-Y_inf)*k) ) + # Declare estimators # Difference-in-means for n = 2 declare_estimator(Y_2_obs ~ Z, estimand = "ATE_2", label = "DIM_2") + # Difference-in-means for n = infinity declare_estimator(Y_inf_obs ~ Z, estimand = "ATE_inf", label = "DIM_inf") + # MLE for n = 2 declare_estimator(handler = tidy_estimator(structural_estimator), estimand = c("k","d", "q", "ATE_2", "ATE_inf"), label = "Struc_2") + # MLE for n = infinity declare_estimator(handler = tidy_estimator(structural_estimator), Y = "Y_inf_obs", y = function(Z, d) Z*d/(1+d) + (1-Z)*(1-d/(1+d)), estimand = c("k","d","q","ATE_2", "ATE_inf"), label = "Struc_inf") # Diagnose the design ----------------------------------------------------- diagnosis_1 <- diagnose_design(design, sims = sims) # Change design to remove random assignment ------------------------------- design_2 <- redesign(design, random = FALSE) # Diagnose design without random assignment ------------------------------- diagnosis_2 <- diagnose_design(design_2, sims = sims) # Change design to assume q = 0 ------------------------------------------- # Turn random assignment on again design_3 <- redesign(design_2, random = TRUE) # New estimator that assumes q = 0 structural_estimator_2 <- function(data, Y = "Y_2_obs", # function to calculate # predicted price for rational # customers given z_i and delta, # defaults to n=2: y = function(Z, d) Z * d + (1 - Z) * (1 - d)) { # Define negative log likelihood as a function of kappa and delta LL <- function(k, d) { m <- with(data, y(Z, d)) R <- dbeta(data[Y][], k * m, k * (1 - m)) - sum(log(R)) } # Estimation M <- mle2(LL, start = list(k = 5, d = 0.5)) # Format output from estimation out <- data.frame(coef(summary(M)), outcome = Y) names(out) <- c("estimate", "std.error", "statistic", "p.value", "outcome") # Use estimates of delta to predict average treatment effects (ATEs) # Predicted ATE for n=2 out[3, "estimate"] <- 2 * out["d", "estimate"] - 1 # Predicted ATE for n=infinity out[4, "estimate"] <- 2 * (out["d", "estimate"] / (1 + out["d", "estimate"])) - 1 out } # Update MLE for n = 2 design_3 <- replace_step( design_3, 11, declare_estimator( handler = tidy_estimator(structural_estimator_2), estimand = c("k", "d", "ATE_2", "ATE_inf"), label = "Struc_2_norm" ) ) # Update MLE for n = infinity design_3 <- replace_step( design_3, 12, declare_estimator( handler = tidy_estimator(structural_estimator_2), Y = "Y_inf_obs", y = function(Z, d) Z * d / (1 + d) + (1 - Z) * (1 - d / (1 + d)), estimand = c("k", "d", "ATE_2", "ATE_inf"), label = "Struc_inf_norm" ) ) # Diagnose design with wrong model ---------------------------------------- diagnosis_3 <- diagnose_design(design_3 , sims = sims) # Change data generating process so that behavioral customers pay 1/2 ----- design_4 <- redesign(design_3, e = 1/2) # Diagnose design with wrong but observationally equivalent model --------- diagnosis_4 <- diagnose_design(design_4 , sims = sims) # Helper function to clean diagnoses -------------------------------------- cleanDiagn <- function(diagnosis) { out <- reshape_diagnosis(diagnosis) out[out=="NA"] <- "" out <- out[!out$Design Label == "",] names(out)[which(names(out) == "Estimator Label")] <- "Estimator" names(out)[which(names(out) == "Mean Estimand")] <- "Estimand" return(out) } # Result Tables ----------------------------------------------------------- # Tables in the main text refer to maximum likelihood estimators as "MLE_n" # The code here reproduces the tables in the appendix which refer to the same estimators as "Struc_n" # Specifically: # MLE_2 = Struc_2 # MLE_{\infty} = Struc_inf # MLE'_2 = Struc_2_norm # MLE'_{\infty} = Struc_inf_norm # Diagnosis of design with correct model (tables 2, 3 and 4 in text and table 8 in appendix) result1 <- cleanDiagn(diagnosis_1) kable(result1[, - c(1, 4, 5, 8, 9, 11:13)], row.names = F, caption = "Detailed diagnosis of design with correct model") # Diagnosis of design with correct model but no randomization (table 5 in text and table 9 in appendix) result2 <- cleanDiagn(diagnosis_2) kable(result2[, - c(1, 2, 5, 6, 9, 10, 12:14)], row.names = F, caption = "Detailed diagnosis of design with correct model but no randomization") # Diagnosis of design with incorrect model (6 in text and table 10 in appendix) diagnosis_3$diagnosands_df result3 <- cleanDiagn(diagnosis_3) kable(result3[, - c(1:2, 5, 6, 9, 10, 12:14)], row.names = F, caption = "Detailed diagnosis of design with incorrect model (assume $q=0$)") # Diagnosis of design with incorrect yet observationally equivalent model (table 7 in text and table 11 in appendix) result4 <- cleanDiagn(diagnosis_4) kable(result4[, - c(1:2, 5, 6, 9, 10, 12:14)], row.names = F, caption = "Detailed Diagnosis of design with incorrect yet observationally equivalent model")