Last active
May 29, 2023 03:08
-
-
Save apoorvalal/074e5787ff0176f3d3a91b98be18c2b4 to your computer and use it in GitHub Desktop.
simulator to compare did/sc/sdid/augsynth/eb/ebhr for ATT estimation in panel settings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pacman::p_load(synthdid, ebal, glue, augsynth, MCPanel, glue) | |
# needs https://github.com/apoorvalal/ebal - solves ebal problem in torch - far more stable than old version | |
# remotes::install_github("apoorvalal/ebal") | |
# %% simulator for panel balancing | |
#' @param n number of units | |
#' @param t number of time periods | |
#' @param parallel_trends boolean for parallel trends | |
#' @param random_assignment boolean for random assignment of treatment | |
#' @param σ noise level in mapping from factor to outcome | |
#' @param τ true treatment effect (only applies in the t-th period) | |
#' @param treatfn function to generate treatment (must return value in (0,1)) | |
#' @param return_weights boolean for whether a matrix of unit weights should be returned | |
#' @return if return_weights, list with biases and weights, else vector with biases | |
oneSim = \(n = 1000L, t = 7L, | |
parallel_trends = FALSE, random_assignment = FALSE, | |
σ = 0.1, τ = 0, | |
treatfn = plogis, | |
return_weights = FALSE) { | |
d = data.table(id = 1:n) | |
# single unobserved factor that is increasing in rowid | |
d[, x := rnorm(1, mean = (.I / n) - 0.5, sd = 0.5), id] | |
if (random_assignment) { | |
d[, w := 1 * rbinom(1, 1, 0.5), id] # last 500 obs treated | |
} else { | |
d[, w := 1 * rbinom(1, 1, treatfn(x)), id] # assignment fn of factor | |
} | |
for (t in 1:t) { | |
if (parallel_trends) { # parallel trends | |
set(d, , paste0("y", t), d$x + rnorm(n, 0, σ)) | |
} else { # time trends - y_t = x * U(0.1, 0.5) * t | |
set(d, , paste0("y", t), d$x * runif(1, 0.1, 0.5) * t + rnorm(n, 0, σ)) | |
} | |
} | |
# final period outcome | |
yfin = paste0("y", t) | |
set(d, , yfin, d[[yfin]] + τ * d$w) | |
###################################################################### | |
# did | |
d[, mean_lag_y := rowMeans(.SD), by = id, .SDcols = paste0("y", 1:(t - 1))] | |
d[, diffY := get(yfin) - mean_lag_y] | |
did_est = lm(diffY ~ w, d)$coefficients[2] %>% unname | |
# sdid - prep outcome matrix - N X T with first N0 control units and N1 treat | |
ycols = grep("^y+", colnames(d), value = TRUE) | |
ymat = d[, ..ycols] %>% as.matrix() | |
treatIDs = which(d$w == 1) ; nt = length(treatIDs); nc = n - nt | |
# stack untreated and treated units | |
ymat = rbind(ymat[-treatIDs, ], ymat[treatIDs, ]) | |
# both sdid and sc accept ymat, Nctrl, Tctrl | |
sdid_est = synthdid_estimate(ymat, N0 = nc, T0 = t - 1) | |
sc_est = sc_estimate(ymat, N0 = nc, T0 = t - 1) | |
# horizontal regression - ridge | |
# need mask for missing data | |
# inverse of treatment matrix - only 0 where treatment applies | |
mask = matrix(1, nrow(ymat), ncol(ymat)); mask[(nc + 1):nrow(mask), t] = 0 | |
# time series elastic net - tune both λ and α | |
hrYhat = t(en_mp_rows(t(ymat), t(mask), num_alpha = 10)) | |
hr_cf = colMeans(hrYhat[(nc + 1):nrow(hrYhat), ]) # HR counterfactual | |
hr_est = mean(ymat[(nc + 1):nrow(ymat), t]) - hr_cf[t] | |
# augsynth prep - convert to long format | |
kv = c('id', ycols) | |
dlong = melt(d[, ..kv], id.var = "id") | |
dlong[, tt := as.numeric(gsub("y", "", variable))] | |
dlong = merge(dlong, d[, .(id, w)]) | |
dlong[, treat := ifelse(w == 1 & tt == t, 1, 0)] | |
augstat = try( | |
{ | |
augsynmod = augsynth(value ~ treat, id, tt, dlong, progfunc = 'Ridge') | |
augsyn_est = augsynmod %>% | |
summary(inf = FALSE) %>% | |
.$average_att %>% | |
.[1, 1] | |
}, | |
silent = TRUE | |
) | |
if (inherits(augstat, "try-error")) augsyn_est = NA | |
# ebal prep | |
ebfit = try( | |
{ | |
demeaned_mat = ymat[, 1:(t - 1)] - rowMeans(ymat[, 1:(t - 1)]) # subtract off row means to allow for extrapolation | |
# balance on demeaned data | |
ebwt = ebalance(c(rep(0, nc), rep(1, nt)), demeaned_mat, method = "AutoDiff")$w | |
# first difference is raw | |
treat_Y1_diffed = mean(ymat[(nc + 1):nrow(ymat), t] - rowMeans(ymat[(nc + 1):nrow(ymat), 1:(t - 1)])) | |
# second difference is weighted | |
est_Y0_diffed = weighted.mean(ymat[1:nc, t] - rowMeans(ymat[1:nc, 1:(t - 1)]), ebwt) | |
eb_est = treat_Y1_diffed - est_Y0_diffed | |
# ebal +hr | |
hrebal_cf = hr_cf + apply(ymat[1:nc, ] - hrYhat[1:nc, ], 2, weighted.mean, ebwt) # μhat + γ(y - μhat) | |
hrebal_est = mean(ymat[(nc + 1):nrow(ymat), t]) - hrebal_cf[t] | |
}, | |
silent = TRUE | |
) | |
if (inherits(ebfit, "try-error")) { | |
eb_est = NA; hrebal_est = NA; ebwt = NA; | |
} | |
res = (c( | |
"did" = did_est, "sc" = sc_est, "sdid" = sdid_est, "augsynth" = augsyn_est, | |
"eb" = eb_est, "hr" = hr_est, "hreb" = hrebal_est | |
) - τ) | |
if (!return_weights) return(res) | |
# return weights | |
unitWts = \(x) attr(x, "weights")$omega | |
wtmat = cbind( | |
as.numeric(unitWts(sdid_est)), | |
as.numeric(unitWts(sc_est)), | |
as.numeric(augsynmod$weights), | |
as.numeric(ebwt) | |
) %>% set_colnames(c("sdid", "sc", "augsyn", "eb")) | |
return(list(res, wtmat)) | |
} | |
# %% | |
oneSim(n = 500, t = 10, parallel_trends = FALSE) | |
# %% |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment