Skip to content

Instantly share code, notes, and snippets.

@apoorvalal
Last active May 29, 2023 03:08
Show Gist options
  • Save apoorvalal/074e5787ff0176f3d3a91b98be18c2b4 to your computer and use it in GitHub Desktop.
Save apoorvalal/074e5787ff0176f3d3a91b98be18c2b4 to your computer and use it in GitHub Desktop.
simulator to compare did/sc/sdid/augsynth/eb/ebhr for ATT estimation in panel settings
pacman::p_load(synthdid, ebal, glue, augsynth, MCPanel, glue)
# needs https://github.com/apoorvalal/ebal - solves ebal problem in torch - far more stable than old version
# remotes::install_github("apoorvalal/ebal")
# %% simulator for panel balancing
#' @param n number of units
#' @param t number of time periods
#' @param parallel_trends boolean for parallel trends
#' @param random_assignment boolean for random assignment of treatment
#' @param σ noise level in mapping from factor to outcome
#' @param τ true treatment effect (only applies in the t-th period)
#' @param treatfn function to generate treatment (must return value in (0,1))
#' @param return_weights boolean for whether a matrix of unit weights should be returned
#' @return if return_weights, list with biases and weights, else vector with biases
oneSim = \(n = 1000L, t = 7L,
parallel_trends = FALSE, random_assignment = FALSE,
σ = 0.1, τ = 0,
treatfn = plogis,
return_weights = FALSE) {
d = data.table(id = 1:n)
# single unobserved factor that is increasing in rowid
d[, x := rnorm(1, mean = (.I / n) - 0.5, sd = 0.5), id]
if (random_assignment) {
d[, w := 1 * rbinom(1, 1, 0.5), id] # last 500 obs treated
} else {
d[, w := 1 * rbinom(1, 1, treatfn(x)), id] # assignment fn of factor
}
for (t in 1:t) {
if (parallel_trends) { # parallel trends
set(d, , paste0("y", t), d$x + rnorm(n, 0, σ))
} else { # time trends - y_t = x * U(0.1, 0.5) * t
set(d, , paste0("y", t), d$x * runif(1, 0.1, 0.5) * t + rnorm(n, 0, σ))
}
}
# final period outcome
yfin = paste0("y", t)
set(d, , yfin, d[[yfin]] + τ * d$w)
######################################################################
# did
d[, mean_lag_y := rowMeans(.SD), by = id, .SDcols = paste0("y", 1:(t - 1))]
d[, diffY := get(yfin) - mean_lag_y]
did_est = lm(diffY ~ w, d)$coefficients[2] %>% unname
# sdid - prep outcome matrix - N X T with first N0 control units and N1 treat
ycols = grep("^y+", colnames(d), value = TRUE)
ymat = d[, ..ycols] %>% as.matrix()
treatIDs = which(d$w == 1) ; nt = length(treatIDs); nc = n - nt
# stack untreated and treated units
ymat = rbind(ymat[-treatIDs, ], ymat[treatIDs, ])
# both sdid and sc accept ymat, Nctrl, Tctrl
sdid_est = synthdid_estimate(ymat, N0 = nc, T0 = t - 1)
sc_est = sc_estimate(ymat, N0 = nc, T0 = t - 1)
# horizontal regression - ridge
# need mask for missing data
# inverse of treatment matrix - only 0 where treatment applies
mask = matrix(1, nrow(ymat), ncol(ymat)); mask[(nc + 1):nrow(mask), t] = 0
# time series elastic net - tune both λ and α
hrYhat = t(en_mp_rows(t(ymat), t(mask), num_alpha = 10))
hr_cf = colMeans(hrYhat[(nc + 1):nrow(hrYhat), ]) # HR counterfactual
hr_est = mean(ymat[(nc + 1):nrow(ymat), t]) - hr_cf[t]
# augsynth prep - convert to long format
kv = c('id', ycols)
dlong = melt(d[, ..kv], id.var = "id")
dlong[, tt := as.numeric(gsub("y", "", variable))]
dlong = merge(dlong, d[, .(id, w)])
dlong[, treat := ifelse(w == 1 & tt == t, 1, 0)]
augstat = try(
{
augsynmod = augsynth(value ~ treat, id, tt, dlong, progfunc = 'Ridge')
augsyn_est = augsynmod %>%
summary(inf = FALSE) %>%
.$average_att %>%
.[1, 1]
},
silent = TRUE
)
if (inherits(augstat, "try-error")) augsyn_est = NA
# ebal prep
ebfit = try(
{
demeaned_mat = ymat[, 1:(t - 1)] - rowMeans(ymat[, 1:(t - 1)]) # subtract off row means to allow for extrapolation
# balance on demeaned data
ebwt = ebalance(c(rep(0, nc), rep(1, nt)), demeaned_mat, method = "AutoDiff")$w
# first difference is raw
treat_Y1_diffed = mean(ymat[(nc + 1):nrow(ymat), t] - rowMeans(ymat[(nc + 1):nrow(ymat), 1:(t - 1)]))
# second difference is weighted
est_Y0_diffed = weighted.mean(ymat[1:nc, t] - rowMeans(ymat[1:nc, 1:(t - 1)]), ebwt)
eb_est = treat_Y1_diffed - est_Y0_diffed
# ebal +hr
hrebal_cf = hr_cf + apply(ymat[1:nc, ] - hrYhat[1:nc, ], 2, weighted.mean, ebwt) # μhat + γ(y - μhat)
hrebal_est = mean(ymat[(nc + 1):nrow(ymat), t]) - hrebal_cf[t]
},
silent = TRUE
)
if (inherits(ebfit, "try-error")) {
eb_est = NA; hrebal_est = NA; ebwt = NA;
}
res = (c(
"did" = did_est, "sc" = sc_est, "sdid" = sdid_est, "augsynth" = augsyn_est,
"eb" = eb_est, "hr" = hr_est, "hreb" = hrebal_est
) - τ)
if (!return_weights) return(res)
# return weights
unitWts = \(x) attr(x, "weights")$omega
wtmat = cbind(
as.numeric(unitWts(sdid_est)),
as.numeric(unitWts(sc_est)),
as.numeric(augsynmod$weights),
as.numeric(ebwt)
) %>% set_colnames(c("sdid", "sc", "augsyn", "eb"))
return(list(res, wtmat))
}
# %%
oneSim(n = 500, t = 10, parallel_trends = FALSE)
# %%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment