Created
February 12, 2022 08:11
-
-
Save apoorvalal/63e55cbdb271a3bc65dfedb9726f0008 to your computer and use it in GitHub Desktop.
Implementation of OB treatment effect estimators
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rm(list = ls()) | |
libreq(data.table, fixest, rio, ggplot2, ebal) | |
# %% | |
cps3 = import("cps3re74.dta") |> setDT() | |
cps3 = cps3 |> na.omit() | |
setnames(cps3, c("re78", "treat"), c("y", "d")) | |
xs = c("age", "age2", "ed", "black", "hisp", "married", "nodeg", "re74", "re75") | |
# %% | |
OB_ATT = function(df, xs){ | |
X1bar = df[d == 1, lapply(.SD, mean), .SDcols = xs] |> as.numeric() | |
Xdemeaned = sweep(df[, ..xs], 2, X1bar) | |
colnames(Xdemeaned) = paste0(colnames(Xdemeaned), "_dem") | |
regdf = cbind(df[, .(y, d)], df[, ..xs], Xdemeaned) | |
f = as.formula(paste0("y ~ d + ", paste(xs, collapse = "+"), "+", | |
"d:(", paste(colnames(Xdemeaned), collapse = "+"), ")")) | |
feols(f, data = regdf, vcov = 'hc1') | |
} | |
OB_ATT(cps3, xs) | |
# %% | |
data(lalonde.psid); setDT(lalonde.psid) | |
setnames(lalonde.psid, c("re78", "treat"), c("y", "d")) | |
xs = setdiff(colnames(lalonde.psid), c("y", 'd')) | |
OB_ATT(lalonde.psid, xs) | |
# %% | |
ebest = function(df, xs){ | |
ebobj = ebalance( df$d, df[, ..xs] |> as.matrix() ) | |
df[d == 1, mean(y)] - weighted.mean(df[d == 0, y], ebobj$w ) | |
} | |
ebest(lalonde.psid, xs) | |
ebest(cps3, c("age", "ed", "black", "hisp", "married", "nodeg", "re74", "re75")) | |
# %% #################################################### | |
Kline_OB = function(y, xs, df){ | |
require(data.table) | |
setDT(df) | |
# outcome model in treatment | |
omod = feols(formula_stitcher(y, xs), data = df[d == 0], vcov = "hc1") | |
b = omod$coefficients; V0 = vcov(omod) | |
# estimate effect | |
df$Y0 = predict(omod, df) | |
df[, dif := y - Y0] | |
M1 = df[d == 1, mean(dif)] | |
# standard error | |
X = cbind(1, df[, ..xs]) |> as.matrix() | |
N1 = sum(df$d) | |
D = df$d |> as.matrix() | |
V1 = df[d == 1, .(var(y), .N)] |> as.numeric() | |
V1 = V1[1]/V1[2] | |
Vdif = V1 + (t(D) %*% X %*% V0 %*% t(X) %*% (D / (N1^2)) ) | |
SE = sqrt(Vdif) |> as.numeric() | |
# return | |
c(est = M1, se = SE) |> round(3) | |
} | |
Kline_OB('y', c("age", "age2", "ed", "black", "hisp", "married", "nodeg", "re74", "re75"), cps3) | |
# %% | |
## ## ######## #### ###### ## ## ######## ###### | |
## ## ## ## ## ## ## ## ## ## ## ## | |
## ## ## ## ## ## ## ## ## ## | |
## ## ## ###### ## ## #### ######### ## ###### | |
## ## ## ## ## ## ## ## ## ## ## | |
## ## ## ## ## ## ## ## ## ## ## ## | |
### ### ######## #### ###### ## ## ## ###### | |
cps3$p = glm(formula_stitcher("d", xs), data = cps3, family = binomial())$fitted.values | |
# %% | |
π = mean(cps3$d); N1 = sum(cps3$d); N0 = nrow(cps3) - N1 | |
cps3[d== 0, wlogit := p/(1-p) * (1-π)/π] | |
# %% OB weights | |
N = nrow(cps3) | |
X = cbind(1, cps3[, ..xs]) |> as.matrix() | |
D = cps3$d |> as.matrix() | |
Y = cps3$y |> as.matrix() | |
# initialize zero | |
S = matrix(0, nrow = N, ncol = N) | |
# replace diag with 1 - d | |
diag(S) = 1 - cps3$d | |
H = X %*% solve(t(X) %*% S %*% X) %*% t(X) %*% S | |
obW = (1/N1) * t(D) %*% H |> as.numeric() | |
# %% # counterfactual mean | |
as.numeric(obW %*% Y) | |
cps3[d ==1, mean(y)] - as.numeric(obW %*% Y) | |
# %% | |
cps3[, wob := obW * N1] | |
with(cps3, plot(wlogit, wob)) | |
# %% balance | |
cps3[d == 1 , mean(age)] | |
cps3[d == 0, weighted.mean(age, wlogit)] | |
cps3[d == 0, weighted.mean(age, wob)] | |
# %% OB estimate |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment