Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@apoorvalal
Created February 12, 2022 08:11
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save apoorvalal/63e55cbdb271a3bc65dfedb9726f0008 to your computer and use it in GitHub Desktop.
Save apoorvalal/63e55cbdb271a3bc65dfedb9726f0008 to your computer and use it in GitHub Desktop.
Implementation of OB treatment effect estimators
rm(list = ls())
libreq(data.table, fixest, rio, ggplot2, ebal)
# %%
cps3 = import("cps3re74.dta") |> setDT()
cps3 = cps3 |> na.omit()
setnames(cps3, c("re78", "treat"), c("y", "d"))
xs = c("age", "age2", "ed", "black", "hisp", "married", "nodeg", "re74", "re75")
# %%
OB_ATT = function(df, xs){
X1bar = df[d == 1, lapply(.SD, mean), .SDcols = xs] |> as.numeric()
Xdemeaned = sweep(df[, ..xs], 2, X1bar)
colnames(Xdemeaned) = paste0(colnames(Xdemeaned), "_dem")
regdf = cbind(df[, .(y, d)], df[, ..xs], Xdemeaned)
f = as.formula(paste0("y ~ d + ", paste(xs, collapse = "+"), "+",
"d:(", paste(colnames(Xdemeaned), collapse = "+"), ")"))
feols(f, data = regdf, vcov = 'hc1')
}
OB_ATT(cps3, xs)
# %%
data(lalonde.psid); setDT(lalonde.psid)
setnames(lalonde.psid, c("re78", "treat"), c("y", "d"))
xs = setdiff(colnames(lalonde.psid), c("y", 'd'))
OB_ATT(lalonde.psid, xs)
# %%
ebest = function(df, xs){
ebobj = ebalance( df$d, df[, ..xs] |> as.matrix() )
df[d == 1, mean(y)] - weighted.mean(df[d == 0, y], ebobj$w )
}
ebest(lalonde.psid, xs)
ebest(cps3, c("age", "ed", "black", "hisp", "married", "nodeg", "re74", "re75"))
# %% ####################################################
Kline_OB = function(y, xs, df){
require(data.table)
setDT(df)
# outcome model in treatment
omod = feols(formula_stitcher(y, xs), data = df[d == 0], vcov = "hc1")
b = omod$coefficients; V0 = vcov(omod)
# estimate effect
df$Y0 = predict(omod, df)
df[, dif := y - Y0]
M1 = df[d == 1, mean(dif)]
# standard error
X = cbind(1, df[, ..xs]) |> as.matrix()
N1 = sum(df$d)
D = df$d |> as.matrix()
V1 = df[d == 1, .(var(y), .N)] |> as.numeric()
V1 = V1[1]/V1[2]
Vdif = V1 + (t(D) %*% X %*% V0 %*% t(X) %*% (D / (N1^2)) )
SE = sqrt(Vdif) |> as.numeric()
# return
c(est = M1, se = SE) |> round(3)
}
Kline_OB('y', c("age", "age2", "ed", "black", "hisp", "married", "nodeg", "re74", "re75"), cps3)
# %%
## ## ######## #### ###### ## ## ######## ######
## ## ## ## ## ## ## ## ## ## ## ##
## ## ## ## ## ## ## ## ## ##
## ## ## ###### ## ## #### ######### ## ######
## ## ## ## ## ## ## ## ## ## ##
## ## ## ## ## ## ## ## ## ## ## ##
### ### ######## #### ###### ## ## ## ######
cps3$p = glm(formula_stitcher("d", xs), data = cps3, family = binomial())$fitted.values
# %%
π = mean(cps3$d); N1 = sum(cps3$d); N0 = nrow(cps3) - N1
cps3[d== 0, wlogit := p/(1-p) * (1-π)/π]
# %% OB weights
N = nrow(cps3)
X = cbind(1, cps3[, ..xs]) |> as.matrix()
D = cps3$d |> as.matrix()
Y = cps3$y |> as.matrix()
# initialize zero
S = matrix(0, nrow = N, ncol = N)
# replace diag with 1 - d
diag(S) = 1 - cps3$d
H = X %*% solve(t(X) %*% S %*% X) %*% t(X) %*% S
obW = (1/N1) * t(D) %*% H |> as.numeric()
# %% # counterfactual mean
as.numeric(obW %*% Y)
cps3[d ==1, mean(y)] - as.numeric(obW %*% Y)
# %%
cps3[, wob := obW * N1]
with(cps3, plot(wlogit, wob))
# %% balance
cps3[d == 1 , mean(age)]
cps3[d == 0, weighted.mean(age, wlogit)]
cps3[d == 0, weighted.mean(age, wob)]
# %% OB estimate
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment