-
-
Save apoorvalal/db934b9b9b8ec849c5aac914a6a2ca57 to your computer and use it in GitHub Desktop.
split-sample estimation using a single regression + delta-method for IV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rm(list = ls()) | |
library(LalRUtils); libreq(fixest, data.table, AER, car, sandwich) | |
# stata auto data; download from https://github.com/apoorvalal/LalRUtils/blob/master/data/auto.RData | |
data(auto); setDT(auto) | |
auto = auto[!is.na(rep78)][, id := .I] # drop missings and create ID | |
reg1 = feols(price ~ mpg + weight, data = auto, se = 'hc1') | |
reg2 = feols(weight ~ 1 | 0 | length ~ rep78, data = auto, se = 'hc1', subset = auto[, foreign == "Foreign"]) | |
# %% stack | |
reg1samp = copy(auto); reg2samp = copy(auto[reg2$obs_selection$subset]) | |
reg2samp[, samp2 := 1] | |
# append them | |
auto2 = rbindlist(list(reg1samp, reg2samp), fill = T) | |
auto2[is.na(samp2), samp2 := 0] | |
# %% tricksy columns | |
auto2[, y := price * (1-samp2) + weight * samp2] | |
# rhs in reg1 | |
samp1v = c('mpg', 'weight') # can be done cleverly using labels(terms(reg1$fml)) | |
# generate samp1 specific columns | |
auto2[, paste0(samp1v, "_1") := lapply(.SD, function(x) x * (1-samp2)), .SDcols = samp1v] | |
# generate samp2 specific columns | |
samp2v = c('length', 'rep78') # trickier for IV formula reg2$fml_all$iv | |
auto2[, paste0(samp2v, "_2") := lapply(.SD, function(x) x * samp2), .SDcols = samp2v] | |
# %% IV + OLS at once - deltaMethod for AER::ivreg works | |
reg3 = AER::ivreg(y ~ length_2 + mpg_1 + weight_1 + samp2 | rep78_2 + mpg_1 + weight_1 + samp2, | |
data = auto2) | |
vc = vcovCL(reg3, ~id) | |
car::deltaMethod(reg3, "mpg_1 + length_2", vc) | |
# %% and feols does too (thanks grant) | |
reg3 = feols(y ~ mpg_1 + weight_1 + samp2 | 0 | length_2 ~ rep78_2, auto2, cluster = ~id) | |
etable(list(reg1, reg2, reg3)) | |
# fixest works (use 'fit_' prefix from vcov) | |
deltaMethod(reg3, "mpg_1 + fit_length_2", vcov(reg3)) | |
# linalg-y solution (via doug rivers) | |
vcovget = function(m, ...){ | |
X = model.matrix(m, ...) | |
n = nrow(X); k = ncol(X) | |
# white matrix | |
e = resid(m)/ sqrt(n / (n - k)) | |
# bread | |
A = crossprod(X) | |
# meat | |
B = wcrossprod(X, w = e^2) | |
# bread meat bread | |
V = solve(A) %*% B %*% solve(A) | |
return(list(V = V, A = A, B = B, e = e, X = X)) | |
} | |
# %% | |
f1 = feols(price ~ mpg + weight, data = auto, se = 'hc1') | |
f2 = feols(weight ~ 1 | 0 | length ~ rep78, data = auto, subset = ~ foreign == "Foreign", se = 'hc1') | |
r1 = vcovget(f1) | |
r2 = vcovget(f2, type ="iv.rhs2") | |
# this hardcodes the fact that m2 has a subset of obs - can be generalised | |
B12 = wcrossprod(r1$X[f2$obs_selection$subset, ], r2$X, | |
w = r1$e[f2$obs_selection$subset] * r2$e) | |
V12 = solve(r1$A) %*% B12 %*% solve(r2$A) | |
V = rbind(cbind(r1$V, V12), cbind(t(V12), r2$V)) | |
list(Estimate = coef(f1)['mpg'] + coef(f2)['fit_length'], | |
se = sqrt(V[2, 2] + 2 * V[2, 5] + V[5, 5]) | |
) | |
nit: I recommend nafill / setnafill for doing NA replacement
another nit: I'd loop columns here. lapply won't have any benefits, but a memory overhead.
samp2 = auto2$samp2
for (col in samp1v) set(auto2, , paste0(col, "_1"), (1-samp2) * auto2[[col]] )
related: Rdatatable/data.table#4225
ah, so that's what set
is for. Thanks, @MichaelChirico .
Thanks as usual for the ultra-clean implementation, @grantmcdermott
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Nice man. Couldn't help myself, so a couple minor "fwiw" suggestions/changes.
marginaleffects::hypotheses
instead ofcar::deltaMethod
.