apoorvalal/Hull_StackedReg_Example.R Secret

## Hull_StackedReg_Example.R
rm(list = ls())
library(LalRUtils); libreq(fixest, data.table, AER, car, sandwich)
# stata auto data; download from https://github.com/apoorvalal/LalRUtils/blob/master/data/auto.RData
data(auto); setDT(auto)
auto = auto[!is.na(rep78)][, id := .I] # drop missings and create ID
reg1 = feols(price ~ mpg + weight,            data = auto, se = 'hc1')
reg2 = feols(weight ~ 1 | 0 | length ~ rep78, data = auto, se = 'hc1', subset = auto[, foreign == "Foreign"])

# %% stack
reg1samp = copy(auto); reg2samp = copy(auto[reg2$obs_selection$subset])
reg2samp[, samp2 := 1]
# append them
auto2 = rbindlist(list(reg1samp, reg2samp), fill = T)
auto2[is.na(samp2), samp2 := 0]
# %% tricksy columns
auto2[, y := price * (1-samp2) + weight * samp2]
# rhs in reg1
samp1v = c('mpg', 'weight') # can be done cleverly using labels(terms(reg1$fml))
# generate samp1 specific columns
auto2[, paste0(samp1v, "_1") := lapply(.SD, function(x) x * (1-samp2)), .SDcols = samp1v]
# generate samp2 specific columns
samp2v = c('length', 'rep78') # trickier for IV formula reg2$fml_all$iv
auto2[, paste0(samp2v, "_2") := lapply(.SD, function(x) x * samp2),     .SDcols = samp2v]

# %% IV + OLS at once - deltaMethod for AER::ivreg works
reg3 = AER::ivreg(y ~ length_2 + mpg_1 + weight_1 + samp2 | rep78_2 + mpg_1 + weight_1 + samp2,
  data = auto2)
vc = vcovCL(reg3, ~id)
car::deltaMethod(reg3, "mpg_1 + length_2", vc)

# %% and feols does too (thanks grant)
reg3 = feols(y ~ mpg_1 + weight_1 + samp2 | 0 | length_2 ~ rep78_2, auto2, cluster =  ~id)
etable(list(reg1, reg2, reg3))
# fixest works (use 'fit_' prefix from vcov)
deltaMethod(reg3, "mpg_1 + fit_length_2", vcov(reg3))

# linalg-y solution (via doug rivers)
vcovget = function(m, ...){
  X = model.matrix(m, ...)
  n = nrow(X); k = ncol(X)
  # white matrix
  e = resid(m)/ sqrt(n / (n - k))
  # bread
  A = crossprod(X)
  # meat
  B = wcrossprod(X, w = e^2)
  # bread meat bread
  V = solve(A) %*% B  %*% solve(A)
  return(list(V = V, A = A, B = B, e = e, X = X))
}
# %%
f1 = feols(price ~ mpg + weight, data = auto, se = 'hc1')
f2 = feols(weight ~ 1 | 0 | length ~ rep78, data = auto, subset = ~ foreign == "Foreign", se = 'hc1')
r1 = vcovget(f1)
r2 = vcovget(f2, type ="iv.rhs2")
# this hardcodes the fact that m2 has a subset of obs - can be generalised
B12 = wcrossprod(r1$X[f2$obs_selection$subset, ], r2$X,
  w = r1$e[f2$obs_selection$subset] * r2$e)
V12 = solve(r1$A) %*% B12 %*% solve(r2$A)
V = rbind(cbind(r1$V, V12), cbind(t(V12), r2$V))
list(Estimate = coef(f1)['mpg'] + coef(f2)['fit_length'],
     se       = sqrt(V[2, 2] + 2 * V[2, 5] + V[5, 5])
)
	rm(list = ls())
	library(LalRUtils); libreq(fixest, data.table, AER, car, sandwich)
	# stata auto data; download from https://github.com/apoorvalal/LalRUtils/blob/master/data/auto.RData
	data(auto); setDT(auto)
	auto = auto[!is.na(rep78)][, id := .I] # drop missings and create ID
	reg1 = feols(price ~ mpg + weight, data = auto, se = 'hc1')
	reg2 = feols(weight ~ 1 \| 0 \| length ~ rep78, data = auto, se = 'hc1', subset = auto[, foreign == "Foreign"])

	# %% stack
	reg1samp = copy(auto); reg2samp = copy(auto[reg2$obs_selection$subset])
	reg2samp[, samp2 := 1]
	# append them
	auto2 = rbindlist(list(reg1samp, reg2samp), fill = T)
	auto2[is.na(samp2), samp2 := 0]
	# %% tricksy columns
	auto2[, y := price * (1-samp2) + weight * samp2]
	# rhs in reg1
	samp1v = c('mpg', 'weight') # can be done cleverly using labels(terms(reg1$fml))
	# generate samp1 specific columns
	auto2[, paste0(samp1v, "_1") := lapply(.SD, function(x) x * (1-samp2)), .SDcols = samp1v]
	# generate samp2 specific columns
	samp2v = c('length', 'rep78') # trickier for IV formula reg2$fml_all$iv
	auto2[, paste0(samp2v, "_2") := lapply(.SD, function(x) x * samp2), .SDcols = samp2v]

	# %% IV + OLS at once - deltaMethod for AER::ivreg works
	reg3 = AER::ivreg(y ~ length_2 + mpg_1 + weight_1 + samp2 \| rep78_2 + mpg_1 + weight_1 + samp2,
	data = auto2)
	vc = vcovCL(reg3, ~id)
	car::deltaMethod(reg3, "mpg_1 + length_2", vc)

	# %% and feols does too (thanks grant)
	reg3 = feols(y ~ mpg_1 + weight_1 + samp2 \| 0 \| length_2 ~ rep78_2, auto2, cluster = ~id)
	etable(list(reg1, reg2, reg3))
	# fixest works (use 'fit_' prefix from vcov)
	deltaMethod(reg3, "mpg_1 + fit_length_2", vcov(reg3))

	# linalg-y solution (via doug rivers)
	vcovget = function(m, ...){
	X = model.matrix(m, ...)
	n = nrow(X); k = ncol(X)
	# white matrix
	e = resid(m)/ sqrt(n / (n - k))
	# bread
	A = crossprod(X)
	# meat
	B = wcrossprod(X, w = e^2)
	# bread meat bread
	V = solve(A) %% B %% solve(A)
	return(list(V = V, A = A, B = B, e = e, X = X))
	}
	# %%
	f1 = feols(price ~ mpg + weight, data = auto, se = 'hc1')
	f2 = feols(weight ~ 1 \| 0 \| length ~ rep78, data = auto, subset = ~ foreign == "Foreign", se = 'hc1')
	r1 = vcovget(f1)
	r2 = vcovget(f2, type ="iv.rhs2")
	# this hardcodes the fact that m2 has a subset of obs - can be generalised
	B12 = wcrossprod(r1$X[f2$obs_selection$subset, ], r2$X,
	w = r1$e[f2$obs_selection$subset] * r2$e)
	V12 = solve(r1$A) %% B12 %% solve(r2$A)
	V = rbind(cbind(r1$V, V12), cbind(t(V12), r2$V))
	list(Estimate = coef(f1)['mpg'] + coef(f2)['fit_length'],
	se = sqrt(V[2, 2] + 2 * V[2, 5] + V[5, 5])
	)