Last active
February 16, 2021 09:54
-
-
Save sdaza/99c4a13349858e2745733bd6e998f19f to your computer and use it in GitHub Desktop.
Intro to instrumental variables
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# intro to instrumental variables | |
# author: sebastian daza | |
# load libraries | |
library(simsem) | |
library(data.table) | |
library(texreg) | |
library(ivpack) | |
library(lme4) | |
library(ggplot2) | |
# colliders | |
model = " | |
Z ~ 1.5*X | |
Z ~ 1.5*Y | |
" | |
dat = simulateData(model, sample.nobs = 5000, model.type = "sem", | |
orthogonal = TRUE) | |
dat = data.table(dat) | |
dat[, Zd := ifelse(Z > 0, 1, 0)] | |
ggplot(dat[Zd == 1], aes(X, Y)) + geom_point() | |
ggplot(dat, aes(X, Y, color = Zd)) + geom_point() | |
m1 = lm(Y ~ X, data = dat) | |
m2 = lm(Y ~ X + Z, data = dat) | |
# example 1 | |
model = " | |
Y ~ 0.6*U | |
T ~ 0.5*Z + 0.5*U | |
" | |
exploreIV = function(model) { | |
dat = simulateData(model, sample.nobs = 5000, | |
model.type = "sem", orthogonal = TRUE) | |
m1 = lm(Y ~ T, data = dat) | |
m2 = lm(Y ~ T + Z, data = dat) | |
dat$Tf = fitted.values(lm(T ~ Z, data = dat)) | |
m3 = lm(Y ~ Tf, dat = dat) | |
m4 = ivreg(Y ~ T, ~ Z, data = dat) | |
m5 = lm(Y ~ T + U, data = dat) | |
return(list(dat, m1, m2, m3, m4, m5)) | |
} | |
model_names = c("Naive", "Bias amplifier", "2SLS by hand", "2SLS", "God") | |
output = exploreIV(model) | |
texreg(output[-1], | |
custom.coef.map = list(T = "T", Tf = "Tf", Z = "Z", U = "U"), | |
dcolumn = TRUE, | |
custom.model.names = model_names) | |
dat = output[[1]] | |
cov(dat$Y, dat$Z) / cov(dat$T,dat$Z) | |
(0.5 * 0.0) / 0.5 | |
# example 2 | |
model = " | |
Y ~ 0.6*U | |
T ~ 0.5*U | |
Z ~ 0.5*T | |
" | |
dat = simulateData(model, sample.nobs = 100000, | |
model.type = "sem", orthogonal = TRUE) | |
cov(dat$Y, dat$Z) / cov(dat$T,dat$Z) | |
0.5*0.6 | |
output = exploreIV(model) | |
texreg(output[c(2, 5, 6)], | |
dcolumn = TRUE, | |
custom.coef.map = list(T = "T", Tf = "Tf", Z = "Z", U = "U"), | |
custom.model.names = model_names[c(1, 4, 5)]) | |
# example 3 | |
model = " | |
Y ~ 0.6*U3 + 0.5*D | |
T ~ 0.5*U2 + 0.5*Z + 0.6*U3 | |
Z ~ 0.5*U1 | |
D ~ 0.5*U2 + 0.3*U1 | |
" | |
dat = simulateData(model, sample.nobs = 5000, | |
model.type = "sem", orthogonal = TRUE) | |
m1 = lm(Y ~ T, data = dat) | |
m2 = lm(Y ~ T + D, data = dat) | |
m3 = ivreg(Y ~ T, ~ Z, data = dat) | |
m4 = ivreg(Y ~ T + D, ~ Z + D, data = dat) | |
m5 = lm(Y ~ T + U3 + D, data = dat) | |
screenreg(list(m1, m2, m3, m4, m5)) | |
texreg(list(m1, m2, m3, m4, m5), | |
custom.coef.map = list(T = "T", Z = "Z", D = "D"), | |
custom.model.names = c("Naive", "Naive + D", "2SLS", "2SLS + D", "God"), | |
dcolumn = TRUE, | |
use.packges = TRUE) | |
# example 4 | |
model = " | |
Y ~ 1.5*E + 1.5*U1 + 0.5*U2 | |
T ~ 0.3*Z + 0.5*U2 | |
E ~ 0.4*Z + 0.5*U1 | |
" | |
dat = simulateData(model, sample.nobs = 10000, | |
model.type = "sem", orthogonal = TRUE) | |
m1 = lm(Y ~ T, data = dat) | |
m2 = lm(Y ~ T + E, data = dat) | |
m3 = ivreg(Y ~ T, ~ Z, data = dat) | |
m4 = ivreg(Y ~ T + E, ~ Z + E, data = dat) | |
m5 = lm(Y ~ T + U1 + U2 + E, data = dat) | |
cov(dat$Y, dat$Z) / cov(dat$T,dat$Z) | |
screenreg(list(m1, m2, m3, m4, m5)) | |
texreg(list(m1, m2, m3, m4, m5), | |
custom.coef.map = list(T = "T", Z = "Z", E = "E"), | |
custom.model.names = c("Naive", "Naive + E", "2SLS", "2SLS + E", "God"), | |
dcolumn = TRUE, | |
use.packges = TRUE) | |
# example 5 | |
model = " | |
Y ~ 0.3*T + 0.6*U | |
T ~ 0.5*Z + 0.5*U | |
D ~ 1.5*T | |
" | |
dat = simulateData(model, sample.nobs = 20000, | |
model.type = "sem", orthogonal = TRUE) | |
cov(dat$Y, dat$Z) / cov(dat$T,dat$Z) | |
dat = data.table(dat) | |
dat[, Dd := ifelse(D < 0, 0, 1)] | |
m1 = lm(Y ~ T, data = dat[Dd == 1]) | |
m2 = ivreg(Y ~ T , ~ Z , data = dat[Dd == 1]) | |
m3 = lm(Y ~ T + U, data = dat) | |
screenreg(list(m1, m2, m3)) | |
cor(dat[Dd == 1, .(T, Y)]) | |
cor(dat[Dd == 0, .(T, Y)]) | |
texreg(list(m1, m2, m3), | |
custom.coef.map = list(T = "T"), | |
custom.model.names = c("Naive", "2SLS", "God"), | |
dcolumn = TRUE, | |
use.packges = TRUE) | |
# heterogeneity | |
# example 6 | |
a = " | |
Y ~ 0.3 *T + 0.6*U | |
T ~ 0.01 *Z + 0.5*U | |
" | |
b = " | |
Y ~ 0.6*T + 0.6*U | |
T ~ 0.7*Z + 0.5*U | |
" | |
c = " | |
Y ~ 0.1*T + 0.6*U | |
T ~ 0.3*Z + 0.5*U | |
" | |
d = " | |
Y ~ 0.5*T + 0.6*U | |
T ~ 0.9*Z + 0.5*U | |
" | |
mean(c(0.3, 0.6)) | |
mean(c(0.3, 0.6, 0.1, 0.5)) | |
# first two groups | |
datasets = list() | |
for (i in c("a", "b")) { | |
temp = simulateData(get(i), sample.nobs = 10000, | |
model.type = "sem", orthogonal = TRUE) | |
temp$pop = i | |
datasets[[i]] = temp | |
} | |
dat = rbindlist(datasets) | |
m1 = lm(Y ~ T, data = dat) | |
m2 = ivreg(Y ~ T, ~ Z, data = dat) | |
m3 = lm(Y ~ T + U + as.factor(pop), data = dat) | |
screenreg(list(m1, m2, m3)) | |
texreg(list(m1, m2, m3), | |
custom.coef.map = list(T = "T"), | |
custom.model.names = c("Naive", "2SLS", "God using OLS"), | |
dcolumn = TRUE, | |
use.packges = TRUE) | |
# all groups | |
datasets = list() | |
for (i in c("a", "b", "c", "d")) { | |
temp = simulateData(get(i), sample.nobs = 1000, | |
model.type = "sem", orthogonal = TRUE) | |
temp$pop = i | |
datasets[[i]] = temp | |
} | |
dat = rbindlist(datasets) | |
m1 = lm(Y ~ T, data = dat) | |
m2 = ivreg(Y ~ T, ~ Z, data = dat) | |
m3 = lm(Y ~ T + U, data = dat) | |
m4 = lmer(Y ~ T + U + (T|pop), data = dat) | |
screenreg(list(m1, m2, m3, m4)) | |
texreg(list(m1, m2, m3, m4), | |
custom.coef.map = list(T = "T"), | |
custom.model.names = c("Naive", "2SLS", "God using OLS", "God using ML"), | |
dcolumn = TRUE, | |
use.packges = TRUE) | |
# monotonicity | |
a = " | |
Y ~ 0.3*T + 0.6*U | |
T ~ -0.5*Z + 0.5*U | |
" | |
b = " | |
Y ~ 0.6*T + 0.6*U | |
T ~ 0.7*Z + 0.5*U | |
" | |
datasets = list() | |
for (i in c("a", "b")) { | |
temp = simulateData(get(i), sample.nobs = 10000, | |
model.type = "sem", orthogonal = TRUE) | |
temp$pop = i | |
datasets[[i]] = temp | |
} | |
dat = rbindlist(datasets) | |
m1 = lm(Y ~ T, data = dat) | |
m2 = ivreg(Y ~ T, ~ Z, data = dat) | |
m3 = lm(Y ~ T + U + as.factor(pop), data = dat) | |
texreg(list(m1, m2, m3), | |
custom.coef.map = list(T = "T"), | |
custom.model.names = c("Naive", "2SLS", "God using OLS"), | |
dcolumn = TRUE, | |
use.packges = TRUE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment