Skip to content

Instantly share code, notes, and snippets.

@sdaza
Last active February 16, 2021 09:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sdaza/99c4a13349858e2745733bd6e998f19f to your computer and use it in GitHub Desktop.
Save sdaza/99c4a13349858e2745733bd6e998f19f to your computer and use it in GitHub Desktop.
Intro to instrumental variables
# intro to instrumental variables
# author: sebastian daza
# load libraries
library(simsem)
library(data.table)
library(texreg)
library(ivpack)
library(lme4)
library(ggplot2)
# colliders
model = "
Z ~ 1.5*X
Z ~ 1.5*Y
"
dat = simulateData(model, sample.nobs = 5000, model.type = "sem",
orthogonal = TRUE)
dat = data.table(dat)
dat[, Zd := ifelse(Z > 0, 1, 0)]
ggplot(dat[Zd == 1], aes(X, Y)) + geom_point()
ggplot(dat, aes(X, Y, color = Zd)) + geom_point()
m1 = lm(Y ~ X, data = dat)
m2 = lm(Y ~ X + Z, data = dat)
# example 1
model = "
Y ~ 0.6*U
T ~ 0.5*Z + 0.5*U
"
exploreIV = function(model) {
dat = simulateData(model, sample.nobs = 5000,
model.type = "sem", orthogonal = TRUE)
m1 = lm(Y ~ T, data = dat)
m2 = lm(Y ~ T + Z, data = dat)
dat$Tf = fitted.values(lm(T ~ Z, data = dat))
m3 = lm(Y ~ Tf, dat = dat)
m4 = ivreg(Y ~ T, ~ Z, data = dat)
m5 = lm(Y ~ T + U, data = dat)
return(list(dat, m1, m2, m3, m4, m5))
}
model_names = c("Naive", "Bias amplifier", "2SLS by hand", "2SLS", "God")
output = exploreIV(model)
texreg(output[-1],
custom.coef.map = list(T = "T", Tf = "Tf", Z = "Z", U = "U"),
dcolumn = TRUE,
custom.model.names = model_names)
dat = output[[1]]
cov(dat$Y, dat$Z) / cov(dat$T,dat$Z)
(0.5 * 0.0) / 0.5
# example 2
model = "
Y ~ 0.6*U
T ~ 0.5*U
Z ~ 0.5*T
"
dat = simulateData(model, sample.nobs = 100000,
model.type = "sem", orthogonal = TRUE)
cov(dat$Y, dat$Z) / cov(dat$T,dat$Z)
0.5*0.6
output = exploreIV(model)
texreg(output[c(2, 5, 6)],
dcolumn = TRUE,
custom.coef.map = list(T = "T", Tf = "Tf", Z = "Z", U = "U"),
custom.model.names = model_names[c(1, 4, 5)])
# example 3
model = "
Y ~ 0.6*U3 + 0.5*D
T ~ 0.5*U2 + 0.5*Z + 0.6*U3
Z ~ 0.5*U1
D ~ 0.5*U2 + 0.3*U1
"
dat = simulateData(model, sample.nobs = 5000,
model.type = "sem", orthogonal = TRUE)
m1 = lm(Y ~ T, data = dat)
m2 = lm(Y ~ T + D, data = dat)
m3 = ivreg(Y ~ T, ~ Z, data = dat)
m4 = ivreg(Y ~ T + D, ~ Z + D, data = dat)
m5 = lm(Y ~ T + U3 + D, data = dat)
screenreg(list(m1, m2, m3, m4, m5))
texreg(list(m1, m2, m3, m4, m5),
custom.coef.map = list(T = "T", Z = "Z", D = "D"),
custom.model.names = c("Naive", "Naive + D", "2SLS", "2SLS + D", "God"),
dcolumn = TRUE,
use.packges = TRUE)
# example 4
model = "
Y ~ 1.5*E + 1.5*U1 + 0.5*U2
T ~ 0.3*Z + 0.5*U2
E ~ 0.4*Z + 0.5*U1
"
dat = simulateData(model, sample.nobs = 10000,
model.type = "sem", orthogonal = TRUE)
m1 = lm(Y ~ T, data = dat)
m2 = lm(Y ~ T + E, data = dat)
m3 = ivreg(Y ~ T, ~ Z, data = dat)
m4 = ivreg(Y ~ T + E, ~ Z + E, data = dat)
m5 = lm(Y ~ T + U1 + U2 + E, data = dat)
cov(dat$Y, dat$Z) / cov(dat$T,dat$Z)
screenreg(list(m1, m2, m3, m4, m5))
texreg(list(m1, m2, m3, m4, m5),
custom.coef.map = list(T = "T", Z = "Z", E = "E"),
custom.model.names = c("Naive", "Naive + E", "2SLS", "2SLS + E", "God"),
dcolumn = TRUE,
use.packges = TRUE)
# example 5
model = "
Y ~ 0.3*T + 0.6*U
T ~ 0.5*Z + 0.5*U
D ~ 1.5*T
"
dat = simulateData(model, sample.nobs = 20000,
model.type = "sem", orthogonal = TRUE)
cov(dat$Y, dat$Z) / cov(dat$T,dat$Z)
dat = data.table(dat)
dat[, Dd := ifelse(D < 0, 0, 1)]
m1 = lm(Y ~ T, data = dat[Dd == 1])
m2 = ivreg(Y ~ T , ~ Z , data = dat[Dd == 1])
m3 = lm(Y ~ T + U, data = dat)
screenreg(list(m1, m2, m3))
cor(dat[Dd == 1, .(T, Y)])
cor(dat[Dd == 0, .(T, Y)])
texreg(list(m1, m2, m3),
custom.coef.map = list(T = "T"),
custom.model.names = c("Naive", "2SLS", "God"),
dcolumn = TRUE,
use.packges = TRUE)
# heterogeneity
# example 6
a = "
Y ~ 0.3 *T + 0.6*U
T ~ 0.01 *Z + 0.5*U
"
b = "
Y ~ 0.6*T + 0.6*U
T ~ 0.7*Z + 0.5*U
"
c = "
Y ~ 0.1*T + 0.6*U
T ~ 0.3*Z + 0.5*U
"
d = "
Y ~ 0.5*T + 0.6*U
T ~ 0.9*Z + 0.5*U
"
mean(c(0.3, 0.6))
mean(c(0.3, 0.6, 0.1, 0.5))
# first two groups
datasets = list()
for (i in c("a", "b")) {
temp = simulateData(get(i), sample.nobs = 10000,
model.type = "sem", orthogonal = TRUE)
temp$pop = i
datasets[[i]] = temp
}
dat = rbindlist(datasets)
m1 = lm(Y ~ T, data = dat)
m2 = ivreg(Y ~ T, ~ Z, data = dat)
m3 = lm(Y ~ T + U + as.factor(pop), data = dat)
screenreg(list(m1, m2, m3))
texreg(list(m1, m2, m3),
custom.coef.map = list(T = "T"),
custom.model.names = c("Naive", "2SLS", "God using OLS"),
dcolumn = TRUE,
use.packges = TRUE)
# all groups
datasets = list()
for (i in c("a", "b", "c", "d")) {
temp = simulateData(get(i), sample.nobs = 1000,
model.type = "sem", orthogonal = TRUE)
temp$pop = i
datasets[[i]] = temp
}
dat = rbindlist(datasets)
m1 = lm(Y ~ T, data = dat)
m2 = ivreg(Y ~ T, ~ Z, data = dat)
m3 = lm(Y ~ T + U, data = dat)
m4 = lmer(Y ~ T + U + (T|pop), data = dat)
screenreg(list(m1, m2, m3, m4))
texreg(list(m1, m2, m3, m4),
custom.coef.map = list(T = "T"),
custom.model.names = c("Naive", "2SLS", "God using OLS", "God using ML"),
dcolumn = TRUE,
use.packges = TRUE)
# monotonicity
a = "
Y ~ 0.3*T + 0.6*U
T ~ -0.5*Z + 0.5*U
"
b = "
Y ~ 0.6*T + 0.6*U
T ~ 0.7*Z + 0.5*U
"
datasets = list()
for (i in c("a", "b")) {
temp = simulateData(get(i), sample.nobs = 10000,
model.type = "sem", orthogonal = TRUE)
temp$pop = i
datasets[[i]] = temp
}
dat = rbindlist(datasets)
m1 = lm(Y ~ T, data = dat)
m2 = ivreg(Y ~ T, ~ Z, data = dat)
m3 = lm(Y ~ T + U + as.factor(pop), data = dat)
texreg(list(m1, m2, m3),
custom.coef.map = list(T = "T"),
custom.model.names = c("Naive", "2SLS", "God using OLS"),
dcolumn = TRUE,
use.packges = TRUE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment