Last active
April 19, 2019 12:37
-
-
Save BioSciEconomist/a72fae6e01053fdb6d13c9a80d8e39f9 to your computer and use it in GitHub Desktop.
Demonstrate how an IV captures the treatment effect of the compliers or LATE
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# *----------------------------------------------------------------- | |
# | PROGRAM NAME: ex LATE and IV.R | |
# | DATE: 4/18/19 | |
# | CREATED BY: MATT BOGARD | |
# | PROJECT FILE: /Google Drive/R Training | |
# *---------------------------------------------------------------- | |
# | PURPOSE: demonstrate how an IV captures the treatment effect of the compliers, or LATE | |
# *---------------------------------------------------------------- | |
# see companion blog post: https://econometricsense.blogspot.com/2019/04/intent-to-treat-instrumental-variables.html | |
# this is largely based on: http://egap.org/methods-guides/10-things-you-need-know-about-local-average-treatment-effect | |
# additional references: | |
# 1) Angrist, Joshua D., et al. “Identification of Causal Effects Using Instrumental Variables.” | |
# Journal of the American Statistical Association, vol. 91, no. 434, 1996, pp. 444–455. JSTOR, www.jstor.org/stable/2291629. | |
# 2) Angrist, J.D. J Exp Criminol (2006) 2: 23. https://doi.org/10.1007/s11292-005-5126-x | |
# see also: https://theincidentaleconomist.com/wordpress/instrumental-variable-corrected-randomized-trial/ | |
library(dplyr) # data wrangling and aggregations | |
library(AER) # for IV estimation | |
#----------------------------------- | |
# simulate data for LATE example | |
#----------------------------------- | |
# Z is our random treatment assignment indicator - and our 'instrument'.
# Units 1-100 are the treatment cases (Z = 1); units 101-200 are the
# controls (Z = 0).
# rep() builds the whole numeric vector in one step instead of growing a list
# one element at a time inside two loops.
Z <- rep(c(1, 0), each = 100)
# initialize key variables
# The 200 units are laid out in four consecutive groups:
#   units   1-20  : never takers among the treatment cases
#   units  21-100 : compliers among the treatment cases
#   units 101-120 : never takers among the controls
#   units 121-200 : compliers among the controls
# Each variable below is built in one vectorized rep() call using these sizes.
group_sizes <- c(20, 80, 20, 80)

# label for our compliers and never takers
status <- rep(
  c("never taker", "complier", "never taker", "complier"),
  times = group_sizes
)

# outcome values: never takers have y = 5 in both arms; compliers have y = 25
# among the treatment cases and y = 20 among the controls
y <- rep(c(5, 25, 5, 20), times = group_sizes)

# treatment received indicator - note the combination of treatment received (D)
# and treatment assigned determines our 'compliers' and 'never takers':
# never takers have D = 0 regardless of treatment assignment, while compliers
# have D = 1 when assigned to treatment and D = 0 when assigned to control
D <- rep(c(0, 1, 0, 0), times = group_sizes)
# pack up the simulated variables into a data frame
# unlist() flattens list input and returns atomic vectors unchanged, so these
# conversions give plain numeric / character columns either way. The outcome,
# assignment and treatment indicators are already numeric, so no intermediate
# as.character() round-trip is needed.
y <- as.numeric(unlist(y))
Z <- as.numeric(unlist(Z))
D <- as.numeric(unlist(D))
status <- as.character(unlist(status))
df <- data.frame(y, Z, D, status)
#---------------------------------------- | |
# summarize our toy data | |
#---------------------------------------- | |
# cross-tabulate compliance status by treatment assignment, reading both
# columns from df so this summary uses the same data as the ones below
with(df, table(status, Z)) # we see that we have 40 never takers and 160 compliers

# break down outcomes by treatment assigned, treatment received and compliance status
df %>%
  group_by(status, D, Z) %>%
  summarize(Yavg = mean(y))

# average outcome by treatment received only
df %>%
  group_by(D) %>%
  summarize(Yavg = mean(y))
# from this data it is clear that the difference in outcomes for our compliers is 5 (25 - 20); that is the LATE we will
# identify using instrumental variables estimation below, and compare to our ITT and 'as treated' analyses
#-------------------------------------- | |
# analysis | |
#-------------------------------------- | |
# intent-to-treat (ITT): regress the outcome on treatment assignment
summary(lm(y ~ Z, data = df)) # ITT estimate EST_B = 4

# compare those in treatment group that received treatment (compliers) to all controls
mean(df$y[df$Z == 1 & df$D == 1]) - mean(df$y[df$Z == 0]) # EST_B = 8

# compare 'as treated' to those not treated
summary(lm(y ~ D, data = df)) # EST_B = 10

# estimate LATE via 2SLS
# D_star lives in the workspace (not in df); lm() finds it there because it is
# not a column of df. This manual second stage reproduces the point estimate
# only - its standard errors ignore the first-stage estimation, so use the
# ivreg() fit below for inference.
D_star <- predict(lm(D ~ Z, data = df)) # 1st stage regression
lm(y ~ D_star, data = df) # 2nd stage regression EST_BIV = 5

# estimate local average treatment effect or b_iv using Z (random treatment
# assignment) as an instrumental variable using ivreg
summary(ivreg(y ~ D | Z, data = df))

# notice this is the same as the difference in means for the compliers in
# treatment and control groups (which is LATE)
mean(df$y[df$status == "complier" & df$Z == 1]) -
  mean(df$y[df$status == "complier" & df$Z == 0])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment