Created
April 9, 2019 17:20
-
-
Save steven-tey/dbdf78992dfd7805c46016ec3bb1dbdc to your computer and use it in GitHub Desktop.
CS112 Assignment 4 - Causal Inference
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################## | |
# QUESTION 1 - Debugging # | |
########################## | |
# (1a) | |
# genout <- GenMatch(Tr=treat, X=X) | |
# summary(mout) | |
# mb <- MatchBalance(treat~age +educ+black+ hisp+ married+ nodegr+ u74+ u75+ | |
# re75+ re74+ I(re74*re75) + re78, | |
# match.out=genout, nboots=500) | |
# In this example, the "match.out" parameter in the MatchBalance function is denoted | |
# as "genout" - which means that we essentially have the genetic object as the | |
# match.out object. This is incorrect, as we should be using the output from the | |
# match function instead (mout), given the fact that we should be doing match | |
# before matchbalance. Also, the re78 variable shouldn't be a part of the MatchBalance | |
# function, since it is not part of the number of independent variables (X) delineated | |
# in both the genout & mout functions. This is because re78 is essentially the | |
# treatment effect and has been expressed by the "treat" term at the very beginning. | |
# (1b) | |
# genout <- GenMatch(Tr=treat, X=X, BalanceMatrix=BalanceMat, estimand="ATT", exact = TRUE, M=1, | |
# pop.size=16, max.generations=10, wait.generations=1) | |
#The outcome variable | |
# Y=re78/1000 | |
# | |
# Now that GenMatch() has found the optimal weights, let's estimate | |
# our causal effect of interest using those weights | |
# | |
# mout <- Match(Y=Y, Tr=treat, X=X, Weight.matrix=genout) | |
# summary(mout) | |
# | |
#Let's determine if balance has actually been obtained on the variables of interest | |
# | |
# mb <- MatchBalance(treat~age +educ+black+ hisp+ married+ nodegr+ u74+ u75+ | |
# re75+ re74+ I(re74*re75), | |
# match.out=mout, nboots=500) | |
# The problem with this code is the difference in the "exact" parameters between | |
# the GenMatch and the Match functions. In GenMatch, exact=TRUE, hence we are matching | |
# exactly on all the axis. However, in Match, we are not doing that, since it is | |
# not the default settings, and the exact parameter is not specified here. Therefore, | |
# there will be a dissonance in the matching mechanisms of both functions, which will | |
# break the code. | |
# (1c) | |
# genout <- GenMatch(Tr=treat, X=X, BalanceMatrix=BalanceMat) | |
#The outcome variable | |
# Y=re78/1000 | |
# | |
# Now that GenMatch() has found the optimal weights, let's estimate | |
# our causal effect of interest using those weights | |
# | |
# mout <- Match(Y=Y, Tr=treat, X=X, Weight.matrix=genout) | |
# summary(mout) | |
# | |
#Let's determine if balance has actually been obtained on the variables of interest | |
# | |
# mb <- MatchBalance(re78~age +educ+black+ hisp+ married+ nodegr+ u74+ u75+ | |
# re75+ re74+ I(re74*re75), | |
# match.out=mout, nboots=500) | |
# In this case, the mistake lies in the MatchBalance function - although all the | |
# covariates are correct in this case, the treatment indicator (independent variable) | |
# is incorrect. Instead of using re78 (which is the treatment effect/dependent | |
# variable), we should be using the "treat" term instead. | |
############## | |
# QUESTION 2 # | |
############## | |
foo <- read.csv("https://course-resources.minerva.kgi.edu/uploaded_files/mke/00086677-3767/peace.csv") | |
# extract relevant columns | |
foo <- foo[, c(6:8, 11:16, 99, 50, 114, 49, 63, 136, 109, 126, 48, 160, 142, 10)] | |
# remove 2 rows with missing data | |
foo <- foo[c(-19, -47), ] | |
# check that all missing data is gone... | |
which(is.na(foo) == TRUE) | |
# take a peek at the data set (identify the columns) | |
head(foo) | |
# the basic model (without any interaction terms) | |
basic.model = glm(pbs2s3 ~ wartype + logcost + wardur+ factnum + factnum2 + trnsfcap + develop + exp + decade + treaty + untype4, data = foo, family = "binomial") | |
# the additional model (with the interaction term wardur*untype4) | |
additional.model = glm(pbs2s3 ~ wartype + logcost + wardur+ factnum + factnum2 + trnsfcap + develop + exp + decade + treaty + untype4 + wardur*untype4 , data = foo, family = "binomial") | |
# the advanced model (with the interaction term logcost*untype4) | |
advanced.model = glm(pbs2s3 ~ wartype + logcost + wardur+ factnum + factnum2 + trnsfcap + develop + exp + decade + treaty + untype4 + logcost*untype4 , data = foo, family = "binomial") | |
# Referring to Figure 4, we can see that the independent variable is the duration of | |
# war. Therefore, we will be looping through values of "wardur" to obtain the | |
# treatment effects at different levels of "wardur", whilst holding the predictors | |
# at their means. | |
# First, we have to create empty storage vectors to store the values generated from | |
# the for loops. | |
basic.model.store <- rep(0, 106) | |
additional.model.store <- rep(0, 106) | |
advanced.model.store <- rep(0, 106) | |
a = 0 | |
# Next, we will have to hold all the predictors at their means, except for wardur | |
# and untype4. | |
mean.wartype <- mean(foo$wartype) | |
mean.logcost <- mean(foo$logcost) | |
mean.factnum <- mean(foo$factnum) | |
mean.factnum2 <- mean(foo$factnum2) | |
mean.trnsfcap <- mean(foo$trnsfcap) | |
mean.develop <- mean(foo$develop) | |
mean.exp <- mean(foo$exp) | |
mean.decade <- mean(foo$decade) | |
mean.treaty <- mean(foo$treaty) | |
# Then, we create the for loop. Since the average treatment effect (ATE) is the | |
# difference between the treatment and the control groups, we will have to set them | |
# up separately and find the difference. Then, after each loop, we store the results | |
# the aforementioned storage vectors. | |
for(i in seq(0, 315, 3)) { | |
a = a+1 | |
treatment1 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 1) | |
control1 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 0) | |
predict.treatment1 = predict(basic.model, treatment1, type = "response") | |
predict.control1 = predict(basic.model, control1, type = "response") | |
treatment.effect1 = predict.treatment1 - predict.control1 | |
basic.model.store[a] = treatment.effect1 | |
treatment2 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 1) | |
control2 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 0) | |
predict.treatment2 = predict(additional.model, treatment2, type = "response") | |
predict.control2 = predict(additional.model, control2, type = "response") | |
treatment.effect2 = predict.treatment2 - predict.control2 | |
additional.model.store[a] = treatment.effect2 | |
treatment3 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 1) | |
control3 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 0) | |
predict.treatment3 = predict(advanced.model, treatment3, type = "response") | |
predict.control3 = predict(advanced.model, control3, type = "response") | |
treatment.effect3 = predict.treatment3 - predict.control3 | |
advanced.model.store[a] = treatment.effect3 | |
} | |
# Finally, we will plot two graphs: the first one to replicate Figure 4, and the | |
# second one where we use a different interaction term, logcost*untype4. We will still | |
# include the basic model in the second graph as a baseline comparison. | |
# First Graph: | |
x_axis = seq(0, 315, 3) | |
plot(x_axis, additional.model.store, type = "l", xaxs="i", yaxs="i", xaxt='n', yaxt='n', ylim = c(0, 0.8), xlim = c(0, 315), main= "Causal Effect of Multidimensional UN Peacekeeping Operations", cex.main = 1.5, xlab = "Duration of wars in months", ylab = "Marginal effects of UN peacekeeping operations") | |
lines(x_axis, basic.model.store, lty = 5) | |
text(88, 0.3, labels = "Model with interaction term wardur*untype4") | |
text(190, 0.5, labels = "Dotted: Original model") | |
yticks = seq(0, 0.8, 0.05) | |
xticks = seq(5, 315, 7.5) | |
axis(side=1, at=xticks, labels=xticks) | |
axis(side=2, at=yticks, labels=yticks) | |
# Second Graph: | |
x_axis = seq(0, 315, 3) | |
plot(x_axis, advanced.model.store, type = "l", xaxs="i", yaxs="i", xaxt='n', yaxt='n', ylim = c(0, 0.8), xlim = c(0, 315), main= "Causal Effect of Multidimensional UN Peacekeeping Operations", cex.main = 1.5, xlab = "Duration of wars in months", ylab = "Marginal effects of UN peacekeeping operations") | |
lines(x_axis, basic.model.store, lty = 5) | |
text(160, 0.74, labels = "Model with interaction term logcost*untype4") | |
text(190, 0.5, labels = "Dotted: Original model") | |
yticks = seq(0, 0.8, 0.05) | |
xticks = seq(5, 315, 7.5) | |
axis(side=1, at=xticks, labels=xticks) | |
axis(side=2, at=yticks, labels=yticks) | |
############## | |
# QUESTION 3 # | |
############## | |
# Define treatment as below: | |
# Tr <- rep(0, length(foo$uncint)) | |
# Tr[which(foo$uncint != 0 & foo$uncint != 1)] <- 1 | |
# What does this mean? What is "treatment"? | |
# According to the handbook, the term uncint represents the type of UN operation | |
# from least intrusive (0) to most intrusive (4). Therefore, this line of code | |
# categorizes the various uncint values in the foo dataset into two big categories: | |
# the ones from 0 to 1 will go under Tr == 0, which means there is no treatment | |
# being applied. The ones from 2 to 4 will go under Tr == 1, which is the category | |
# where treatment is being applied. In this context, treatment is basically intrusive | |
# measures employed to diffuse/stop regional conflicts. | |
############## | |
# QUESTION 4 # | |
############## | |
# (4a) | |
# What is the effect of the presence of intrusive UN intervention (treatment) on the | |
# lenient peacebuilding success 2 years (PBS2L) and 5 years (PBS5L) after the war, | |
# in comparison to the lack of intrusive UN intervention (control)? | |
# (4b) | |
# In this case, SUTVA doesn't necessarily hold because the UN peacebuilding | |
# intervention in one country might affect the peacebuilding success in neighboring | |
# countries, due to international ties and various geographical and situational | |
# factors, and thus, the treatment applied to one unit might inadvertently affect | |
# the outcome for another unit. | |
# The 'restrict' function in Match/GenMatch can be used to restrict the possible | |
# matches that we get from the matching function, thus, we can use this to restrict | |
# the matches in such a way to make sure that neighboring countries do not get | |
# matched together as treatment and control pairs in the matching process - this is | |
# given that geographical connections are the reason behind an unheld SUTVA. | |
# (4c) | |
foo <- read.csv("https://course-resources.minerva.kgi.edu/uploaded_files/mke/00086677-3767/peace.csv") | |
# remove 2 rows with missing data | |
foo <- foo[c(-19, -47), ] | |
# Now, we will have to change the treatment to a binary value in order to run | |
# regression. | |
pbs2l <- as.numeric(foo$pbs2l) - 1 | |
pbs5l <- as.numeric(foo$pbs5l) - 1 | |
# Then, we have to replace all the NAs with the integer 0. | |
which.have.NAs <- which(is.na(pbs5l == TRUE)) | |
pbs5l[which.have.NAs] <- 0 | |
# Then, we categorize the types of UN operations into two groups: control (the None | |
# ones) and treatment (everything else) | |
#Tr <- rep(0, length(foo$untype)) | |
#Tr = foo$untype3 +foo$untype4 +foo$untype5 | |
Tr <- rep(0, length(foo$uncint)) | |
Tr[which(foo$uncint != "None")] <- 1 | |
# Extract relevant columns and form a new data set | |
data <- foo[, c(6:8, 11:16, 99, 50, 114, 49, 63, 136, 109, 126, 48, 160, 142, 10)] | |
# Now, we have to append the binary data we created earlier to the new data set | |
cbind(data, pbs2l, pbs5l, Tr) | |
# check that all missing data is gone... | |
which(is.na(data) == TRUE) | |
# Now, we can run logistic regression | |
glm2y = glm(pbs2l ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade + Tr, family=binomial, data=data) | |
glm5y = glm(pbs5l ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade + Tr, family=binomial, data=data) | |
# We will also run MatchBalance to check for the lowest p-value | |
glmMB = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade + Tr, data = data, nboots = 1000) | |
# Now, we find the treaetment effect by using the same method as Question 2 | |
# For Lenient PB success two years after the war | |
treat.pbs2l = data.frame(Tr = 1, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade)) | |
ctrl.pbs2l = data.frame(Tr = 0, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade)) | |
effect.pbs2l = predict(glm2y, treat.pbs2l, type = "response") - predict(glm2y, ctrl.pbs2l, type = "response") | |
# For Lenient PB success five years after the war | |
treat.pbs5l = data.frame(Tr = 1, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade)) | |
ctrl.pbs5l = data.frame(Tr = 0, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade)) | |
effect.pbs5l = predict(glm5y, treat.pbs5l, type = "response") - predict(glm5y, ctrl.pbs5l, type = "response") | |
# Propensity Score Matching | |
# First, we have to find the propensity scores | |
prop.scores = glm(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, family = "binomial") | |
# Then, we can run propensity score matching for pbs2l (no bias adj) | |
X <- prop.scores$fitted.values | |
Tr <- Tr | |
match.prop1 <- Match(Y=pbs2l, X=X, Tr=Tr, estimand = "ATT") | |
summary(match.prop1) | |
psm.pbs2l.nobias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop1, nboots=1000) | |
# Using the same method, we can run propensity score matching for pbs2l (bias adj) | |
X <- prop.scores$fitted.values | |
Tr <- Tr | |
match.prop2 <- Match(Y=pbs2l, X=X, Tr=Tr, estimand = "ATT", BiasAdjust = TRUE) | |
summary(match.prop2) | |
psm.pbs2l.withbias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop2, nboots=1000) | |
# Then, we can run propensity score matching for pbs5l (no bias adj) | |
X <- prop.scores$fitted.values | |
Tr <- Tr | |
match.prop3 <- Match(Y=pbs5l, X=X, Tr=Tr, estimand = "ATT", caliper = 0.025, M=2) | |
summary(match.prop3) | |
psm.pbs5l.nobias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop3, nboots=1000) | |
# Using the same method, we can run propensity score matching for pbs51 (bias adj) | |
X <- prop.scores$fitted.values | |
Tr <- Tr | |
match.prop4 <- Match(Y=pbs5l, X=X, Tr=Tr, estimand = "ATT", BiasAdjust = TRUE, caliper = 0.025, M=2) | |
summary(match.prop4) | |
psm.pbs5l.withbias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop4, nboots=1000) | |
# Genetic Matching | |
library(rgenoud) | |
X <- cbind(data[ ,c(11:16, 18:21)]) | |
Y2 = pbs2l | |
Y5 = pbs5l | |
Tr <- Tr | |
genout <- GenMatch(X = X, Tr = Tr, estimand = "ATT", M=1, pop.size=300, max.generations=30, wait.generations=30) | |
# GenMatch for pbs2l without bias | |
match1 <- Match(X=X, Y=Y2, Tr=Tr, estimand = "ATT", Weight.matrix=genout) | |
gm.pbs2l.nobias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match1, nboots=1000) | |
summary(gm.pbs2l.nobias) | |
summary(match1) | |
# GenMatch for pbs2l with bias | |
match2 <- Match(X=X, Y=Y2, Tr=Tr, estimand = "ATT", Weight.matrix=genout, BiasAdjust = TRUE) | |
gm.pbs2l.withbias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match2, nboots=1000) | |
summary(gm.pbs2l.withbias) | |
summary(match2) | |
# GenMatch for pbs5l without bias | |
match3 <- Match(X=X, Y=Y5, Tr=Tr, estimand = "ATT", Weight.matrix=genout) | |
gm.pbs5l.nobias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match3, nboots=1000) | |
summary(gm.pbs5l.nobias) | |
summary(match3) | |
# GenMatch for pbs5l with bias | |
match4 <- Match(X=X, Y=Y5, Tr=Tr, estimand = "ATT", Weight.matrix=genout, BiasAdjust = TRUE) | |
gm.pbs5l.withbias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match4, nboots=1000) | |
summary(gm.pbs5l.withbias) | |
summary(match4) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment