Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save steven-tey/dbdf78992dfd7805c46016ec3bb1dbdc to your computer and use it in GitHub Desktop.
Save steven-tey/dbdf78992dfd7805c46016ec3bb1dbdc to your computer and use it in GitHub Desktop.
CS112 Assignment 4 - Causal Inference
##########################
# QUESTION 1 - Debugging #
##########################
# (1a)
# genout <- GenMatch(Tr=treat, X=X)
# summary(mout)
# mb <- MatchBalance(treat~age +educ+black+ hisp+ married+ nodegr+ u74+ u75+
# re75+ re74+ I(re74*re75) + re78,
# match.out=genout, nboots=500)
# In this example, the "match.out" parameter in the MatchBalance function is denoted
# as "genout" - which means that we essentially have the genetic object as the
# match.out object. This is incorrect, as we should be using the output from the
# match function instead (mout), given the fact that we should be doing match
# before matchbalance. Also, the re78 variable shouldn't be a part of the MatchBalance
# function, since it is not part of the number of independent variables (X) delineated
# in both the genout & mout functions. This is because re78 is essentially the
# treatment effect and has been expressed by the "treat" term at the very beginning.
# (1b)
# genout <- GenMatch(Tr=treat, X=X, BalanceMatrix=BalanceMat, estimand="ATT", exact = TRUE, M=1,
# pop.size=16, max.generations=10, wait.generations=1)
#The outcome variable
# Y=re78/1000
#
# Now that GenMatch() has found the optimal weights, let's estimate
# our causal effect of interest using those weights
#
# mout <- Match(Y=Y, Tr=treat, X=X, Weight.matrix=genout)
# summary(mout)
#
#Let's determine if balance has actually been obtained on the variables of interest
#
# mb <- MatchBalance(treat~age +educ+black+ hisp+ married+ nodegr+ u74+ u75+
# re75+ re74+ I(re74*re75),
# match.out=mout, nboots=500)
# The problem with this code is the difference in the "exact" parameters between
# the GenMatch and the Match functions. In GenMatch, exact=TRUE, hence we are matching
# exactly on all the axis. However, in Match, we are not doing that, since it is
# not the default settings, and the exact parameter is not specified here. Therefore,
# there will be a dissonance in the matching mechanisms of both functions, which will
# break the code.
# (1c)
# genout <- GenMatch(Tr=treat, X=X, BalanceMatrix=BalanceMat)
#The outcome variable
# Y=re78/1000
#
# Now that GenMatch() has found the optimal weights, let's estimate
# our causal effect of interest using those weights
#
# mout <- Match(Y=Y, Tr=treat, X=X, Weight.matrix=genout)
# summary(mout)
#
#Let's determine if balance has actually been obtained on the variables of interest
#
# mb <- MatchBalance(re78~age +educ+black+ hisp+ married+ nodegr+ u74+ u75+
# re75+ re74+ I(re74*re75),
# match.out=mout, nboots=500)
# In this case, the mistake lies in the MatchBalance function - although all the
# covariates are correct in this case, the treatment indicator (independent variable)
# is incorrect. Instead of using re78 (which is the treatment effect/dependent
# variable), we should be using the "treat" term instead.
##############
# QUESTION 2 #
##############
foo <- read.csv("https://course-resources.minerva.kgi.edu/uploaded_files/mke/00086677-3767/peace.csv")
# extract relevant columns
foo <- foo[, c(6:8, 11:16, 99, 50, 114, 49, 63, 136, 109, 126, 48, 160, 142, 10)]
# remove 2 rows with missing data
foo <- foo[c(-19, -47), ]
# check that all missing data is gone...
which(is.na(foo) == TRUE)
# take a peek at the data set (identify the columns)
head(foo)
# the basic model (without any interaction terms)
basic.model = glm(pbs2s3 ~ wartype + logcost + wardur+ factnum + factnum2 + trnsfcap + develop + exp + decade + treaty + untype4, data = foo, family = "binomial")
# the additional model (with the interaction term wardur*untype4)
additional.model = glm(pbs2s3 ~ wartype + logcost + wardur+ factnum + factnum2 + trnsfcap + develop + exp + decade + treaty + untype4 + wardur*untype4 , data = foo, family = "binomial")
# the advanced model (with the interaction term logcost*untype4)
advanced.model = glm(pbs2s3 ~ wartype + logcost + wardur+ factnum + factnum2 + trnsfcap + develop + exp + decade + treaty + untype4 + logcost*untype4 , data = foo, family = "binomial")
# Referring to Figure 4, we can see that the independent variable is the duration of
# war. Therefore, we will be looping through values of "wardur" to obtain the
# treatment effects at different levels of "wardur", whilst holding the predictors
# at their means.
# First, we have to create empty storage vectors to store the values generated from
# the for loops.
basic.model.store <- rep(0, 106)
additional.model.store <- rep(0, 106)
advanced.model.store <- rep(0, 106)
a = 0
# Next, we will have to hold all the predictors at their means, except for wardur
# and untype4.
mean.wartype <- mean(foo$wartype)
mean.logcost <- mean(foo$logcost)
mean.factnum <- mean(foo$factnum)
mean.factnum2 <- mean(foo$factnum2)
mean.trnsfcap <- mean(foo$trnsfcap)
mean.develop <- mean(foo$develop)
mean.exp <- mean(foo$exp)
mean.decade <- mean(foo$decade)
mean.treaty <- mean(foo$treaty)
# Then, we create the for loop. Since the average treatment effect (ATE) is the
# difference between the treatment and the control groups, we will have to set them
# up separately and find the difference. Then, after each loop, we store the results
# the aforementioned storage vectors.
for(i in seq(0, 315, 3)) {
a = a+1
treatment1 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 1)
control1 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 0)
predict.treatment1 = predict(basic.model, treatment1, type = "response")
predict.control1 = predict(basic.model, control1, type = "response")
treatment.effect1 = predict.treatment1 - predict.control1
basic.model.store[a] = treatment.effect1
treatment2 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 1)
control2 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 0)
predict.treatment2 = predict(additional.model, treatment2, type = "response")
predict.control2 = predict(additional.model, control2, type = "response")
treatment.effect2 = predict.treatment2 - predict.control2
additional.model.store[a] = treatment.effect2
treatment3 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 1)
control3 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 0)
predict.treatment3 = predict(advanced.model, treatment3, type = "response")
predict.control3 = predict(advanced.model, control3, type = "response")
treatment.effect3 = predict.treatment3 - predict.control3
advanced.model.store[a] = treatment.effect3
}
# Finally, we will plot two graphs: the first one to replicate Figure 4, and the
# second one where we use a different interaction term, logcost*untype4. We will still
# include the basic model in the second graph as a baseline comparison.
# First Graph:
x_axis = seq(0, 315, 3)
plot(x_axis, additional.model.store, type = "l", xaxs="i", yaxs="i", xaxt='n', yaxt='n', ylim = c(0, 0.8), xlim = c(0, 315), main= "Causal Effect of Multidimensional UN Peacekeeping Operations", cex.main = 1.5, xlab = "Duration of wars in months", ylab = "Marginal effects of UN peacekeeping operations")
lines(x_axis, basic.model.store, lty = 5)
text(88, 0.3, labels = "Model with interaction term wardur*untype4")
text(190, 0.5, labels = "Dotted: Original model")
yticks = seq(0, 0.8, 0.05)
xticks = seq(5, 315, 7.5)
axis(side=1, at=xticks, labels=xticks)
axis(side=2, at=yticks, labels=yticks)
# Second Graph:
x_axis = seq(0, 315, 3)
plot(x_axis, advanced.model.store, type = "l", xaxs="i", yaxs="i", xaxt='n', yaxt='n', ylim = c(0, 0.8), xlim = c(0, 315), main= "Causal Effect of Multidimensional UN Peacekeeping Operations", cex.main = 1.5, xlab = "Duration of wars in months", ylab = "Marginal effects of UN peacekeeping operations")
lines(x_axis, basic.model.store, lty = 5)
text(160, 0.74, labels = "Model with interaction term logcost*untype4")
text(190, 0.5, labels = "Dotted: Original model")
yticks = seq(0, 0.8, 0.05)
xticks = seq(5, 315, 7.5)
axis(side=1, at=xticks, labels=xticks)
axis(side=2, at=yticks, labels=yticks)
##############
# QUESTION 3 #
##############
# Define treatment as below:
# Tr <- rep(0, length(foo$uncint))
# Tr[which(foo$uncint != 0 & foo$uncint != 1)] <- 1
# What does this mean? What is "treatment"?
# According to the handbook, the term uncint represents the type of UN operation
# from least intrusive (0) to most intrusive (4). Therefore, this line of code
# categorizes the various uncint values in the foo dataset into two big categories:
# the ones from 0 to 1 will go under Tr == 0, which means there is no treatment
# being applied. The ones from 2 to 4 will go under Tr == 1, which is the category
# where treatment is being applied. In this context, treatment is basically intrusive
# measures employed to diffuse/stop regional conflicts.
##############
# QUESTION 4 #
##############
# (4a)
# What is the effect of the presence of intrusive UN intervention (treatment) on the
# lenient peacebuilding success 2 years (PBS2L) and 5 years (PBS5L) after the war,
# in comparison to the lack of intrusive UN intervention (control)?
# (4b)
# In this case, SUTVA doesn't necessarily hold because the UN peacebuilding
# intervention in one country might affect the peacebuilding success in neighboring
# countries, due to international ties and various geographical and situational
# factors, and thus, the treatment applied to one unit might inadvertently affect
# the outcome for another unit.
# The 'restrict' function in Match/GenMatch can be used to restrict the possible
# matches that we get from the matching function, thus, we can use this to restrict
# the matches in such a way to make sure that neighboring countries do not get
# matched together as treatment and control pairs in the matching process - this is
# given that geographical connections are the reason behind an unheld SUTVA.
# (4c)
foo <- read.csv("https://course-resources.minerva.kgi.edu/uploaded_files/mke/00086677-3767/peace.csv")
# remove 2 rows with missing data
foo <- foo[c(-19, -47), ]
# Now, we will have to change the treatment to a binary value in order to run
# regression.
pbs2l <- as.numeric(foo$pbs2l) - 1
pbs5l <- as.numeric(foo$pbs5l) - 1
# Then, we have to replace all the NAs with the integer 0.
which.have.NAs <- which(is.na(pbs5l == TRUE))
pbs5l[which.have.NAs] <- 0
# Then, we categorize the types of UN operations into two groups: control (the None
# ones) and treatment (everything else)
#Tr <- rep(0, length(foo$untype))
#Tr = foo$untype3 +foo$untype4 +foo$untype5
Tr <- rep(0, length(foo$uncint))
Tr[which(foo$uncint != "None")] <- 1
# Extract relevant columns and form a new data set
data <- foo[, c(6:8, 11:16, 99, 50, 114, 49, 63, 136, 109, 126, 48, 160, 142, 10)]
# Now, we have to append the binary data we created earlier to the new data set
cbind(data, pbs2l, pbs5l, Tr)
# check that all missing data is gone...
which(is.na(data) == TRUE)
# Now, we can run logistic regression
glm2y = glm(pbs2l ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade + Tr, family=binomial, data=data)
glm5y = glm(pbs5l ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade + Tr, family=binomial, data=data)
# We will also run MatchBalance to check for the lowest p-value
glmMB = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade + Tr, data = data, nboots = 1000)
# Now, we find the treaetment effect by using the same method as Question 2
# For Lenient PB success two years after the war
treat.pbs2l = data.frame(Tr = 1, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade))
ctrl.pbs2l = data.frame(Tr = 0, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade))
effect.pbs2l = predict(glm2y, treat.pbs2l, type = "response") - predict(glm2y, ctrl.pbs2l, type = "response")
# For Lenient PB success five years after the war
treat.pbs5l = data.frame(Tr = 1, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade))
ctrl.pbs5l = data.frame(Tr = 0, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade))
effect.pbs5l = predict(glm5y, treat.pbs5l, type = "response") - predict(glm5y, ctrl.pbs5l, type = "response")
# Propensity Score Matching
# First, we have to find the propensity scores
prop.scores = glm(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, family = "binomial")
# Then, we can run propensity score matching for pbs2l (no bias adj)
X <- prop.scores$fitted.values
Tr <- Tr
match.prop1 <- Match(Y=pbs2l, X=X, Tr=Tr, estimand = "ATT")
summary(match.prop1)
psm.pbs2l.nobias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop1, nboots=1000)
# Using the same method, we can run propensity score matching for pbs2l (bias adj)
X <- prop.scores$fitted.values
Tr <- Tr
match.prop2 <- Match(Y=pbs2l, X=X, Tr=Tr, estimand = "ATT", BiasAdjust = TRUE)
summary(match.prop2)
psm.pbs2l.withbias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop2, nboots=1000)
# Then, we can run propensity score matching for pbs5l (no bias adj)
X <- prop.scores$fitted.values
Tr <- Tr
match.prop3 <- Match(Y=pbs5l, X=X, Tr=Tr, estimand = "ATT", caliper = 0.025, M=2)
summary(match.prop3)
psm.pbs5l.nobias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop3, nboots=1000)
# Using the same method, we can run propensity score matching for pbs51 (bias adj)
X <- prop.scores$fitted.values
Tr <- Tr
match.prop4 <- Match(Y=pbs5l, X=X, Tr=Tr, estimand = "ATT", BiasAdjust = TRUE, caliper = 0.025, M=2)
summary(match.prop4)
psm.pbs5l.withbias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop4, nboots=1000)
# Genetic Matching
library(rgenoud)
X <- cbind(data[ ,c(11:16, 18:21)])
Y2 = pbs2l
Y5 = pbs5l
Tr <- Tr
genout <- GenMatch(X = X, Tr = Tr, estimand = "ATT", M=1, pop.size=300, max.generations=30, wait.generations=30)
# GenMatch for pbs2l without bias
match1 <- Match(X=X, Y=Y2, Tr=Tr, estimand = "ATT", Weight.matrix=genout)
gm.pbs2l.nobias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match1, nboots=1000)
summary(gm.pbs2l.nobias)
summary(match1)
# GenMatch for pbs2l with bias
match2 <- Match(X=X, Y=Y2, Tr=Tr, estimand = "ATT", Weight.matrix=genout, BiasAdjust = TRUE)
gm.pbs2l.withbias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match2, nboots=1000)
summary(gm.pbs2l.withbias)
summary(match2)
# GenMatch for pbs5l without bias
match3 <- Match(X=X, Y=Y5, Tr=Tr, estimand = "ATT", Weight.matrix=genout)
gm.pbs5l.nobias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match3, nboots=1000)
summary(gm.pbs5l.nobias)
summary(match3)
# GenMatch for pbs5l with bias
match4 <- Match(X=X, Y=Y5, Tr=Tr, estimand = "ATT", Weight.matrix=genout, BiasAdjust = TRUE)
gm.pbs5l.withbias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match4, nboots=1000)
summary(gm.pbs5l.withbias)
summary(match4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment