steven-tey/gist:dbdf78992dfd7805c46016ec3bb1dbdc

## gistfile1.txt
##########################
# QUESTION 1 - Debugging #
##########################

# (1a)

# genout <- GenMatch(Tr=treat, X=X)

# summary(mout)

# mb <- MatchBalance(treat~age +educ+black+ hisp+ married+ nodegr+ u74+ u75+
#                     re75+ re74+ I(re74*re75) + re78,
#                   match.out=genout, nboots=500)

# In this example, the "match.out" parameter in the MatchBalance function is denoted
# as "genout" - which means that we essentially have the genetic object as the
# match.out object. This is incorrect, as we should be using the output from the
# match function instead (mout), given the fact that we should be doing  match
# before matchbalance. Also, the re78 variable shouldn't be a part of the MatchBalance
# function, since it is not part of the number of independent variables (X) delineated
# in both the genout & mout functions. This is because re78 is essentially the
# treatment effect and has been expressed by the "treat" term at the very beginning.


# (1b)

# genout <- GenMatch(Tr=treat, X=X, BalanceMatrix=BalanceMat, estimand="ATT", exact = TRUE, M=1,
# pop.size=16, max.generations=10, wait.generations=1)

#The outcome variable
# Y=re78/1000

#
# Now that GenMatch() has found the optimal weights, let's estimate
# our causal effect of interest using those weights
#
# mout <- Match(Y=Y, Tr=treat, X=X, Weight.matrix=genout)
# summary(mout)

#
#Let's determine if balance has actually been obtained on the variables of interest
#
# mb <- MatchBalance(treat~age +educ+black+ hisp+ married+ nodegr+ u74+ u75+
#                     re75+ re74+ I(re74*re75),
#                   match.out=mout, nboots=500)

# The problem with this code is the difference in the "exact" parameters between
# the GenMatch and the Match functions. In GenMatch, exact=TRUE, hence we are matching
# exactly on all the axis. However, in Match, we are not doing that, since it is
# not the default settings, and the exact parameter is not specified here. Therefore,
# there will be a dissonance in the matching mechanisms of both functions, which will
# break the code.

# (1c)
# genout <- GenMatch(Tr=treat, X=X, BalanceMatrix=BalanceMat)

#The outcome variable
# Y=re78/1000

#
# Now that GenMatch() has found the optimal weights, let's estimate
# our causal effect of interest using those weights
#
# mout <- Match(Y=Y, Tr=treat, X=X, Weight.matrix=genout)
# summary(mout)

#
#Let's determine if balance has actually been obtained on the variables of interest
#
# mb <- MatchBalance(re78~age +educ+black+ hisp+ married+ nodegr+ u74+ u75+
#                     re75+ re74+ I(re74*re75),
#                   match.out=mout, nboots=500)

# In this case, the mistake lies in the MatchBalance function - although all the
# covariates are correct in this case, the treatment indicator (independent variable)
# is incorrect. Instead of using re78 (which is the treatment effect/dependent
# variable), we should be using the "treat" term instead.


##############
# QUESTION 2 #
##############

foo <- read.csv("https://course-resources.minerva.kgi.edu/uploaded_files/mke/00086677-3767/peace.csv")

# extract relevant columns
foo <- foo[, c(6:8, 11:16, 99, 50, 114, 49, 63, 136, 109, 126, 48, 160, 142, 10)]

# remove 2 rows with missing data
foo <- foo[c(-19, -47), ]

# check that all missing data is gone...
which(is.na(foo) == TRUE)

# take a peek at the data set (identify the columns)
head(foo)

# the basic model (without any interaction terms)
basic.model = glm(pbs2s3 ~ wartype + logcost + wardur+ factnum + factnum2 + trnsfcap + develop + exp + decade + treaty + untype4, data = foo, family = "binomial")

# the additional model (with the interaction term wardur*untype4)
additional.model = glm(pbs2s3 ~ wartype + logcost + wardur+ factnum + factnum2 + trnsfcap + develop + exp + decade + treaty + untype4 + wardur*untype4 , data = foo, family = "binomial")

# the advanced model (with the interaction term logcost*untype4)
advanced.model = glm(pbs2s3 ~ wartype + logcost + wardur+ factnum + factnum2 + trnsfcap + develop + exp + decade + treaty + untype4 + logcost*untype4 , data = foo, family = "binomial")

# Referring to Figure 4, we can see that the independent variable is the duration of
# war. Therefore, we will be looping through values of "wardur" to obtain the
# treatment effects at different levels of "wardur", whilst holding the predictors
# at their means.

# First, we have to create empty storage vectors to store the values generated from
# the for loops.

basic.model.store <- rep(0, 106)
additional.model.store <- rep(0, 106)
advanced.model.store <- rep(0, 106)

a = 0

# Next, we will have to hold all the predictors at their means, except for wardur
# and untype4.

mean.wartype <- mean(foo$wartype)
mean.logcost <- mean(foo$logcost)
mean.factnum <- mean(foo$factnum)
mean.factnum2 <- mean(foo$factnum2)
mean.trnsfcap <- mean(foo$trnsfcap)
mean.develop <- mean(foo$develop)
mean.exp <- mean(foo$exp)
mean.decade <- mean(foo$decade)
mean.treaty <- mean(foo$treaty)

# Then, we create the for loop. Since the average treatment effect (ATE) is the
# difference between the treatment and the control groups, we will have to set them
# up separately and find the difference. Then, after each loop, we store the results
# the aforementioned storage vectors.

for(i in seq(0, 315, 3)) {
  a = a+1

  treatment1 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 1)
  control1 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 0)
  predict.treatment1 = predict(basic.model, treatment1, type = "response")
  predict.control1 = predict(basic.model, control1, type = "response")
  treatment.effect1 = predict.treatment1 - predict.control1
  basic.model.store[a] = treatment.effect1

  treatment2 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 1)
  control2 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 0)
  predict.treatment2 = predict(additional.model, treatment2, type = "response")
  predict.control2 = predict(additional.model, control2, type = "response")
  treatment.effect2 = predict.treatment2 - predict.control2
  additional.model.store[a] = treatment.effect2

  treatment3 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 1)
  control3 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 0)
  predict.treatment3 = predict(advanced.model, treatment3, type = "response")
  predict.control3 = predict(advanced.model, control3, type = "response")
  treatment.effect3 = predict.treatment3 - predict.control3
  advanced.model.store[a] = treatment.effect3

}

# Finally, we will plot two graphs: the first one to replicate Figure 4, and the
# second one where we use a different interaction term, logcost*untype4. We will still
# include the basic model in the second graph as a baseline comparison.

# First Graph:

x_axis = seq(0, 315, 3)
plot(x_axis, additional.model.store, type = "l", xaxs="i", yaxs="i", xaxt='n', yaxt='n', ylim = c(0, 0.8), xlim = c(0, 315), main= "Causal Effect of Multidimensional UN Peacekeeping Operations", cex.main = 1.5, xlab = "Duration of wars in months", ylab = "Marginal effects of UN peacekeeping operations")
lines(x_axis, basic.model.store, lty = 5)
text(88, 0.3, labels = "Model with interaction term wardur*untype4")
text(190, 0.5, labels = "Dotted: Original model")

yticks = seq(0, 0.8, 0.05)
xticks = seq(5, 315, 7.5)
axis(side=1, at=xticks, labels=xticks)
axis(side=2, at=yticks, labels=yticks)

# Second Graph:

x_axis = seq(0, 315, 3)
plot(x_axis, advanced.model.store, type = "l", xaxs="i", yaxs="i", xaxt='n', yaxt='n', ylim = c(0, 0.8), xlim = c(0, 315), main= "Causal Effect of Multidimensional UN Peacekeeping Operations", cex.main = 1.5, xlab = "Duration of wars in months", ylab = "Marginal effects of UN peacekeeping operations")
lines(x_axis, basic.model.store, lty = 5)
text(160, 0.74, labels = "Model with interaction term logcost*untype4")
text(190, 0.5, labels = "Dotted: Original model")

yticks = seq(0, 0.8, 0.05)
xticks = seq(5, 315, 7.5)
axis(side=1, at=xticks, labels=xticks)
axis(side=2, at=yticks, labels=yticks)


##############
# QUESTION 3 #
##############

# Define treatment as below:
#  Tr <- rep(0, length(foo$uncint))
# Tr[which(foo$uncint != 0 & foo$uncint != 1)] <- 1
# What does this mean? What is "treatment"?

# According to the handbook, the term uncint represents the type of UN operation
# from least intrusive (0) to most intrusive (4). Therefore, this line of code
# categorizes the various uncint values in the foo dataset into two big categories:
# the ones from 0 to 1 will go under Tr == 0, which means there is no treatment
# being applied. The ones from 2 to 4 will go under Tr == 1, which is the category
# where treatment is being applied. In this context, treatment is basically intrusive
# measures employed to diffuse/stop regional conflicts.


##############
# QUESTION 4 #
##############

# (4a)

# What is the effect of the presence of intrusive UN intervention (treatment) on the
# lenient peacebuilding success 2 years (PBS2L) and 5 years (PBS5L) after the war,
# in comparison to the lack of intrusive UN intervention (control)?

# (4b)

# In this case, SUTVA doesn't necessarily hold because the UN peacebuilding
# intervention in one country might affect the peacebuilding success in neighboring
# countries, due to international ties and various geographical and situational
# factors, and thus, the treatment applied to one unit might inadvertently affect
# the outcome for another unit.

# The 'restrict' function in Match/GenMatch can be used to restrict the possible
# matches that we get from the matching function, thus, we can use this to restrict
# the matches in such a way to make sure that neighboring countries do not get
# matched together as treatment and control pairs in the matching process - this is
# given that geographical connections are the reason behind an unheld SUTVA.

# (4c)

foo <- read.csv("https://course-resources.minerva.kgi.edu/uploaded_files/mke/00086677-3767/peace.csv")

# remove 2 rows with missing data
foo <- foo[c(-19, -47), ]

# Now, we will have to change the treatment to a binary value in order to run
# regression.

pbs2l <- as.numeric(foo$pbs2l) - 1
pbs5l <- as.numeric(foo$pbs5l) - 1

# Then, we have to replace all the NAs with the integer 0.

which.have.NAs <- which(is.na(pbs5l == TRUE))
pbs5l[which.have.NAs] <- 0

# Then, we categorize the types of UN operations into two groups: control (the None
# ones) and treatment (everything else)

#Tr <- rep(0, length(foo$untype))
#Tr = foo$untype3 +foo$untype4 +foo$untype5
Tr <- rep(0, length(foo$uncint))
Tr[which(foo$uncint != "None")] <- 1

# Extract relevant columns and form a new data set
data <- foo[, c(6:8, 11:16, 99, 50, 114, 49, 63, 136, 109, 126, 48, 160, 142, 10)]

# Now, we have to append the binary data we created earlier to the new data set
cbind(data, pbs2l, pbs5l, Tr)

# check that all missing data is gone...
which(is.na(data) == TRUE)

# Now, we can run logistic regression
glm2y = glm(pbs2l ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade + Tr, family=binomial, data=data)
glm5y = glm(pbs5l ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade + Tr, family=binomial, data=data)

# We will also run MatchBalance to check for the lowest p-value
glmMB = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade + Tr, data = data, nboots = 1000)

# Now, we find the treaetment effect by using the same method as Question 2

# For Lenient PB success two years after the war
treat.pbs2l = data.frame(Tr = 1, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade))
ctrl.pbs2l = data.frame(Tr = 0, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade))
effect.pbs2l = predict(glm2y, treat.pbs2l, type = "response") - predict(glm2y, ctrl.pbs2l, type = "response")

# For Lenient PB success five years after the war
treat.pbs5l = data.frame(Tr = 1, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade))
ctrl.pbs5l = data.frame(Tr = 0, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade))
effect.pbs5l = predict(glm5y, treat.pbs5l, type = "response") - predict(glm5y, ctrl.pbs5l, type = "response")

# Propensity Score Matching

# First, we have to find the propensity scores
prop.scores = glm(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, family = "binomial")

# Then, we can run propensity score matching for pbs2l (no bias adj)
X <- prop.scores$fitted.values
Tr <- Tr
match.prop1 <- Match(Y=pbs2l, X=X, Tr=Tr, estimand = "ATT")
summary(match.prop1)
psm.pbs2l.nobias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop1, nboots=1000)

# Using the same method, we can run propensity score matching for pbs2l (bias adj)
X <- prop.scores$fitted.values
Tr <- Tr
match.prop2 <- Match(Y=pbs2l, X=X, Tr=Tr, estimand = "ATT", BiasAdjust = TRUE)
summary(match.prop2)
psm.pbs2l.withbias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop2, nboots=1000)

# Then, we can run propensity score matching for pbs5l (no bias adj)
X <- prop.scores$fitted.values
Tr <- Tr
match.prop3 <- Match(Y=pbs5l, X=X, Tr=Tr, estimand = "ATT", caliper = 0.025, M=2)
summary(match.prop3)
psm.pbs5l.nobias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop3, nboots=1000)

# Using the same method, we can run propensity score matching for pbs51 (bias adj)
X <- prop.scores$fitted.values
Tr <- Tr
match.prop4 <- Match(Y=pbs5l, X=X, Tr=Tr, estimand = "ATT", BiasAdjust = TRUE, caliper = 0.025, M=2)
summary(match.prop4)
psm.pbs5l.withbias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop4, nboots=1000)


# Genetic Matching

library(rgenoud)
X <- cbind(data[ ,c(11:16, 18:21)])
Y2 = pbs2l
Y5 = pbs5l
Tr <- Tr
genout <- GenMatch(X = X, Tr = Tr, estimand = "ATT", M=1, pop.size=300, max.generations=30, wait.generations=30)
# GenMatch for pbs2l without bias
match1 <- Match(X=X, Y=Y2, Tr=Tr, estimand = "ATT", Weight.matrix=genout)
gm.pbs2l.nobias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match1, nboots=1000)
summary(gm.pbs2l.nobias)
summary(match1)

# GenMatch for pbs2l with bias
match2 <- Match(X=X, Y=Y2, Tr=Tr, estimand = "ATT", Weight.matrix=genout, BiasAdjust = TRUE)
gm.pbs2l.withbias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match2, nboots=1000)
summary(gm.pbs2l.withbias)
summary(match2)

# GenMatch for pbs5l without bias
match3 <- Match(X=X, Y=Y5, Tr=Tr, estimand = "ATT", Weight.matrix=genout)
gm.pbs5l.nobias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match3, nboots=1000)
summary(gm.pbs5l.nobias)
summary(match3)

# GenMatch for pbs5l with bias
match4 <- Match(X=X, Y=Y5, Tr=Tr, estimand = "ATT", Weight.matrix=genout, BiasAdjust = TRUE)
gm.pbs5l.withbias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match4, nboots=1000)
summary(gm.pbs5l.withbias)
summary(match4)
	##########################
	# QUESTION 1 - Debugging #
	##########################

	# (1a)

	# genout <- GenMatch(Tr=treat, X=X)

	# summary(mout)

	# mb <- MatchBalance(treat~age +educ+black+ hisp+ married+ nodegr+ u74+ u75+
	# re75+ re74+ I(re74*re75) + re78,
	# match.out=genout, nboots=500)

	# In this example, the "match.out" parameter in the MatchBalance function is denoted
	# as "genout" - which means that we essentially have the genetic object as the
	# match.out object. This is incorrect, as we should be using the output from the
	# match function instead (mout), given the fact that we should be doing match
	# before matchbalance. Also, the re78 variable shouldn't be a part of the MatchBalance
	# function, since it is not part of the number of independent variables (X) delineated
	# in both the genout & mout functions. This is because re78 is essentially the
	# treatment effect and has been expressed by the "treat" term at the very beginning.


	# (1b)

	# genout <- GenMatch(Tr=treat, X=X, BalanceMatrix=BalanceMat, estimand="ATT", exact = TRUE, M=1,
	# pop.size=16, max.generations=10, wait.generations=1)

	#The outcome variable
	# Y=re78/1000

	#
	# Now that GenMatch() has found the optimal weights, let's estimate
	# our causal effect of interest using those weights
	#
	# mout <- Match(Y=Y, Tr=treat, X=X, Weight.matrix=genout)
	# summary(mout)

	#
	#Let's determine if balance has actually been obtained on the variables of interest
	#
	# mb <- MatchBalance(treat~age +educ+black+ hisp+ married+ nodegr+ u74+ u75+
	# re75+ re74+ I(re74*re75),
	# match.out=mout, nboots=500)

	# The problem with this code is the difference in the "exact" parameters between
	# the GenMatch and the Match functions. In GenMatch, exact=TRUE, hence we are matching
	# exactly on all the axis. However, in Match, we are not doing that, since it is
	# not the default settings, and the exact parameter is not specified here. Therefore,
	# there will be a dissonance in the matching mechanisms of both functions, which will
	# break the code.

	# (1c)
	# genout <- GenMatch(Tr=treat, X=X, BalanceMatrix=BalanceMat)

	#The outcome variable
	# Y=re78/1000

	#
	# Now that GenMatch() has found the optimal weights, let's estimate
	# our causal effect of interest using those weights
	#
	# mout <- Match(Y=Y, Tr=treat, X=X, Weight.matrix=genout)
	# summary(mout)

	#
	#Let's determine if balance has actually been obtained on the variables of interest
	#
	# mb <- MatchBalance(re78~age +educ+black+ hisp+ married+ nodegr+ u74+ u75+
	# re75+ re74+ I(re74*re75),
	# match.out=mout, nboots=500)

	# In this case, the mistake lies in the MatchBalance function - although all the
	# covariates are correct in this case, the treatment indicator (independent variable)
	# is incorrect. Instead of using re78 (which is the treatment effect/dependent
	# variable), we should be using the "treat" term instead.



	##############
	# QUESTION 2 #
	##############

	foo <- read.csv("https://course-resources.minerva.kgi.edu/uploaded_files/mke/00086677-3767/peace.csv")

	# extract relevant columns
	foo <- foo[, c(6:8, 11:16, 99, 50, 114, 49, 63, 136, 109, 126, 48, 160, 142, 10)]

	# remove 2 rows with missing data
	foo <- foo[c(-19, -47), ]

	# check that all missing data is gone...
	which(is.na(foo) == TRUE)

	# take a peek at the data set (identify the columns)
	head(foo)

	# the basic model (without any interaction terms)
	basic.model = glm(pbs2s3 ~ wartype + logcost + wardur+ factnum + factnum2 + trnsfcap + develop + exp + decade + treaty + untype4, data = foo, family = "binomial")

	# the additional model (with the interaction term wardur*untype4)
	additional.model = glm(pbs2s3 ~ wartype + logcost + wardur+ factnum + factnum2 + trnsfcap + develop + exp + decade + treaty + untype4 + wardur*untype4 , data = foo, family = "binomial")

	# the advanced model (with the interaction term logcost*untype4)
	advanced.model = glm(pbs2s3 ~ wartype + logcost + wardur+ factnum + factnum2 + trnsfcap + develop + exp + decade + treaty + untype4 + logcost*untype4 , data = foo, family = "binomial")

	# Referring to Figure 4, we can see that the independent variable is the duration of
	# war. Therefore, we will be looping through values of "wardur" to obtain the
	# treatment effects at different levels of "wardur", whilst holding the predictors
	# at their means.

	# First, we have to create empty storage vectors to store the values generated from
	# the for loops.

	basic.model.store <- rep(0, 106)
	additional.model.store <- rep(0, 106)
	advanced.model.store <- rep(0, 106)

	a = 0

	# Next, we will have to hold all the predictors at their means, except for wardur
	# and untype4.

	mean.wartype <- mean(foo$wartype)
	mean.logcost <- mean(foo$logcost)
	mean.factnum <- mean(foo$factnum)
	mean.factnum2 <- mean(foo$factnum2)
	mean.trnsfcap <- mean(foo$trnsfcap)
	mean.develop <- mean(foo$develop)
	mean.exp <- mean(foo$exp)
	mean.decade <- mean(foo$decade)
	mean.treaty <- mean(foo$treaty)

	# Then, we create the for loop. Since the average treatment effect (ATE) is the
	# difference between the treatment and the control groups, we will have to set them
	# up separately and find the difference. Then, after each loop, we store the results
	# the aforementioned storage vectors.

	for(i in seq(0, 315, 3)) {
	a = a+1

	treatment1 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 1)
	control1 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 0)
	predict.treatment1 = predict(basic.model, treatment1, type = "response")
	predict.control1 = predict(basic.model, control1, type = "response")
	treatment.effect1 = predict.treatment1 - predict.control1
	basic.model.store[a] = treatment.effect1

	treatment2 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 1)
	control2 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 0)
	predict.treatment2 = predict(additional.model, treatment2, type = "response")
	predict.control2 = predict(additional.model, control2, type = "response")
	treatment.effect2 = predict.treatment2 - predict.control2
	additional.model.store[a] = treatment.effect2

	treatment3 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 1)
	control3 = data.frame(wartype = mean.wartype, logcost = mean.logcost, wardur = i, factnum = mean.factnum, factnum2 = mean.factnum2, trnsfcap = mean.trnsfcap, develop = mean.develop, exp = mean.exp, decade = mean.decade, treaty = mean.treaty, untype4 = 0)
	predict.treatment3 = predict(advanced.model, treatment3, type = "response")
	predict.control3 = predict(advanced.model, control3, type = "response")
	treatment.effect3 = predict.treatment3 - predict.control3
	advanced.model.store[a] = treatment.effect3

	}

	# Finally, we will plot two graphs: the first one to replicate Figure 4, and the
	# second one where we use a different interaction term, logcost*untype4. We will still
	# include the basic model in the second graph as a baseline comparison.

	# First Graph:

	x_axis = seq(0, 315, 3)
	plot(x_axis, additional.model.store, type = "l", xaxs="i", yaxs="i", xaxt='n', yaxt='n', ylim = c(0, 0.8), xlim = c(0, 315), main= "Causal Effect of Multidimensional UN Peacekeeping Operations", cex.main = 1.5, xlab = "Duration of wars in months", ylab = "Marginal effects of UN peacekeeping operations")
	lines(x_axis, basic.model.store, lty = 5)
	text(88, 0.3, labels = "Model with interaction term wardur*untype4")
	text(190, 0.5, labels = "Dotted: Original model")

	yticks = seq(0, 0.8, 0.05)
	xticks = seq(5, 315, 7.5)
	axis(side=1, at=xticks, labels=xticks)
	axis(side=2, at=yticks, labels=yticks)

	# Second Graph:

	x_axis = seq(0, 315, 3)
	plot(x_axis, advanced.model.store, type = "l", xaxs="i", yaxs="i", xaxt='n', yaxt='n', ylim = c(0, 0.8), xlim = c(0, 315), main= "Causal Effect of Multidimensional UN Peacekeeping Operations", cex.main = 1.5, xlab = "Duration of wars in months", ylab = "Marginal effects of UN peacekeeping operations")
	lines(x_axis, basic.model.store, lty = 5)
	text(160, 0.74, labels = "Model with interaction term logcost*untype4")
	text(190, 0.5, labels = "Dotted: Original model")

	yticks = seq(0, 0.8, 0.05)
	xticks = seq(5, 315, 7.5)
	axis(side=1, at=xticks, labels=xticks)
	axis(side=2, at=yticks, labels=yticks)


	##############
	# QUESTION 3 #
	##############

	# Define treatment as below:
	# Tr <- rep(0, length(foo$uncint))
	# Tr[which(foo$uncint != 0 & foo$uncint != 1)] <- 1
	# What does this mean? What is "treatment"?

	# According to the handbook, the term uncint represents the type of UN operation
	# from least intrusive (0) to most intrusive (4). Therefore, this line of code
	# categorizes the various uncint values in the foo dataset into two big categories:
	# the ones from 0 to 1 will go under Tr == 0, which means there is no treatment
	# being applied. The ones from 2 to 4 will go under Tr == 1, which is the category
	# where treatment is being applied. In this context, treatment is basically intrusive
	# measures employed to diffuse/stop regional conflicts.


	##############
	# QUESTION 4 #
	##############

	# (4a)

	# What is the effect of the presence of intrusive UN intervention (treatment) on the
	# lenient peacebuilding success 2 years (PBS2L) and 5 years (PBS5L) after the war,
	# in comparison to the lack of intrusive UN intervention (control)?

	# (4b)

	# In this case, SUTVA doesn't necessarily hold because the UN peacebuilding
	# intervention in one country might affect the peacebuilding success in neighboring
	# countries, due to international ties and various geographical and situational
	# factors, and thus, the treatment applied to one unit might inadvertently affect
	# the outcome for another unit.

	# The 'restrict' function in Match/GenMatch can be used to restrict the possible
	# matches that we get from the matching function, thus, we can use this to restrict
	# the matches in such a way to make sure that neighboring countries do not get
	# matched together as treatment and control pairs in the matching process - this is
	# given that geographical connections are the reason behind an unheld SUTVA.

	# (4c)

	foo <- read.csv("https://course-resources.minerva.kgi.edu/uploaded_files/mke/00086677-3767/peace.csv")

	# remove 2 rows with missing data
	foo <- foo[c(-19, -47), ]

	# Now, we will have to change the treatment to a binary value in order to run
	# regression.

	pbs2l <- as.numeric(foo$pbs2l) - 1
	pbs5l <- as.numeric(foo$pbs5l) - 1

	# Then, we have to replace all the NAs with the integer 0.

	which.have.NAs <- which(is.na(pbs5l == TRUE))
	pbs5l[which.have.NAs] <- 0

	# Then, we categorize the types of UN operations into two groups: control (the None
	# ones) and treatment (everything else)

	#Tr <- rep(0, length(foo$untype))
	#Tr = foo$untype3 +foo$untype4 +foo$untype5
	Tr <- rep(0, length(foo$uncint))
	Tr[which(foo$uncint != "None")] <- 1

	# Extract relevant columns and form a new data set
	data <- foo[, c(6:8, 11:16, 99, 50, 114, 49, 63, 136, 109, 126, 48, 160, 142, 10)]

	# Now, we have to append the binary data we created earlier to the new data set
	cbind(data, pbs2l, pbs5l, Tr)

	# check that all missing data is gone...
	which(is.na(data) == TRUE)

	# Now, we can run logistic regression
	glm2y = glm(pbs2l ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade + Tr, family=binomial, data=data)
	glm5y = glm(pbs5l ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade + Tr, family=binomial, data=data)

	# We will also run MatchBalance to check for the lowest p-value
	glmMB = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade + Tr, data = data, nboots = 1000)

	# Now, we find the treaetment effect by using the same method as Question 2

	# For Lenient PB success two years after the war
	treat.pbs2l = data.frame(Tr = 1, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade))
	ctrl.pbs2l = data.frame(Tr = 0, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade))
	effect.pbs2l = predict(glm2y, treat.pbs2l, type = "response") - predict(glm2y, ctrl.pbs2l, type = "response")

	# For Lenient PB success five years after the war
	treat.pbs5l = data.frame(Tr = 1, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade))
	ctrl.pbs5l = data.frame(Tr = 0, wartype = mean(data$wartype), logcost = mean(data$logcost), wardur = mean(data$wardur), factnum = mean(data$factnum), factnum2 = mean(data$factnum2), trnsfcap = mean(data$trnsfcap), treaty = mean(data$treaty), exp = mean(data$exp), develop = mean(data$develop), decade = mean(data$decade))
	effect.pbs5l = predict(glm5y, treat.pbs5l, type = "response") - predict(glm5y, ctrl.pbs5l, type = "response")

	# Propensity Score Matching

	# First, we have to find the propensity scores
	prop.scores = glm(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, family = "binomial")

	# Then, we can run propensity score matching for pbs2l (no bias adj)
	X <- prop.scores$fitted.values
	Tr <- Tr
	match.prop1 <- Match(Y=pbs2l, X=X, Tr=Tr, estimand = "ATT")
	summary(match.prop1)
	psm.pbs2l.nobias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop1, nboots=1000)

	# Using the same method, we can run propensity score matching for pbs2l (bias adj)
	X <- prop.scores$fitted.values
	Tr <- Tr
	match.prop2 <- Match(Y=pbs2l, X=X, Tr=Tr, estimand = "ATT", BiasAdjust = TRUE)
	summary(match.prop2)
	psm.pbs2l.withbias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop2, nboots=1000)

	# Then, we can run propensity score matching for pbs5l (no bias adj)
	X <- prop.scores$fitted.values
	Tr <- Tr
	match.prop3 <- Match(Y=pbs5l, X=X, Tr=Tr, estimand = "ATT", caliper = 0.025, M=2)
	summary(match.prop3)
	psm.pbs5l.nobias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop3, nboots=1000)

	# Using the same method, we can run propensity score matching for pbs51 (bias adj)
	X <- prop.scores$fitted.values
	Tr <- Tr
	match.prop4 <- Match(Y=pbs5l, X=X, Tr=Tr, estimand = "ATT", BiasAdjust = TRUE, caliper = 0.025, M=2)
	summary(match.prop4)
	psm.pbs5l.withbias = MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match.prop4, nboots=1000)


	# Genetic Matching

	library(rgenoud)
	X <- cbind(data[ ,c(11:16, 18:21)])
	Y2 = pbs2l
	Y5 = pbs5l
	Tr <- Tr
	genout <- GenMatch(X = X, Tr = Tr, estimand = "ATT", M=1, pop.size=300, max.generations=30, wait.generations=30)
	# GenMatch for pbs2l without bias
	match1 <- Match(X=X, Y=Y2, Tr=Tr, estimand = "ATT", Weight.matrix=genout)
	gm.pbs2l.nobias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match1, nboots=1000)
	summary(gm.pbs2l.nobias)
	summary(match1)

	# GenMatch for pbs2l with bias
	match2 <- Match(X=X, Y=Y2, Tr=Tr, estimand = "ATT", Weight.matrix=genout, BiasAdjust = TRUE)
	gm.pbs2l.withbias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match2, nboots=1000)
	summary(gm.pbs2l.withbias)
	summary(match2)

	# GenMatch for pbs5l without bias
	match3 <- Match(X=X, Y=Y5, Tr=Tr, estimand = "ATT", Weight.matrix=genout)
	gm.pbs5l.nobias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match3, nboots=1000)
	summary(gm.pbs5l.nobias)
	summary(match3)

	# GenMatch for pbs5l with bias
	match4 <- Match(X=X, Y=Y5, Tr=Tr, estimand = "ATT", Weight.matrix=genout, BiasAdjust = TRUE)
	gm.pbs5l.withbias <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty + develop + exp + decade, data = data, match.out=match4, nboots=1000)
	summary(gm.pbs5l.withbias)
	summary(match4)