Anna-Nevm/Causal Inference.R

## Causal Inference.R

# ________________________________________QUESTION 2 ______________________________________________#


#load the data
foo<- read.csv("https://course-resources.minerva.kgi.edu/uploaded_files/mke/00086677-3767/peace.csv")

#extracting relevant columns
foo_s <- foo[, c(6:8, 11:16,99, 50, 114, 49, 52, 63, 136, 109, 126, 48, 160, 142, 10, 108)]
# removing 2 rows with missing data
foo <- foo[c(-19, -47), ]
# checking that all missing data is gone...
which(is.na(foo) == TRUE)


#specifying logistic regression model from the original paper
glm_original <- glm(pbs2s3 ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap +
              develop + exp + decade + treaty +untype4,
            data=foo, family=binomial)
#specifying our model with two new interaction terms
glm_new <- glm(pbs2s3 ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap +
                 develop + exp + decade + treaty + untype4 + (exp*untype4) + (wardur*logcost),
               data=foo, family=binomial)

#holding predictors at their means
mean.wartype <- mean(foo$wartype)
mean.logcost <- mean(foo$logcost)
mean.factnum <- mean(foo$factnum)
mean.factnum2 <- mean(foo$factnum2)
mean.trnsfcap <- mean(foo$trnsfcap)
mean.develop <- mean(foo$develop)
mean.exp <- mean(foo$exp)
mean.decade <- mean(foo$decade)
mean.treaty <- mean(foo$treaty)

#creating a logit function to obtain regression predictions using a for loop
get_logit <- function(X, coef) {
  logit <- coef[1] + sum(coef[2:length(coef)]*X)
  return(exp(logit) / (1 + exp(logit)))
}

#creating storage vectors for the original model
storage_original_treat <- rep(NA, 315)
storage_original_control <- rep(NA, 315)

#looping through values of wardur to obtain treatment effects at different levels
for (wardur in 1:315) {

  X.treat <- c(mean.wartype, mean.logcost, wardur, mean.factnum, mean.factnum2,
               mean.trnsfcap, mean.develop, mean.exp, mean.decade, mean.treaty, 1)
  X.control <- c(mean.wartype, mean.logcost, wardur, mean.factnum, mean.factnum2,
                 mean.trnsfcap, mean.develop, mean.exp, mean.decade, mean.treaty, 0)

  storage_original_treat[wardur]  <- get_logit(X.treat, coef(glm_original))
  storage_original_control[wardur]  <- get_logit(X.control, coef(glm_original))
}

#calculating the treatment effect of untype4 (multidimensional peacekeeping) in original model
original_y <- storage_original_treat- storage_original_control
original_y


#repeating the process for our new model with two interaction terms
storage_new_treat <- rep(NA, 315)
storage_new_control <- rep(NA, 315)

for (wardur in 1:315) {
  X.treat <- c(mean.wartype, mean.logcost, wardur, mean.factnum, mean.factnum2,
               mean.trnsfcap, mean.develop, mean.exp, mean.decade, mean.treaty, 1, mean.exp*1, wardur*mean.logcost)
  X.control <- c(mean.wartype, mean.logcost, wardur, mean.factnum, mean.factnum2,
                 mean.trnsfcap, mean.develop, mean.exp,mean.decade, mean.treaty, 0,mean.exp*0, wardur*mean.logcost)
  storage_new_treat[wardur]  <- get_logit(X.treat, coef(glm_new))
  storage_new_control[wardur]  <- get_logit(X.control, coef(glm_new))
}

#calculating the treatment effect of untype4 (multidimensional peacekeeping) in new model
new_y <- storage_new_treat - storage_new_control
new_y


#plotting the causal effect obtained by two models against the war duration
plot(1:315, original_y, type = "l", lty=3,lwd=2, ylim = c(0, 0.8),
     xlab = "Duration of war in months", ylab = "Treatment effect of UN peacekeeping operations",
     cex.lab=1.35, cex.axis = 0.9, tck=0.018)
lines(1:315, new_y, col= 'black', lwd=2, ylim = c(0, 0.8))


# _____________________________________________QUESTION 3_____________________________________#


# checking what uncint is about;
foo$uncint
# Levels:Enforcement None Observer PKO


Tr <- rep(0, length(foo$uncint))
# creates a vector filled with zeros, number of zeros = length(foo$uncint)
Tr[which(foo$uncint!= "None"] <- 1
# "!=" means "not equal to";
# corrects the value from 0 to 1 for elements in Tr vector that have value of uncint other than "None".


# _____________________________________________QUESTION 4_____________________________________#


#loading the data
data_4 <- read.csv("https://course-resources.minerva.kgi.edu/uploaded_files/mke/00086677-3767/peace.csv")
#removing rows with missing data
data_4 <- data_4[c(-19, -47), ]

#treatment assignment
Tr <- rep(0, length(data_4$uncint))
data_4$uncint
#Treatment is assigned to wars that had some sort of UN interventions(Enforcement,Observer,PKO)
#control - to these that had None
Tr[which(data_4$uncint!= "None")] <- 1
data_4$Tr <- Tr


#_______________Estimating the impact of treatment (UN interventions) on foo$pbs5l and foo$pbs2l_______________#


library(Matching)

#checking the initial balance
mb1 <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 +
                      trnsfcap + develop + exp + decade + treaty, data = data_4, nboots=500)

#checking for NAs:
summary(data_4$pbs2l)
summary(data_4$pbs5l)
NAs <- is.na(data_4$pbs5l)


#_________________________________________LOGISTIC REGRESSION___________________________________#

#specifying logistic regression model
regression_2 <- glm(pbs2l ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + untype4 + treaty +
             develop + exp + decade+Tr, data=data_4, family="binomial")

#generating counterfactuals for the estimation of the tretament effect
counter_factual <- data_4
counter_factual$Tr <- 1 - data_4$Tr
counter_factuals_2 <- predict(regression_2, newdata=counter_factual, type="response")

#estimating treatment effect
treatment_effects_2 <- rep(NA, nrow(data_4))
t_e <- data_4$Tr == 1
treatment_effects_2[t_e] <- regression_2$fitted.values[t_e] - counter_factuals_2[t_e]
treatment_effects_2[!t_e] <- counter_factuals_2[!t_e] - regression_2$fitted.values[!t_e]

#Tmt effect estimate
mean(treatment_effects_2)
#Tmt effect sd error
sd(treatment_effects_2)


#repeating the same for pbs5l
regression_5 <-glm(pbs5l ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + untype4 + treaty +
                     develop + exp + decade+Tr, data=data_4[!NAs,], family="binomial")
summary(regression_5)
?Match
counter_factual_5 <- data_4[!NAs,]
counter_factual_5$Tr <- 1 - data_4$Tr[!NAs]
counter.factuals_5 <- predict(regression_5, newdata=counter_factual_5, type="response")

treatment_effects_5 <- rep(NA, nrow(data_4[!NAs,]))
t_e_5 <- data_4[!NAs,]$Tr == 1
treatment_effects_5[t_e_5] <- regression_5$fitted.values[t_e_5] - counter.factuals_5[t_e_5]
treatment_effects_5[!t_e_5] <- counter.factuals_5[!t_e_5] - regression_5$fitted.values[!t_e_5]

#Tmt effect estimate
mean(treatment_effects_5)
#Tmt effect sd error
sd(treatment_effects_5)


# ___________________________________PROPENSITY-SCORE MATCHING___________________________________#
# Compute the propensity scores
propensity_score_model <- glm(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty +
                 develop + exp + decade, data=data_4, family="binomial")

#fitted.values are the propensity scores
F_V <- propensity_score_model$fitted.values

# Do the matching for peacebuilding success 2 yrs after the war
matching_propensity_pbs2l <- Match(Y = data_4$pbs2l, Tr=data_4$Tr, X=F_V)
summary(matching_propensity_pbs2l)
#treatment effect estimate is 0.19444;
#treatment effect std error is 0.184

Y_5 <- data_4$pbs5l
mask <- which(!is.na(Y_5))

matching_propensity_pbs5l <- Match(Y = Y_5[mask], Tr=Tr[mask], X=F_V[mask])
summary(matching_propensity_pbs5l)
#treatment effect estimate is 0.18182
#treatment effect std error is 0.17003


# Check the balance
#also have Tr as a thing we're predicting because we're trying to compare the balance across the
balance_propensity_pbs2l <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty +
                         develop + exp + decade, data=data_4, match.out = matching_propensity_pbs2l, nboots=1000)
#lowest p-value: 0.096
balance_propensity_pbs5l <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty +
                                           develop + exp + decade, data=data_4, match.out = matching_propensity_pbs5l, nboots=1000)
#lowest p-value: < 2.22e-16


# ___________________________________GENETIC MATCHING_________________________________________________#

#all the covariates to be balanced
X <- cbind(data_4$wartype, data_4$logcost, data_4$wardur, data_4$factnum, data_4$factnum2, data_4$trnsfcap,
           data_4$treaty, data_4$develop, data_4$exp, data_4$decade, (data_4$wardur*data_4$decade))

genout <- GenMatch(X = X, Tr = Tr, pop.size=250, wait.generations=25)
?GenMatch

matchout_pbs2l <- Match(X=X, Tr=Tr,Y = data_4$pbs2l, Weight.matrix = genout)
summary(matchout_pbs2l) # treatment estimate and treatment SD here

mb.out2 <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty +
                          develop + exp + decade + wardur*decade, data=data_4, match.out = matchout_pbs2l, nboots=1000)
#lowest p-value from Match Balance: 0.27994


# repeating the same for 5 years
genout_5 <- GenMatch(Tr=Tr[mask], X=X[mask,], M=1,
                    pop.size=250, wait.generations=25)
m_5  <- Match(Y=Y_5[mask], Tr=Tr[mask], X=X[mask,], M=1, Weight.matrix = genout_5)
summary(m_5)

mb5 <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 +
                      trnsfcap + develop + exp + decade + treaty, data= data_4,
                    match.out = m_5, nboots=500)


#__________________________________GENETIC MATCHING WITH PROPENSITY SCORES________________________________#
#same as geentic matching but in cbind include "propensity_score_model$fitted.values"

X_p_s <- cbind(X, propensity_score_model$fitted.values)

genout_2 <- GenMatch(X = X_p_s, Tr = Tr, pop.size=350, wait.generations=25)


matchout_2l <- Match(X=X_p_s, Tr=Tr,Y = data_4$pbs2l, Weight.matrix = genout_2)
summary(matchout_2l)

mb_out2 <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty +
                          develop + exp + decade, data=data_4, match.out = matchout_2l, nboots=1000)
#lowest p-value from Match Balance: 0.27994


# repeating the same for 5 years
genout_5 <- GenMatch(Tr=Tr[mask], X=X_p_s[mask,], M=2,
                     pop.size=350, wait.generations=25)
m_5l  <- Match(Y=Y_5[mask], Tr=Tr[mask], X=X_p_s[mask,], M=2, Weight.matrix = genout_5)
summary(m_5l)

mb5l <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 +
                      trnsfcap + develop + exp + decade + treaty, data= data_4,
                    match.out = m_5l, nboots=500)

	# ________________________________________QUESTION 2 ______________________________________________#


	#load the data
	foo<- read.csv("https://course-resources.minerva.kgi.edu/uploaded_files/mke/00086677-3767/peace.csv")

	#extracting relevant columns
	foo_s <- foo[, c(6:8, 11:16,99, 50, 114, 49, 52, 63, 136, 109, 126, 48, 160, 142, 10, 108)]
	# removing 2 rows with missing data
	foo <- foo[c(-19, -47), ]
	# checking that all missing data is gone...
	which(is.na(foo) == TRUE)


	#specifying logistic regression model from the original paper
	glm_original <- glm(pbs2s3 ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap +
	develop + exp + decade + treaty +untype4,
	data=foo, family=binomial)
	#specifying our model with two new interaction terms
	glm_new <- glm(pbs2s3 ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap +
	develop + exp + decade + treaty + untype4 + (expuntype4) + (wardurlogcost),
	data=foo, family=binomial)

	#holding predictors at their means
	mean.wartype <- mean(foo$wartype)
	mean.logcost <- mean(foo$logcost)
	mean.factnum <- mean(foo$factnum)
	mean.factnum2 <- mean(foo$factnum2)
	mean.trnsfcap <- mean(foo$trnsfcap)
	mean.develop <- mean(foo$develop)
	mean.exp <- mean(foo$exp)
	mean.decade <- mean(foo$decade)
	mean.treaty <- mean(foo$treaty)

	#creating a logit function to obtain regression predictions using a for loop
	get_logit <- function(X, coef) {
	logit <- coef[1] + sum(coef[2:length(coef)]*X)
	return(exp(logit) / (1 + exp(logit)))
	}

	#creating storage vectors for the original model
	storage_original_treat <- rep(NA, 315)
	storage_original_control <- rep(NA, 315)

	#looping through values of wardur to obtain treatment effects at different levels
	for (wardur in 1:315) {

	X.treat <- c(mean.wartype, mean.logcost, wardur, mean.factnum, mean.factnum2,
	mean.trnsfcap, mean.develop, mean.exp, mean.decade, mean.treaty, 1)
	X.control <- c(mean.wartype, mean.logcost, wardur, mean.factnum, mean.factnum2,
	mean.trnsfcap, mean.develop, mean.exp, mean.decade, mean.treaty, 0)

	storage_original_treat[wardur] <- get_logit(X.treat, coef(glm_original))
	storage_original_control[wardur] <- get_logit(X.control, coef(glm_original))
	}

	#calculating the treatment effect of untype4 (multidimensional peacekeeping) in original model
	original_y <- storage_original_treat- storage_original_control
	original_y


	#repeating the process for our new model with two interaction terms
	storage_new_treat <- rep(NA, 315)
	storage_new_control <- rep(NA, 315)

	for (wardur in 1:315) {
	X.treat <- c(mean.wartype, mean.logcost, wardur, mean.factnum, mean.factnum2,
	mean.trnsfcap, mean.develop, mean.exp, mean.decade, mean.treaty, 1, mean.exp1, wardurmean.logcost)
	X.control <- c(mean.wartype, mean.logcost, wardur, mean.factnum, mean.factnum2,
	mean.trnsfcap, mean.develop, mean.exp,mean.decade, mean.treaty, 0,mean.exp0, wardurmean.logcost)
	storage_new_treat[wardur] <- get_logit(X.treat, coef(glm_new))
	storage_new_control[wardur] <- get_logit(X.control, coef(glm_new))
	}

	#calculating the treatment effect of untype4 (multidimensional peacekeeping) in new model
	new_y <- storage_new_treat - storage_new_control
	new_y


	#plotting the causal effect obtained by two models against the war duration
	plot(1:315, original_y, type = "l", lty=3,lwd=2, ylim = c(0, 0.8),
	xlab = "Duration of war in months", ylab = "Treatment effect of UN peacekeeping operations",
	cex.lab=1.35, cex.axis = 0.9, tck=0.018)
	lines(1:315, new_y, col= 'black', lwd=2, ylim = c(0, 0.8))




	# _____________________________________________QUESTION 3_____________________________________#


	# checking what uncint is about;
	foo$uncint
	# Levels:Enforcement None Observer PKO


	Tr <- rep(0, length(foo$uncint))
	# creates a vector filled with zeros, number of zeros = length(foo$uncint)
	Tr[which(foo$uncint!= "None"] <- 1
	# "!=" means "not equal to";
	# corrects the value from 0 to 1 for elements in Tr vector that have value of uncint other than "None".




	# _____________________________________________QUESTION 4_____________________________________#


	#loading the data
	data_4 <- read.csv("https://course-resources.minerva.kgi.edu/uploaded_files/mke/00086677-3767/peace.csv")
	#removing rows with missing data
	data_4 <- data_4[c(-19, -47), ]

	#treatment assignment
	Tr <- rep(0, length(data_4$uncint))
	data_4$uncint
	#Treatment is assigned to wars that had some sort of UN interventions(Enforcement,Observer,PKO)
	#control - to these that had None
	Tr[which(data_4$uncint!= "None")] <- 1
	data_4$Tr <- Tr



	#_______________Estimating the impact of treatment (UN interventions) on foo$pbs5l and foo$pbs2l_______________#


	library(Matching)

	#checking the initial balance
	mb1 <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 +
	trnsfcap + develop + exp + decade + treaty, data = data_4, nboots=500)

	#checking for NAs:
	summary(data_4$pbs2l)
	summary(data_4$pbs5l)
	NAs <- is.na(data_4$pbs5l)


	#_________________________________________LOGISTIC REGRESSION___________________________________#

	#specifying logistic regression model
	regression_2 <- glm(pbs2l ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + untype4 + treaty +
	develop + exp + decade+Tr, data=data_4, family="binomial")

	#generating counterfactuals for the estimation of the tretament effect
	counter_factual <- data_4
	counter_factual$Tr <- 1 - data_4$Tr
	counter_factuals_2 <- predict(regression_2, newdata=counter_factual, type="response")

	#estimating treatment effect
	treatment_effects_2 <- rep(NA, nrow(data_4))
	t_e <- data_4$Tr == 1
	treatment_effects_2[t_e] <- regression_2$fitted.values[t_e] - counter_factuals_2[t_e]
	treatment_effects_2[!t_e] <- counter_factuals_2[!t_e] - regression_2$fitted.values[!t_e]

	#Tmt effect estimate
	mean(treatment_effects_2)
	#Tmt effect sd error
	sd(treatment_effects_2)


	#repeating the same for pbs5l
	regression_5 <-glm(pbs5l ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + untype4 + treaty +
	develop + exp + decade+Tr, data=data_4[!NAs,], family="binomial")
	summary(regression_5)
	?Match
	counter_factual_5 <- data_4[!NAs,]
	counter_factual_5$Tr <- 1 - data_4$Tr[!NAs]
	counter.factuals_5 <- predict(regression_5, newdata=counter_factual_5, type="response")

	treatment_effects_5 <- rep(NA, nrow(data_4[!NAs,]))
	t_e_5 <- data_4[!NAs,]$Tr == 1
	treatment_effects_5[t_e_5] <- regression_5$fitted.values[t_e_5] - counter.factuals_5[t_e_5]
	treatment_effects_5[!t_e_5] <- counter.factuals_5[!t_e_5] - regression_5$fitted.values[!t_e_5]

	#Tmt effect estimate
	mean(treatment_effects_5)
	#Tmt effect sd error
	sd(treatment_effects_5)




	# ___________________________________PROPENSITY-SCORE MATCHING___________________________________#
	# Compute the propensity scores
	propensity_score_model <- glm(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty +
	develop + exp + decade, data=data_4, family="binomial")

	#fitted.values are the propensity scores
	F_V <- propensity_score_model$fitted.values

	# Do the matching for peacebuilding success 2 yrs after the war
	matching_propensity_pbs2l <- Match(Y = data_4$pbs2l, Tr=data_4$Tr, X=F_V)
	summary(matching_propensity_pbs2l)
	#treatment effect estimate is 0.19444;
	#treatment effect std error is 0.184

	Y_5 <- data_4$pbs5l
	mask <- which(!is.na(Y_5))

	matching_propensity_pbs5l <- Match(Y = Y_5[mask], Tr=Tr[mask], X=F_V[mask])
	summary(matching_propensity_pbs5l)
	#treatment effect estimate is 0.18182
	#treatment effect std error is 0.17003


	# Check the balance
	#also have Tr as a thing we're predicting because we're trying to compare the balance across the
	balance_propensity_pbs2l <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty +
	develop + exp + decade, data=data_4, match.out = matching_propensity_pbs2l, nboots=1000)
	#lowest p-value: 0.096
	balance_propensity_pbs5l <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty +
	develop + exp + decade, data=data_4, match.out = matching_propensity_pbs5l, nboots=1000)
	#lowest p-value: < 2.22e-16



	# ___________________________________GENETIC MATCHING_________________________________________________#

	#all the covariates to be balanced
	X <- cbind(data_4$wartype, data_4$logcost, data_4$wardur, data_4$factnum, data_4$factnum2, data_4$trnsfcap,
	data_4$treaty, data_4$develop, data_4$exp, data_4$decade, (data_4$wardur*data_4$decade))

	genout <- GenMatch(X = X, Tr = Tr, pop.size=250, wait.generations=25)
	?GenMatch

	matchout_pbs2l <- Match(X=X, Tr=Tr,Y = data_4$pbs2l, Weight.matrix = genout)
	summary(matchout_pbs2l) # treatment estimate and treatment SD here

	mb.out2 <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty +
	develop + exp + decade + wardur*decade, data=data_4, match.out = matchout_pbs2l, nboots=1000)
	#lowest p-value from Match Balance: 0.27994


	# repeating the same for 5 years
	genout_5 <- GenMatch(Tr=Tr[mask], X=X[mask,], M=1,
	pop.size=250, wait.generations=25)
	m_5 <- Match(Y=Y_5[mask], Tr=Tr[mask], X=X[mask,], M=1, Weight.matrix = genout_5)
	summary(m_5)

	mb5 <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 +
	trnsfcap + develop + exp + decade + treaty, data= data_4,
	match.out = m_5, nboots=500)



	#__________________________________GENETIC MATCHING WITH PROPENSITY SCORES________________________________#
	#same as geentic matching but in cbind include "propensity_score_model$fitted.values"

	X_p_s <- cbind(X, propensity_score_model$fitted.values)

	genout_2 <- GenMatch(X = X_p_s, Tr = Tr, pop.size=350, wait.generations=25)


	matchout_2l <- Match(X=X_p_s, Tr=Tr,Y = data_4$pbs2l, Weight.matrix = genout_2)
	summary(matchout_2l)

	mb_out2 <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 + trnsfcap + treaty +
	develop + exp + decade, data=data_4, match.out = matchout_2l, nboots=1000)
	#lowest p-value from Match Balance: 0.27994


	# repeating the same for 5 years
	genout_5 <- GenMatch(Tr=Tr[mask], X=X_p_s[mask,], M=2,
	pop.size=350, wait.generations=25)
	m_5l <- Match(Y=Y_5[mask], Tr=Tr[mask], X=X_p_s[mask,], M=2, Weight.matrix = genout_5)
	summary(m_5l)

	mb5l <- MatchBalance(Tr ~ wartype + logcost + wardur + factnum + factnum2 +
	trnsfcap + develop + exp + decade + treaty, data= data_4,
	match.out = m_5l, nboots=500)