Skip to content

Instantly share code, notes, and snippets.

@pattoM
Created March 24, 2018 15:50
Show Gist options
  • Save pattoM/10542456c3fe0b38322d7c802cae8c11 to your computer and use it in GitHub Desktop.
Save pattoM/10542456c3fe0b38322d7c802cae8c11 to your computer and use it in GitHub Desktop.
# QUESTION 1 ----
# Read the two samples. Neither file has a header row, so read.table()
# names the columns V1, V2, ... automatically.
control <- read.table("nswre74_control.txt", header = FALSE)
treated <- read.table("nswre74_treated.txt", header = FALSE)

# Simple difference in means of V10 (1978 earnings): treated minus control.
diff_means <- mean(treated$V10) - mean(control$V10)
diff_means

# Stack both groups into a single data frame and print it.
alldata <- rbind(control, treated)
alldata

# Univariate linear regression of V10 on V1 (V1 is the column used as the
# treatment indicator later in this script). The variables are referenced
# through `data =` rather than `alldata$...` inside the formula, so the
# coefficient is labelled "V1" and predict(fit1, newdata = ...) works.
# NOTE(review): if V1 is a 0/1 indicator, its coefficient should equal
# diff_means above -- confirm against the output.
fit1 <- lm(V10 ~ V1, data = alldata)
summary(fit1)
# QUESTION 2 ----
# Replace the original control group with the comparison sample stored in
# cps_controls.txt (no header row, so columns are V1, V2, ...).
cps_control <- read.table("cps_controls.txt", header = FALSE)
# Show the dimensions and first rows instead of printing the whole data
# frame, which floods the console (and is cut off at getOption("max.print")
# for large inputs).
dim(cps_control)
head(cps_control)

# Difference in means of V10: treated minus the new comparison group.
cps_diff_means <- mean(treated$V10) - mean(cps_control$V10)
cps_diff_means

# Stack the original treated units on top of the new comparison units.
newdata <- rbind(treated, cps_control)
dim(newdata)
head(newdata)

# Linear model of V10 on V1 to obtain standard errors and p-values.
# lm() is used for consistency with Question 1; it yields the same
# coefficients, standard errors and p-values as the default (gaussian) glm().
# Passing `data =` keeps the coefficient label as "V1" and lets predict()
# work on new data.
fit2 <- lm(V10 ~ V1, data = newdata)
summary(fit2)
# QUESTION 3 ----
# Propensity-score matching.
set.seed(1234)
library(Matching)

# Treatment indicator, outcome and covariate matrix.
Tr <- newdata$V1
Y <- newdata$V10
# Covariates are columns V2..V9. Subsetting by name keeps the original
# column names, so model and balance output is labelled V2..V9 instead of
# positional labels (1..8) that are off by one relative to the data.
X <- as.matrix(newdata[, paste0("V", 2:9)])
summary(Tr)
summary(Y)
summary(X)

# Propensity-score model: logistic regression of treatment on the covariates.
pscoremod <- glm(Tr ~ X, family = binomial)
summary(pscoremod)

# One-to-one matching with replacement on the estimated propensity score.
# fitted() is used instead of `pscoremod$fitted`, which only works through
# `$` partial matching of the `fitted.values` component.
matchedresult <- Match(
  Y = Y, Tr = Tr, X = fitted(pscoremod),
  estimand = "ATT", M = 1, ties = TRUE, replace = TRUE
)
summary(matchedresult)

# Regression of outcome on treatment within the matched data set.
# NOTE(review): this fit does not use matchedresult$weights, so when ties
# occur its Tr coefficient may differ from the estimate reported by
# summary(matchedresult) -- confirm this is intended.
te_estimate1 <- lm(Y ~ Tr, data = matchedresult$mdata)
summary(te_estimate1)

# Covariate balance before and after matching (no bootstrap replications).
MatchBalance(Tr ~ X, match.out = matchedresult, nboots = 0)

# QQ-plot of V2 (age) for matched controls vs. matched treated units; age is
# used because it takes a wide range of values. Points on the 45-degree
# reference line indicate identical distributions.
qqplot(
  newdata$V2[matchedresult$index.control],
  newdata$V2[matchedresult$index.treated],
  xlab = "V2 (age), matched controls",
  ylab = "V2 (age), matched treated"
)
abline(coef = c(0, 1), col = 3)
# QUESTION 4 ----
# Multivariate matching: match on the covariates themselves (with V4..V7
# expanded into indicator columns) plus the propensity score from Question 3.
set.seed(1234)
Tr_mvar <- newdata$V1
Y_mvar <- newdata$V10
# The last column is the estimated propensity score. fitted() replaces
# `pscoremod$fitted`, which relied on `$` partial matching of
# `fitted.values`.
# NOTE(review): each of V4..V7 contributes both an `== 0` and an `== 1`
# column; if these variables are binary the two columns are exact
# complements, so each variable is counted twice in the matching distance
# and in the balance table -- confirm this is intended.
X_mvar <- cbind(
  newdata$V2, newdata$V3,
  newdata$V4 == 0, newdata$V4 == 1,
  newdata$V5 == 0, newdata$V5 == 1,
  newdata$V6 == 0, newdata$V6 == 1,
  newdata$V7 == 0, newdata$V7 == 1,
  newdata$V8, newdata$V9,
  fitted(pscoremod)
)
summary(Tr_mvar)
summary(Y_mvar)
summary(X_mvar)

# One-to-one matching with replacement on the full covariate matrix.
matchedresult2 <- Match(
  Y = Y_mvar, Tr = Tr_mvar, X = X_mvar,
  estimand = "ATT", M = 1, ties = TRUE, replace = TRUE
)
summary(matchedresult2)

# Covariate balance before and after matching (100 bootstrap replications).
MatchBalance(Tr_mvar ~ X_mvar, match.out = matchedresult2, nboots = 100)

# QQ-plot of V2 (age) for matched controls vs. matched treated units; age is
# used because it takes a wide range of values. Points on the 45-degree
# reference line indicate identical distributions.
qqplot(
  newdata$V2[matchedresult2$index.control],
  newdata$V2[matchedresult2$index.treated],
  xlab = "V2 (age), matched controls",
  ylab = "V2 (age), matched treated"
)
abline(coef = c(0, 1), col = 3)
# QUESTION 5 ----
# Redo the matching from Questions 3 and 4, this time letting genetic
# matching choose the covariate weights.
library(rgenoud)

# --- Question 3 inputs (Tr, Y, X) ---
# Search for covariate weights, then match using the resulting weight matrix.
weights1 <- GenMatch(
  Tr = Tr,
  X = X,
  BalanceMatrix = X,
  pop.size = 10
)
matchedgen <- Match(
  Y = Y,
  Tr = Tr,
  X = X,
  Weight.matrix = weights1
)
summary(matchedgen)
MatchBalance(Tr ~ X, match.out = matchedgen, nboots = 100)
# Balance picture for V2 (age): matched controls against matched treated,
# with a 45-degree reference line.
qqplot(
  newdata$V2[matchedgen$index.control],
  newdata$V2[matchedgen$index.treated]
)
abline(coef = c(0, 1), col = 4)

# --- Question 4 inputs (Tr_mvar, Y_mvar, X_mvar) ---
# Same procedure applied to the multivariate covariate matrix.
weights2 <- GenMatch(
  Tr = Tr_mvar,
  X = X_mvar,
  BalanceMatrix = X_mvar,
  pop.size = 10
)
matchedgen2 <- Match(
  Y = Y_mvar,
  Tr = Tr_mvar,
  X = X_mvar,
  Weight.matrix = weights2
)
summary(matchedgen2)
MatchBalance(Tr_mvar ~ X_mvar, match.out = matchedgen2, nboots = 100)
# Balance picture for V2 (age) again, for the second genetic match.
qqplot(
  newdata$V2[matchedgen2$index.control],
  newdata$V2[matchedgen2$index.treated]
)
abline(coef = c(0, 1), col = 5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment