Skip to content

Instantly share code, notes, and snippets.

@anglilian
Last active April 15, 2020 09:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anglilian/4877735ae14dfea80bf7bdc7956fb98b to your computer and use it in GitHub Desktop.
Save anglilian/4877735ae14dfea80bf7bdc7956fb98b to your computer and use it in GitHub Desktop.
CS112 Final Project Extension Code
#leave-two-out test + matching
# Clear the workspace
rm(list = ls())
#load the required libraries
#Run the commented code to install the libraries
#install.packages("Synth")
#install.packages("gtools")
library(Synth)
library(gtools)
# Please set your working directory to the data/ folder
#setwd()
# Load data
#download the data from here -> https://doi.org/10.7910/DVN/0XOYTG/RIYVNR
df <- read.csv("df.csv", header = TRUE)
# Prepare dataset
df$state <- as.character(df$state) # required by dataprep()
#main model of Synthetic control + implementing better matching
dataprep.out3 <-
dataprep(df,
predictors = c("state.gdp.capita",
"state.gdp.growth.percent",
"population.projection.ln",
"years.schooling.imp"
),
special.predictors = list(
list("homicide.rates", 1993:1998, "mean"),
list("proportion.extreme.poverty", 1993:1998, "mean"),
list("gini.imp", 1993:1998, "mean")
),
predictors.op = "mean",
dependent = "homicide.rates",
unit.variable = "code",
time.variable = "year",
unit.names.variable = "state",
treatment.identifier = 35,
controls.identifier = c(11:17, 21:27, 31:33, 41:43, 50:53),
time.predictors.prior = c(1993:1998),
time.optimize.ssr = c(1993:1998),
time.plot = c(1993:2009)
)
# Run synth
synth.out3 <- synth(dataprep.out3)
#leave-two-out analysis
# Loop over leave two outs
#this is to store the results of year values for each iteration
#there are 6 combinations, hence 10 columns
storegaps <- matrix(NA, length(1993:2009), 10)
#colnames stores the codes of the states left out
colnames(storegaps) <- c("14, 32", "14, 33", "14, 42", "14, 53",
"32, 33", "32, 42", "32, 53",
"33, 42", "33, 53",
"42, 53")
#this contains the codes for all states except Sao Paulo
co <- unique(df$code)
co <- co[-25]
#a list of all the combinations of the 5 states
comb.omit <- combinations(5, 2, c(14, 32, 33, 42, 53))
for(k in 1:10){
# Data prep for training model
omit <- comb.omit[k, ]
# Prepare data for synth
dataprep.out2 <-
dataprep(df,
predictors = c("state.gdp.capita",
"state.gdp.growth.percent",
"population.projection.ln",
"years.schooling.imp"
),
special.predictors = list(
list("homicide.rates", 1993:1998, "mean"),
list("proportion.extreme.poverty", 1993:1998, "mean"),
list("gini.imp", 1993:1998, "mean")
),
predictors.op = "mean",
dependent = "homicide.rates",
unit.variable = "code",
time.variable = "year",
unit.names.variable = "state",
treatment.identifier = 35,
controls.identifier = co[-c(which(co==omit[1]), which(co==omit[2]))],
time.predictors.prior = c(1993:1998),
time.optimize.ssr = c(1993:1998),
time.plot = c(1993:2009)
)
# Run synth
synth.out2 <- synth(dataprep.out2)
#Store the results
storegaps[,k] <- (dataprep.out2$Y0%*%synth.out2$solution.w)
} # Close loop over leave one outs
# Leave-two-out: graph
#Plot the main model with Sao Paulo and Synthetic control + better matching
path.plot(synth.res = synth.out3,
dataprep.res = dataprep.out3,
Ylab = c("Homicide Rates"),
Xlab = c("Year"),
Legend = c("Sao Paulo","Synthetic Sao Paulo"),
Legend.position = c("bottomleft")
)
#add the line at treatment
abline(v = 1999,
lty = 2)
#add the arrow and text to indicate policy change
arrows(1997, 50, 1999, 50,
col = "black",
length = .1)
text(1995, 50,
"Policy Change",
cex = .8)
#plot all the leave 2 out synthetic controls
for(i in 1:10){
lines(1993:2009,
storegaps[,i],
col = "darkgrey",
lty = "solid")
}
#add the legend
legend(x = "bottomleft",
legend = c("Sao Paulo",
"Synthetic Sao Paulo",
"Synthetic Sao Paulo (leave-two-out)"
),
lty = c("solid", "dashed", "solid"),
col = c("black", "black", "darkgrey"),
cex = .8,
bg = "white",
lwdc(2, 2, 1)
)
# Clear the workspace
rm(list = ls())
# Load necessary packages
library(dplyr) # data manipulation
library(Synth) # models
# Load data
df <- read.csv("df.csv", header = TRUE)
# Prepare dataset
df$state <- as.character(df$state) # required by dataprep()
# Prepare data for synth
dataprep.out <-
dataprep(df,
predictors = c("state.gdp.capita",
"population.projection.ln",
"years.schooling.imp"
),
special.predictors = list(
list("state.gdp.growth.percent", 1993:1998, "mean"),
list("homicide.rates", 1990:1998, "mean"),
list("proportion.extreme.poverty", 1990:1998, "mean"),
list("gini.imp", 1990:1998, "mean")
),
predictors.op = "mean",
dependent = "homicide.rates",
unit.variable = "code",
time.variable = "year",
unit.names.variable = "state",
treatment.identifier = 35,
controls.identifier = c(11:17, 21:27, 31:33, 41:43, 50:53),
time.predictors.prior = c(1990:1998),
time.optimize.ssr = c(1990:1998),
time.plot = c(1990:2009)
)
## Permutation test
states <- c(11:17, 21:27, 31:33, 35, 41:43, 50:53)
# Prepare data for synth
results <- list()
results_synth <- list()
gaps <- list()
for (i in states) {
dataprep.out <-
dataprep(foo=df,
predictors = c("state.gdp.capita",
"population.projection.ln",
"years.schooling.imp"
),
special.predictors = list(
list("state.gdp.growth.percent", 1993:1998, "mean"), #shortened pretreatment year from 2000 to 2003
list("homicide.rates", 1990:1998, "mean"),
list("proportion.extreme.poverty", 1990:1998, "mean"),
list("gini.imp", 1990:1998, "mean")
),
predictors.op = "mean",
dependent = "homicide.rates",
unit.variable = "code",
time.variable = "year",
unit.names.variable = "state",
treatment.identifier = i,
controls.identifier = states[which(states!=i)],
time.predictors.prior = c(1990:1998),
time.optimize.ssr = c(1990:1998),
time.plot = c(1990:2009)
)
results[[as.character(i)]] <- dataprep.out
results_synth[[as.character(i)]] <- synth(results[[as.character(i)]])
gaps[[as.character(i)]] <- results[[as.character(i)]]$Y1plot - (results[[as.character(i)]]$Y0plot %*% results_synth[[as.character(i)]]$solution.w)
}
#Calculate MSPE
MSE <- c()
for (i in c(1:25)){
year <- as.numeric(unlist(gaps[i]))
MSE[i]<- mean(year[1:9]^2)
}
low.mspe <- MSE[-which(MSE > 2*MSE[18],)]
# Permutation graph: states with MSPE no higher than 2x São Paulo's
selected_states <- match(low.mspe, MSE)
plot(1990:2009,
ylim = c(-30, 30),
xlim = c(1990,2009),
ylab = "Gap in Homicide Rates",
xlab = "Year"
)
for (i in selected_states) {
lines(1990:2009,
gaps[[i]],
col = "lightgrey",
lty = "solid",
lwd = 2
)
}
lines(1990:2009,
gaps[["35"]], # São Paulo
col = "black",
lty = "solid",
lwd = 2
)
abline(v = 1999,
lty = 2)
abline(h = 0,
lty = 1,
lwd = 1)
arrows(1997, 25, 1999, 25,
col = "black",
length = .1)
text(1995, 25,
"Policy Change",
cex = .8)
legend(x = "bottomleft",
legend = c("Sao Paulo",
"Control States (MSPE Less Than Two Times That of Sao Paulo)"),
lty = c("solid", "solid"),
col = c("black", "darkgrey"),
cex = .8,
bg = "white",
lwdc(2, 2, 1)
)
#Calculate MSPE
MSE <- c()
for (i in selected_states){
year <- as.numeric(unlist(gaps[i]))
MSE[i]<- mean(year[10:20]^2)
}
MSE <- MSE[-which(is.na(MSE),)]
order(MSE)
MSE[18]
#Sao Paulo index 18
pvalue <- 5/length(MSE)
#####################
### Data Analysis ###
#####################
## Please set your working directory to the data/ folder
# Clear the workspace
rm(list = ls())
# Load necessary packages
library(dplyr) # data manipulation
library(Synth) # models
# Load data
df <- read.csv("df.csv", header = TRUE)
# Prepare dataset
df$state <- as.character(df$state) # required by dataprep()
# Prepare data for synth
dataprep.out <-
dataprep(df,
predictors = c("state.gdp.capita",
"population.projection.ln",
"years.schooling.imp"
),
special.predictors = list(
list("state.gdp.growth.percent", 1993:1998, "mean"), #shortened pretreatment year from 2000 to 2003
list("homicide.rates", 1990:1998, "mean"),
list("proportion.extreme.poverty", 1990:1998, "mean"),
list("gini.imp", 1990:1998, "mean")
),
predictors.op = "mean",
dependent = "homicide.rates",
unit.variable = "code",
time.variable = "year",
unit.names.variable = "state",
treatment.identifier = 35,
controls.identifier = c(11:17, 21:27, 31:33, 41:43, 50:53),
time.predictors.prior = c(1990:1998),
time.optimize.ssr = c(1990:1998),
time.plot = c(1990:2009)
)
# Run synth
synth.out <- synth(dataprep.out)
# Get result tables
print(synth.tables <- synth.tab(
dataprep.res = dataprep.out,
synth.res = synth.out)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment