anglilian/leave2out.r

## leave2out.r
#leave-two-out test + matching

# Clear the workspace
rm(list = ls())

#load the required libraries

#Run the commented code to install the libraries
#install.packages("Synth")
#install.packages("gtools")
library(Synth)
library(gtools)


# Please set your working directory to the data/ folder
#setwd()

# Load data
#download the data from here -> https://doi.org/10.7910/DVN/0XOYTG/RIYVNR
df <- read.csv("df.csv", header = TRUE)

# Prepare dataset
df$state <- as.character(df$state) # required by dataprep()

#main model of Synthetic control + implementing better matching
dataprep.out3 <-
  dataprep(df,
           predictors = c("state.gdp.capita",
                          "state.gdp.growth.percent",
                          "population.projection.ln",
                          "years.schooling.imp"
           ),
           special.predictors = list(
             list("homicide.rates", 1993:1998, "mean"),
             list("proportion.extreme.poverty", 1993:1998, "mean"),
             list("gini.imp", 1993:1998, "mean")
           ),
           predictors.op = "mean",
           dependent     = "homicide.rates",
           unit.variable = "code",
           time.variable = "year",
           unit.names.variable   = "state",
           treatment.identifier  = 35,
           controls.identifier   = c(11:17, 21:27, 31:33, 41:43, 50:53),
           time.predictors.prior = c(1993:1998),
           time.optimize.ssr     = c(1993:1998),
           time.plot             = c(1993:2009)
  )

# Run synth
synth.out3 <- synth(dataprep.out3)

#leave-two-out analysis
# Loop over leave two outs

#this is to store the results of year values for each iteration
#there are 6 combinations, hence 10 columns
storegaps <- matrix(NA, length(1993:2009), 10)


#colnames stores the codes of the states left out
colnames(storegaps) <- c("14, 32", "14, 33", "14, 42", "14, 53",
                         "32, 33", "32, 42", "32, 53",
                         "33, 42", "33, 53",
                         "42, 53")

#this contains the codes for all states except Sao Paulo
co <- unique(df$code)
co <- co[-25]

#a list of all the combinations of the 5 states
comb.omit <- combinations(5, 2, c(14, 32, 33, 42, 53))

for(k in 1:10){

  # Data prep for training model
  omit <- comb.omit[k, ]

  # Prepare data for synth
  dataprep.out2 <-
    dataprep(df,
             predictors = c("state.gdp.capita",
                            "state.gdp.growth.percent",
                            "population.projection.ln",
                            "years.schooling.imp"
             ),
             special.predictors = list(
               list("homicide.rates", 1993:1998, "mean"),
               list("proportion.extreme.poverty", 1993:1998, "mean"),
               list("gini.imp", 1993:1998, "mean")
             ),
             predictors.op = "mean",
             dependent     = "homicide.rates",
             unit.variable = "code",
             time.variable = "year",
             unit.names.variable   = "state",
             treatment.identifier  = 35,
             controls.identifier   = co[-c(which(co==omit[1]), which(co==omit[2]))],
             time.predictors.prior = c(1993:1998),
             time.optimize.ssr     = c(1993:1998),
             time.plot             = c(1993:2009)
    )

  # Run synth
  synth.out2 <- synth(dataprep.out2)

  #Store the results
  storegaps[,k] <- (dataprep.out2$Y0%*%synth.out2$solution.w)
} # Close loop over leave one outs


# Leave-two-out: graph

#Plot the main model with Sao Paulo and Synthetic control + better matching
path.plot(synth.res    = synth.out3,
          dataprep.res = dataprep.out3,
          Ylab         = c("Homicide Rates"),
          Xlab         = c("Year"),
          Legend       = c("Sao Paulo","Synthetic Sao Paulo"),
          Legend.position = c("bottomleft")
)

#add the line at treatment
abline(v   = 1999,
       lty = 2)

#add the arrow and text to indicate policy change
arrows(1997, 50, 1999, 50,
       col    = "black",
       length = .1)

text(1995, 50,
     "Policy Change",
     cex = .8)

#plot all the leave 2 out synthetic controls
for(i in 1:10){
  lines(1993:2009,
        storegaps[,i],
        col = "darkgrey",
        lty = "solid")
}


#add the legend
legend(x = "bottomleft",
       legend = c("Sao Paulo",
                  "Synthetic Sao Paulo",
                  "Synthetic Sao Paulo (leave-two-out)"
       ),
       lty    = c("solid", "dashed", "solid"),
       col    = c("black", "black", "darkgrey"),
       cex    = .8,
       bg     = "white",
       lwdc(2, 2, 1)
)

## pvalue.r
# Clear the workspace
rm(list = ls())

# Load necessary packages
library(dplyr) # data manipulation
library(Synth) # models

# Load data
df <- read.csv("df.csv", header = TRUE)

# Prepare dataset
df$state <- as.character(df$state) # required by dataprep()

# Prepare data for synth
dataprep.out <-
  dataprep(df,
           predictors = c("state.gdp.capita",
                          "population.projection.ln",
                          "years.schooling.imp"
           ),
           special.predictors = list(
             list("state.gdp.growth.percent", 1993:1998, "mean"),
             list("homicide.rates", 1990:1998, "mean"),
             list("proportion.extreme.poverty", 1990:1998, "mean"),
             list("gini.imp", 1990:1998, "mean")
           ),
           predictors.op = "mean",
           dependent     = "homicide.rates",
           unit.variable = "code",
           time.variable = "year",
           unit.names.variable   = "state",
           treatment.identifier  = 35,
           controls.identifier   = c(11:17, 21:27, 31:33, 41:43, 50:53),
           time.predictors.prior = c(1990:1998),
           time.optimize.ssr     = c(1990:1998),
           time.plot             = c(1990:2009)
  )


## Permutation test
states <- c(11:17, 21:27, 31:33, 35, 41:43, 50:53)

# Prepare data for synth
results <- list()
results_synth <- list()
gaps <- list()

for (i in states) {
  dataprep.out <-
    dataprep(foo=df,
             predictors = c("state.gdp.capita",
                            "population.projection.ln",
                            "years.schooling.imp"
             ),
             special.predictors = list(
               list("state.gdp.growth.percent", 1993:1998, "mean"), #shortened pretreatment year from 2000 to 2003
               list("homicide.rates", 1990:1998, "mean"),
               list("proportion.extreme.poverty", 1990:1998, "mean"),
               list("gini.imp", 1990:1998, "mean")
             ),
             predictors.op = "mean",
             dependent     = "homicide.rates",
             unit.variable = "code",
             time.variable = "year",
             unit.names.variable   = "state",
             treatment.identifier  = i,
             controls.identifier   = states[which(states!=i)],
             time.predictors.prior = c(1990:1998),
             time.optimize.ssr     = c(1990:1998),
             time.plot             = c(1990:2009)

    )
  results[[as.character(i)]] <- dataprep.out
  results_synth[[as.character(i)]] <- synth(results[[as.character(i)]])
  gaps[[as.character(i)]] <- results[[as.character(i)]]$Y1plot - (results[[as.character(i)]]$Y0plot %*% results_synth[[as.character(i)]]$solution.w)

}

#Calculate MSPE
MSE <- c()
for (i in c(1:25)){
  year <- as.numeric(unlist(gaps[i]))
  MSE[i]<- mean(year[1:9]^2)
}

low.mspe <- MSE[-which(MSE > 2*MSE[18],)]

# Permutation graph: states with MSPE no higher than 2x SÃ£o Paulo's
selected_states <- match(low.mspe, MSE)

plot(1990:2009,
     ylim = c(-30, 30),
     xlim = c(1990,2009),
     ylab = "Gap in Homicide Rates",
     xlab = "Year"
)

for (i in selected_states) {
  lines(1990:2009,
        gaps[[i]],
        col = "lightgrey",
        lty = "solid",
        lwd = 2
  )
}

lines(1990:2009,
      gaps[["35"]], # SÃ£o Paulo
      col = "black",
      lty = "solid",
      lwd = 2
)

abline(v   = 1999,
       lty = 2)

abline(h   = 0,
       lty = 1,
       lwd = 1)

arrows(1997, 25, 1999, 25,
       col    = "black",
       length = .1)

text(1995, 25,
     "Policy Change",
     cex = .8)

legend(x = "bottomleft",
       legend = c("Sao Paulo",
                  "Control States (MSPE Less Than Two Times That of Sao Paulo)"),
       lty    = c("solid", "solid"),
       col    = c("black", "darkgrey"),
       cex    = .8,
       bg     = "white",
       lwdc(2, 2, 1)
)

#Calculate MSPE
MSE <- c()
for (i in selected_states){
  year <- as.numeric(unlist(gaps[i]))
  MSE[i]<- mean(year[10:20]^2)
}
MSE <- MSE[-which(is.na(MSE),)]
order(MSE)
MSE[18]
#Sao Paulo index 18
pvalue <- 5/length(MSE)

## synthetic_control.r
#####################
### Data Analysis ###
#####################

## Please set your working directory to the data/ folder

# Clear the workspace
rm(list = ls())

# Load necessary packages
library(dplyr) # data manipulation
library(Synth) # models

# Load data
df <- read.csv("df.csv", header = TRUE)

# Prepare dataset
df$state <- as.character(df$state) # required by dataprep()

# Prepare data for synth
dataprep.out <-
  dataprep(df,
           predictors = c("state.gdp.capita",
                          "population.projection.ln",
                          "years.schooling.imp"
           ),
           special.predictors = list(
             list("state.gdp.growth.percent", 1993:1998, "mean"), #shortened pretreatment year from 2000 to 2003
             list("homicide.rates", 1990:1998, "mean"),
             list("proportion.extreme.poverty", 1990:1998, "mean"),
             list("gini.imp", 1990:1998, "mean")
           ),
           predictors.op = "mean",
           dependent     = "homicide.rates",
           unit.variable = "code",
           time.variable = "year",
           unit.names.variable   = "state",
           treatment.identifier  = 35,
           controls.identifier   = c(11:17, 21:27, 31:33, 41:43, 50:53),
           time.predictors.prior = c(1990:1998),
           time.optimize.ssr     = c(1990:1998),
           time.plot             = c(1990:2009)
  )

# Run synth
synth.out <- synth(dataprep.out)

# Get result tables
print(synth.tables   <- synth.tab(
  dataprep.res = dataprep.out,
  synth.res    = synth.out)
)
	#leave-two-out test + matching

	# Clear the workspace
	rm(list = ls())

	#load the required libraries

	#Run the commented code to install the libraries
	#install.packages("Synth")
	#install.packages("gtools")
	library(Synth)
	library(gtools)


	# Please set your working directory to the data/ folder
	#setwd()

	# Load data
	#download the data from here -> https://doi.org/10.7910/DVN/0XOYTG/RIYVNR
	df <- read.csv("df.csv", header = TRUE)

	# Prepare dataset
	df$state <- as.character(df$state) # required by dataprep()

	#main model of Synthetic control + implementing better matching
	dataprep.out3 <-
	dataprep(df,
	predictors = c("state.gdp.capita",
	"state.gdp.growth.percent",
	"population.projection.ln",
	"years.schooling.imp"
	),
	special.predictors = list(
	list("homicide.rates", 1993:1998, "mean"),
	list("proportion.extreme.poverty", 1993:1998, "mean"),
	list("gini.imp", 1993:1998, "mean")
	),
	predictors.op = "mean",
	dependent = "homicide.rates",
	unit.variable = "code",
	time.variable = "year",
	unit.names.variable = "state",
	treatment.identifier = 35,
	controls.identifier = c(11:17, 21:27, 31:33, 41:43, 50:53),
	time.predictors.prior = c(1993:1998),
	time.optimize.ssr = c(1993:1998),
	time.plot = c(1993:2009)
	)

	# Run synth
	synth.out3 <- synth(dataprep.out3)

	#leave-two-out analysis
	# Loop over leave two outs

	#this is to store the results of year values for each iteration
	#there are 6 combinations, hence 10 columns
	storegaps <- matrix(NA, length(1993:2009), 10)


	#colnames stores the codes of the states left out
	colnames(storegaps) <- c("14, 32", "14, 33", "14, 42", "14, 53",
	"32, 33", "32, 42", "32, 53",
	"33, 42", "33, 53",
	"42, 53")

	#this contains the codes for all states except Sao Paulo
	co <- unique(df$code)
	co <- co[-25]

	#a list of all the combinations of the 5 states
	comb.omit <- combinations(5, 2, c(14, 32, 33, 42, 53))

	for(k in 1:10){

	# Data prep for training model
	omit <- comb.omit[k, ]

	# Prepare data for synth
	dataprep.out2 <-
	dataprep(df,
	predictors = c("state.gdp.capita",
	"state.gdp.growth.percent",
	"population.projection.ln",
	"years.schooling.imp"
	),
	special.predictors = list(
	list("homicide.rates", 1993:1998, "mean"),
	list("proportion.extreme.poverty", 1993:1998, "mean"),
	list("gini.imp", 1993:1998, "mean")
	),
	predictors.op = "mean",
	dependent = "homicide.rates",
	unit.variable = "code",
	time.variable = "year",
	unit.names.variable = "state",
	treatment.identifier = 35,
	controls.identifier = co[-c(which(co==omit[1]), which(co==omit[2]))],
	time.predictors.prior = c(1993:1998),
	time.optimize.ssr = c(1993:1998),
	time.plot = c(1993:2009)
	)

	# Run synth
	synth.out2 <- synth(dataprep.out2)

	#Store the results
	storegaps[,k] <- (dataprep.out2$Y0%*%synth.out2$solution.w)
	} # Close loop over leave one outs


	# Leave-two-out: graph

	#Plot the main model with Sao Paulo and Synthetic control + better matching
	path.plot(synth.res = synth.out3,
	dataprep.res = dataprep.out3,
	Ylab = c("Homicide Rates"),
	Xlab = c("Year"),
	Legend = c("Sao Paulo","Synthetic Sao Paulo"),
	Legend.position = c("bottomleft")
	)

	#add the line at treatment
	abline(v = 1999,
	lty = 2)

	#add the arrow and text to indicate policy change
	arrows(1997, 50, 1999, 50,
	col = "black",
	length = .1)

	text(1995, 50,
	"Policy Change",
	cex = .8)

	#plot all the leave 2 out synthetic controls
	for(i in 1:10){
	lines(1993:2009,
	storegaps[,i],
	col = "darkgrey",
	lty = "solid")
	}


	#add the legend
	legend(x = "bottomleft",
	legend = c("Sao Paulo",
	"Synthetic Sao Paulo",
	"Synthetic Sao Paulo (leave-two-out)"
	),
	lty = c("solid", "dashed", "solid"),
	col = c("black", "black", "darkgrey"),
	cex = .8,
	bg = "white",
	lwdc(2, 2, 1)
	)
	# Clear the workspace
	rm(list = ls())

	# Load necessary packages
	library(dplyr) # data manipulation
	library(Synth) # models

	# Load data
	df <- read.csv("df.csv", header = TRUE)

	# Prepare dataset
	df$state <- as.character(df$state) # required by dataprep()

	# Prepare data for synth
	dataprep.out <-
	dataprep(df,
	predictors = c("state.gdp.capita",
	"population.projection.ln",
	"years.schooling.imp"
	),
	special.predictors = list(
	list("state.gdp.growth.percent", 1993:1998, "mean"),
	list("homicide.rates", 1990:1998, "mean"),
	list("proportion.extreme.poverty", 1990:1998, "mean"),
	list("gini.imp", 1990:1998, "mean")
	),
	predictors.op = "mean",
	dependent = "homicide.rates",
	unit.variable = "code",
	time.variable = "year",
	unit.names.variable = "state",
	treatment.identifier = 35,
	controls.identifier = c(11:17, 21:27, 31:33, 41:43, 50:53),
	time.predictors.prior = c(1990:1998),
	time.optimize.ssr = c(1990:1998),
	time.plot = c(1990:2009)
	)


	## Permutation test
	states <- c(11:17, 21:27, 31:33, 35, 41:43, 50:53)

	# Prepare data for synth
	results <- list()
	results_synth <- list()
	gaps <- list()

	for (i in states) {
	dataprep.out <-
	dataprep(foo=df,
	predictors = c("state.gdp.capita",
	"population.projection.ln",
	"years.schooling.imp"
	),
	special.predictors = list(
	list("state.gdp.growth.percent", 1993:1998, "mean"), #shortened pretreatment year from 2000 to 2003
	list("homicide.rates", 1990:1998, "mean"),
	list("proportion.extreme.poverty", 1990:1998, "mean"),
	list("gini.imp", 1990:1998, "mean")
	),
	predictors.op = "mean",
	dependent = "homicide.rates",
	unit.variable = "code",
	time.variable = "year",
	unit.names.variable = "state",
	treatment.identifier = i,
	controls.identifier = states[which(states!=i)],
	time.predictors.prior = c(1990:1998),
	time.optimize.ssr = c(1990:1998),
	time.plot = c(1990:2009)

	)
	results[[as.character(i)]] <- dataprep.out
	results_synth[[as.character(i)]] <- synth(results[[as.character(i)]])
	gaps[[as.character(i)]] <- results[[as.character(i)]]$Y1plot - (results[[as.character(i)]]$Y0plot %*% results_synth[[as.character(i)]]$solution.w)

	}

	#Calculate MSPE
	MSE <- c()
	for (i in c(1:25)){
	year <- as.numeric(unlist(gaps[i]))
	MSE[i]<- mean(year[1:9]^2)
	}

	low.mspe <- MSE[-which(MSE > 2*MSE[18],)]

	# Permutation graph: states with MSPE no higher than 2x SÃ£o Paulo's
	selected_states <- match(low.mspe, MSE)

	plot(1990:2009,
	ylim = c(-30, 30),
	xlim = c(1990,2009),
	ylab = "Gap in Homicide Rates",
	xlab = "Year"
	)

	for (i in selected_states) {
	lines(1990:2009,
	gaps[[i]],
	col = "lightgrey",
	lty = "solid",
	lwd = 2
	)
	}

	lines(1990:2009,
	gaps[["35"]], # SÃ£o Paulo
	col = "black",
	lty = "solid",
	lwd = 2
	)

	abline(v = 1999,
	lty = 2)

	abline(h = 0,
	lty = 1,
	lwd = 1)

	arrows(1997, 25, 1999, 25,
	col = "black",
	length = .1)

	text(1995, 25,
	"Policy Change",
	cex = .8)

	legend(x = "bottomleft",
	legend = c("Sao Paulo",
	"Control States (MSPE Less Than Two Times That of Sao Paulo)"),
	lty = c("solid", "solid"),
	col = c("black", "darkgrey"),
	cex = .8,
	bg = "white",
	lwdc(2, 2, 1)
	)

	#Calculate MSPE
	MSE <- c()
	for (i in selected_states){
	year <- as.numeric(unlist(gaps[i]))
	MSE[i]<- mean(year[10:20]^2)
	}
	MSE <- MSE[-which(is.na(MSE),)]
	order(MSE)
	MSE[18]
	#Sao Paulo index 18
	pvalue <- 5/length(MSE)
	#####################
	### Data Analysis ###
	#####################

	## Please set your working directory to the data/ folder

	# Clear the workspace
	rm(list = ls())

	# Load necessary packages
	library(dplyr) # data manipulation
	library(Synth) # models

	# Load data
	df <- read.csv("df.csv", header = TRUE)

	# Prepare dataset
	df$state <- as.character(df$state) # required by dataprep()

	# Prepare data for synth
	dataprep.out <-
	dataprep(df,
	predictors = c("state.gdp.capita",
	"population.projection.ln",
	"years.schooling.imp"
	),
	special.predictors = list(
	list("state.gdp.growth.percent", 1993:1998, "mean"), #shortened pretreatment year from 2000 to 2003
	list("homicide.rates", 1990:1998, "mean"),
	list("proportion.extreme.poverty", 1990:1998, "mean"),
	list("gini.imp", 1990:1998, "mean")
	),
	predictors.op = "mean",
	dependent = "homicide.rates",
	unit.variable = "code",
	time.variable = "year",
	unit.names.variable = "state",
	treatment.identifier = 35,
	controls.identifier = c(11:17, 21:27, 31:33, 41:43, 50:53),
	time.predictors.prior = c(1990:1998),
	time.optimize.ssr = c(1990:1998),
	time.plot = c(1990:2009)
	)

	# Run synth
	synth.out <- synth(dataprep.out)

	# Get result tables
	print(synth.tables <- synth.tab(
	dataprep.res = dataprep.out,
	synth.res = synth.out)
	)