Last active
October 6, 2021 17:24
-
-
Save BioSciEconomist/6eb824527c03e12372667fb8861299bd to your computer and use it in GitHub Desktop.
Intuition for how synthetic control methods work
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# *-----------------------------------------------------------------
# | PROGRAM NAME: ex toy synthetic controls.R
# | DATE: 4/23/19
# | CREATED BY: MATT BOGARD
# | PROJECT FILE: Macintosh HD ▸ Users ▸ amandabogard ▸ Google Drive ▸ R Training
# *----------------------------------------------------------------
# | PURPOSE: intuition for how synthetic control methods work
# *----------------------------------------------------------------
# ---- setup: load the package and read the toy panel data ----
library(Synth) # provides dataprep(), synth(), path.plot(), synth.tab()

# override R's tendency to use scientific notation and limit printed digits
options(scipen = 100, digits = 4)

# read the toy panel; the expected columns (ID, year, state, Y, X1, X2, X3)
# are listed in the commented data at the bottom of this file
df <- read.csv("/Users/amandabogard/Google Drive/R Training/toysynth.txt")
head(df) # quick look at the first rows

# Synth requires the unit name variable to be in character format
df$state <- as.character(df$state)
# run Synth's data prep function: builds the treated/control predictor and
# outcome matrices that synth() consumes
dataprep.out <- dataprep(
  foo = df,
  # predictors used to build the synthetic control unit
  predictors = c("X1", "X2", "X3"),
  # predictors are aggregated over time.predictors.prior via averaging
  predictors.op = "mean",
  # outcome variable
  dependent = "Y",
  # numeric unit identifier (level of analysis: state)
  unit.variable = "ID",
  # the panel is tracked by year
  time.variable = "year",
  # special.predictors lets you control the time periods for individual
  # predictor variables; here the pre-period (1990-1995) means of the outcome
  # Y and of X1-X3 are used as additional 'matching' variables.
  # NOTE(review): X1-X3 already enter through `predictors` averaged over the
  # same 1990-1995 window, so these three entries look redundant -- confirm
  # whether the duplication is intended before removing it.
  special.predictors = list(
    list("Y", 1990:1995, "mean"),  # pre-period mean of Y
    list("X1", 1990:1995, "mean"), # pre-period mean of X1
    list("X2", 1990:1995, "mean"), # pre-period mean of X2
    list("X3", 1990:1995, "mean")  # pre-period mean of X3
  ),
  # ID 1 (KY in the toy data, matching the plot legend) is the treated unit
  treatment.identifier = 1,
  # IDs 2, 3, 4 (TN, CA, IN in the toy data) form the donor pool that is
  # weighted to create the synthetic control
  controls.identifier = c(2, 3, 4),
  # pretreatment periods over which the predictors are averaged
  time.predictors.prior = 1990:1995,
  # periods of the dependent variable over which the loss function is minimized
  time.optimize.ssr = 1990:1995,
  # column holding the unit names; must be of mode character
  unit.names.variable = "state",
  # periods over which results are to be plotted
  time.plot = 1990:1998
)
# estimate the donor weights that define the synthetic control
synth.out <- synth(dataprep.out)

# plot treated vs synthetic control outcome trends
path.plot(
  synth.res = synth.out,
  dataprep.res = dataprep.out,
  Ylab = "made up outcome",
  Xlab = "year",
  Legend = c("KY", "Synthetic KY"),
  Legend.position = "bottomright"
)

# gaps: treated outcome minus the weighted (synthetic) control outcome,
# one value per plotted period
gaps <- dataprep.out$Y1plot - (dataprep.out$Y0plot %*% synth.out$solution.w)

# pre-built tables from the synth objects
synth.tables <- synth.tab(dataprep.res = dataprep.out, synth.res = synth.out)

# compare pre-treatment predictor values for the treated unit, the synthetic
# control unit, and all units in the sample. The whole table is shown rather
# than a hard-coded [1:5, ] slice: the data prep step defines 3 predictors plus
# 4 special predictors, so a five-row slice would hide some of them.
synth.tables$tab.pred

# view control unit weights
synth.tables$tab.w
# toy data source: save the rows below (without the leading '#') as toysynth.txt; the script reads it as csv
#ID,year,state,Y,X1,X2,X3 | |
#1,1990,KY,.45,50000,25,10 | |
#1,1991,KY,.45,51000,26,10 | |
#1,1992,KY,.46,52000,27,10 | |
#1,1993,KY,.48,52000,28,10 | |
#1,1994,KY,.48,52000,28,10 | |
#1,1995,KY,.48,53000,27,15 | |
#1,1996,KY,.49,53000,24,15 | |
#1,1997,KY,.50,54000,24,15 | |
#1,1998,KY,.51,55000,23,15 | |
#2,1990,TN,.45,52000,23,12 | |
#2,1991,TN,.45,51000,23,12 | |
#2,1992,TN,.44,53000,24,12 | |
#2,1993,TN,.45,51000,26,12 | |
#2,1994,TN,.44,52000,25,12 | |
#2,1995,TN,.43,54000,26,14 | |
#2,1996,TN,.42,54000,25,14 | |
#2,1997,TN,.40,55000,26,14 | |
#2,1998,TN,.41,56000,25,14 | |
#3,1990,CA,.89,102000,10,20 | |
#3,1991,CA,.90,102500,11,20 | |
#3,1992,CA,.90,103000,13,20 | |
#3,1993,CA,.92,103500,12,20 | |
#3,1994,CA,.93,104000,11,20 | |
#3,1995,CA,.93,104000,12,25 | |
#3,1996,CA,.94,104500,14,25 | |
#3,1997,CA,.94,105000,12,25 | |
#3,1998,CA,.95,105000,10,25 | |
#4,1990,IN,.43,52000,25,10 | |
#4,1991,IN,.44,52000,26,10 | |
#4,1992,IN,.42,53000,26,10 | |
#4,1993,IN,.46,53500,27,10 | |
#4,1994,IN,.45,53500,28,10 | |
#4,1995,IN,.46,54000,26,12 | |
#4,1996,IN,.47,54000,26,12 | |
#4,1997,IN,.45,54500,25,12 | |
#4,1998,IN,.46,55000,24,12 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment