Skip to content

Instantly share code, notes, and snippets.

@BioSciEconomist
Last active October 6, 2021 17:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save BioSciEconomist/6eb824527c03e12372667fb8861299bd to your computer and use it in GitHub Desktop.
Save BioSciEconomist/6eb824527c03e12372667fb8861299bd to your computer and use it in GitHub Desktop.
Intuition for how synthetic control methods work
# *-----------------------------------------------------------------
# | PROGRAM NAME: ex toy synthetic controls.R
# | DATE: 4/23/19
# | CREATED BY: MATT BOGARD
# | PROJECT FILE: Macintosh HD ▸ Users ▸ amandabogard ▸ Google Drive ▸ R Training
# *----------------------------------------------------------------
# | PURPOSE: intuition for how synthetic control methods work
# *----------------------------------------------------------------
# Setup ----
# Synth supplies dataprep(), synth(), path.plot() and synth.tab() used below.
library(Synth)

# Favor plain decimal notation over scientific notation and print 4
# significant digits.
options(scipen = 100, digits = 4)

# Load the toy panel data. The absolute path is specific to the author's
# machine and must be adjusted before running elsewhere.
df <- read.csv("/Users/amandabogard/Google Drive/R Training/toysynth.txt")
head(df)

# dataprep() needs the unit-name column to be of mode character.
df$state <- as.character(df$state)
# Build the input object for synth() from the long-format panel in df.
# NOTE(review): the 1990-1995 means of X1, X2 and X3 appear to enter twice,
# once through `predictors` (averaged over time.predictors.prior) and once
# through `special.predictors` -- confirm this duplication is intended.
dataprep.out <- dataprep(
  foo = df,

  # Covariates used to build the synthetic control; each is aggregated with
  # predictors.op over the periods in time.predictors.prior.
  predictors = c("X1", "X2", "X3"),
  predictors.op = "mean",

  # Outcome column, numeric unit id column, and time column of the panel.
  dependent = "Y",
  unit.variable = "ID",
  time.variable = "year",

  # Additional matching variables, each given as
  # list(column, periods, operator). Here: the 1990-1995 mean of the outcome
  # Y and of each covariate.
  special.predictors = list(
    list("Y", 1990:1995, "mean"),
    list("X1", 1990:1995, "mean"),
    list("X2", 1990:1995, "mean"),
    list("X3", 1990:1995, "mean")
  ),

  # Unit ID 1 is the treated unit (KY in the toy data listed at the end of
  # this file).
  treatment.identifier = 1,

  # Donor pool: the units that are weighted to form the synthetic control
  # (IDs 2, 3, 4 are TN, CA, IN in the toy data).
  controls.identifier = c(2, 3, 4),

  # Pre-treatment periods over which the predictors are averaged.
  time.predictors.prior = 1990:1995,

  # Periods of the outcome over which the loss function is minimized.
  time.optimize.ssr = 1990:1995,

  # Column holding the unit names; it has to be of mode character.
  unit.names.variable = "state",

  # Periods over which results are plotted.
  time.plot = 1990:1998
)
# Fit the synthetic control from the prepared data.
synth.out <- synth(data.prep.obj = dataprep.out)

# Plot the outcome trend of the treated unit against its synthetic control
# over the periods given in time.plot.
path.plot(
  synth.res = synth.out,
  dataprep.res = dataprep.out,
  Ylab = "made up outcome",
  Xlab = "year",
  Legend = c("KY", "Synthetic KY"),
  Legend.position = "bottomright"
)
# Gaps: treated outcome minus the synthetic control outcome, where the
# synthetic outcome is the control outcomes (Y0plot) weighted by solution.w.
# The result is stored but not printed here.
gaps <- dataprep.out$Y1plot -
  (dataprep.out$Y0plot %*% synth.out$solution.w)

# Pre-built summary tables from the dataprep and synth results.
synth.tables <- synth.tab(synth.res = synth.out, dataprep.res = dataprep.out)

# Balance check: first five rows of the pre-treatment predictor comparison
# (treated unit, synthetic control, and all units in the sample).
synth.tables$tab.pred[seq_len(5), ]

# Weight assigned to each control unit.
synth.tables$tab.w
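# Toy data source, read as CSV by read.csv() above (presumably the contents of
# toysynth.txt once the leading '#' is removed from each line below).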
#ID,year,state,Y,X1,X2,X3
#1,1990,KY,.45,50000,25,10
#1,1991,KY,.45,51000,26,10
#1,1992,KY,.46,52000,27,10
#1,1993,KY,.48,52000,28,10
#1,1994,KY,.48,52000,28,10
#1,1995,KY,.48,53000,27,15
#1,1996,KY,.49,53000,24,15
#1,1997,KY,.50,54000,24,15
#1,1998,KY,.51,55000,23,15
#2,1990,TN,.45,52000,23,12
#2,1991,TN,.45,51000,23,12
#2,1992,TN,.44,53000,24,12
#2,1993,TN,.45,51000,26,12
#2,1994,TN,.44,52000,25,12
#2,1995,TN,.43,54000,26,14
#2,1996,TN,.42,54000,25,14
#2,1997,TN,.40,55000,26,14
#2,1998,TN,.41,56000,25,14
#3,1990,CA,.89,102000,10,20
#3,1991,CA,.90,102500,11,20
#3,1992,CA,.90,103000,13,20
#3,1993,CA,.92,103500,12,20
#3,1994,CA,.93,104000,11,20
#3,1995,CA,.93,104000,12,25
#3,1996,CA,.94,104500,14,25
#3,1997,CA,.94,105000,12,25
#3,1998,CA,.95,105000,10,25
#4,1990,IN,.43,52000,25,10
#4,1991,IN,.44,52000,26,10
#4,1992,IN,.42,53000,26,10
#4,1993,IN,.46,53500,27,10
#4,1994,IN,.45,53500,28,10
#4,1995,IN,.46,54000,26,12
#4,1996,IN,.47,54000,26,12
#4,1997,IN,.45,54500,25,12
#4,1998,IN,.46,55000,24,12
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment