nazlialagoz/synthdid_estimation.R

## synthdid_estimation.R
# Install and load the required packages
# devtools::install_github("synth-inference/synthdid")
library(synthdid)
library(ggplot2)
library(fixest) # Fixed-effects regression
library(data.table)

# Set seed for reproducibility
set.seed(12345)

source('sim_data.R') # Import simulation function and some utilities

dt <- sim_data()
head(dt)

# Convert the data into a matrix
setup = panel.matrices(dt, unit = 'country', time = 'period',
                       outcome = 'revenue', treatment = 'treat')

# Estimate treatment effect using SynthDiD
tau.hat = synthdid_estimate(setup$Y, setup$N0, setup$T0)
print(summary(tau.hat))

# Calculate standard errors
se = sqrt(vcov(tau.hat, method='jackknife'))
te_est <- sprintf('Point estimate for the treatment effect: %1.2f', tau.hat)
CI <- sprintf('95%% CI (%1.2f, %1.2f)', tau.hat - 1.96 * se, tau.hat + 1.96 * se)

# Plot treatment effect estimates
plot(tau.hat)
plot(tau.hat, se.method='jackknife')

# Plot control unit contributions
synthdid_units_plot(tau.hat, se.method='jackknife') +
  labs(x = 'Country', y = 'Treatment effect',
       caption = 'The black horizontal line shows the actual effect;
       the gray ones show the endpoints of a 95% confidence interval.')
ggsave('../figures/unit_weights.png')

# Check for pre-treatment parallel trends
plot(tau.hat, overlay=1, se.method='jackknife')
ggsave('../figures/results_simple.png')

# Check the number of treatment and control countries to report
num_treated <- length(unique(dt[treat==1]$country))
num_control <- length(unique(dt$country))-num_treated

# Create spaghetti plot with top 10 control units
top.controls = synthdid_controls(tau.hat)[1:10, , drop=FALSE]
plot(tau.hat, spaghetti.units=rownames(top.controls),
     trajectory.linetype = 1, line.width=.75,
     trajectory.alpha=.9, effect.alpha=.9,
     diagram.alpha=1, onset.alpha=.9, ci.alpha = .3, spaghetti.line.alpha	=.2,
     spaghetti.label.alpha = .1, overlay = 1) +
  labs(x = 'Period', y = 'Revenue', title = 'Estimation Results',
       subtitle = paste0(te_est, ', ', CI, '.'),
       caption = paste0('The number of treatment and control units: ', num_treated, ' and ', num_control, '.'))
ggsave('../figures/results.png')

fe <- feols(revenue~treat, dt, cluster = 'country', panel.id = 'country',
      fixef = c('country', 'period'))
summary(fe)

## synthdid_sim_data.R
# Simulate the data
library(data.table)
library(tidyverse)

select <- dplyr::select
set.seed(123)

# Initiate the data -----------------------------------------------------------
# Create a function that takes in the number of units, the number of periods,
# a treatment effect that will accumulate overtime (tau), and
# cohort periods (in which periods a cohort is treated).

calc_dep_var <- function(constant, unit_fe, period_fe, treatment_effect, error){
  dep_var = constant + unit_fe + period_fe + treatment_effect + error
}

init_data <- function(num_unit, num_period, treatment_effect, constant, treatment_period){
  end_period = num_period-1 # periods start from 0
  # Fixed Effects ------------------------------------------------
  # unit fixed effects
  unit <- tibble(
    unit = 1:num_unit,
    unit_fe = rnorm(num_unit, 0, .01),
    # Assign units to  treatment vs  nontreatment
    treatment_group = sample(c(0,1), num_unit, replace = TRUE, prob = c(.88,.12)))

  # period fixed effects
  period <- tibble(
    period = 0:end_period,
    period_fe = rnorm(num_period, 0, .01))

  # Trend Break -------------------------------------------------------------

  # make main dataset
  # full interaction of unit X period
  tot_num_obs = num_unit*num_period

  expand_grid(unit = 1:num_unit, period = 0:end_period) %>%
    left_join(., unit) %>%
    left_join(., period) %>%
    # make error term and get treatment indicators and treatment effects
    mutate(error = rnorm(tot_num_obs, 0, .1),
           treat = ifelse(treatment_group ==1 & period >= treatment_period, 1, 0),
           treatment_effect = ifelse(treat == 1, treatment_effect, 0)) %>%
    # calculate the dependent variable
    mutate(dep_var = calc_dep_var(constant, unit_fe, period_fe, treatment_effect,error))
}

sim_data <- function(...){
  constant = 80
  data <- as.data.table(init_data(num_unit = 30,
                                  num_period = 30,
                                  treatment_effect = -.5,
                                  treatment_period = 20,
                                  constant = constant))
  data[, c('unit_fe', 'error', 'period_fe', 'treatment_effect', 'treatment_group'):=NULL]
  setnames(data, 'dep_var', 'revenue')
  setkeyv(data, c('unit', 'period'))
  setnames(data, 'unit', 'country')

  return(data)
}
	# Install and load the required packages
	# devtools::install_github("synth-inference/synthdid")
	library(synthdid)
	library(ggplot2)
	library(fixest) # Fixed-effects regression
	library(data.table)

	# Set seed for reproducibility
	set.seed(12345)

	source('sim_data.R') # Import simulation function and some utilities

	dt <- sim_data()
	head(dt)

	# Convert the data into a matrix
	setup = panel.matrices(dt, unit = 'country', time = 'period',
	outcome = 'revenue', treatment = 'treat')

	# Estimate treatment effect using SynthDiD
	tau.hat = synthdid_estimate(setup$Y, setup$N0, setup$T0)
	print(summary(tau.hat))

	# Calculate standard errors
	se = sqrt(vcov(tau.hat, method='jackknife'))
	te_est <- sprintf('Point estimate for the treatment effect: %1.2f', tau.hat)
	CI <- sprintf('95%% CI (%1.2f, %1.2f)', tau.hat - 1.96 * se, tau.hat + 1.96 * se)

	# Plot treatment effect estimates
	plot(tau.hat)
	plot(tau.hat, se.method='jackknife')

	# Plot control unit contributions
	synthdid_units_plot(tau.hat, se.method='jackknife') +
	labs(x = 'Country', y = 'Treatment effect',
	caption = 'The black horizontal line shows the actual effect;
	the gray ones show the endpoints of a 95% confidence interval.')
	ggsave('../figures/unit_weights.png')

	# Check for pre-treatment parallel trends
	plot(tau.hat, overlay=1, se.method='jackknife')
	ggsave('../figures/results_simple.png')

	# Check the number of treatment and control countries to report
	num_treated <- length(unique(dt[treat==1]$country))
	num_control <- length(unique(dt$country))-num_treated

	# Create spaghetti plot with top 10 control units
	top.controls = synthdid_controls(tau.hat)[1:10, , drop=FALSE]
	plot(tau.hat, spaghetti.units=rownames(top.controls),
	trajectory.linetype = 1, line.width=.75,
	trajectory.alpha=.9, effect.alpha=.9,
	diagram.alpha=1, onset.alpha=.9, ci.alpha = .3, spaghetti.line.alpha =.2,
	spaghetti.label.alpha = .1, overlay = 1) +
	labs(x = 'Period', y = 'Revenue', title = 'Estimation Results',
	subtitle = paste0(te_est, ', ', CI, '.'),
	caption = paste0('The number of treatment and control units: ', num_treated, ' and ', num_control, '.'))
	ggsave('../figures/results.png')

	fe <- feols(revenue~treat, dt, cluster = 'country', panel.id = 'country',
	fixef = c('country', 'period'))
	summary(fe)
	# Simulate the data
	library(data.table)
	library(tidyverse)

	select <- dplyr::select
	set.seed(123)

	# Initiate the data -----------------------------------------------------------
	# Create a function that takes in the number of units, the number of periods,
	# a treatment effect that will accumulate overtime (tau), and
	# cohort periods (in which periods a cohort is treated).

	calc_dep_var <- function(constant, unit_fe, period_fe, treatment_effect, error){
	dep_var = constant + unit_fe + period_fe + treatment_effect + error
	}

	init_data <- function(num_unit, num_period, treatment_effect, constant, treatment_period){
	end_period = num_period-1 # periods start from 0
	# Fixed Effects ------------------------------------------------
	# unit fixed effects
	unit <- tibble(
	unit = 1:num_unit,
	unit_fe = rnorm(num_unit, 0, .01),
	# Assign units to treatment vs nontreatment
	treatment_group = sample(c(0,1), num_unit, replace = TRUE, prob = c(.88,.12)))

	# period fixed effects
	period <- tibble(
	period = 0:end_period,
	period_fe = rnorm(num_period, 0, .01))

	# Trend Break -------------------------------------------------------------

	# make main dataset
	# full interaction of unit X period
	tot_num_obs = num_unit*num_period

	expand_grid(unit = 1:num_unit, period = 0:end_period) %>%
	left_join(., unit) %>%
	left_join(., period) %>%
	# make error term and get treatment indicators and treatment effects
	mutate(error = rnorm(tot_num_obs, 0, .1),
	treat = ifelse(treatment_group ==1 & period >= treatment_period, 1, 0),
	treatment_effect = ifelse(treat == 1, treatment_effect, 0)) %>%
	# calculate the dependent variable
	mutate(dep_var = calc_dep_var(constant, unit_fe, period_fe, treatment_effect,error))
	}

	sim_data <- function(...){
	constant = 80
	data <- as.data.table(init_data(num_unit = 30,
	num_period = 30,
	treatment_effect = -.5,
	treatment_period = 20,
	constant = constant))
	data[, c('unit_fe', 'error', 'period_fe', 'treatment_effect', 'treatment_group'):=NULL]
	setnames(data, 'dep_var', 'revenue')
	setkeyv(data, c('unit', 'period'))
	setnames(data, 'unit', 'country')

	return(data)
	}