Skip to content

Instantly share code, notes, and snippets.

@nazlialagoz
Last active February 24, 2023 00:38
Show Gist options
  • Save nazlialagoz/85d0713f157ffeacdc71b581a0da07d0 to your computer and use it in GitHub Desktop.
Save nazlialagoz/85d0713f157ffeacdc71b581a0da07d0 to your computer and use it in GitHub Desktop.
Difference-in-differences with variation in treatment timing - Analysis
# Simulation study for the DiD article:
rm(list = ls())
library(data.table)
library(fastDummies) # Create dummy variables
library(fixest) # Fixed-effects regression
library(kableExtra) # Make nice tables
library(bacondecomp) # Goodman-Bacon Decomposition
library(did) # Difference-in-differences
source('sim_data.R') # Import simulation function and some utilities
data <- sim_data()
# EDA and Analysis --------------------------------------------------------
# Check out the data
select_cols <- c('unit', 'period', 'cohort_period','treat','hrs_listened')
kable(head(data[, ..select_cols]), 'simple')
kable(summary(data[, ..select_cols]), 'simple')
data[, .N, by = cohort_period] # group sizes
data[, .(mean_hrs_listened = mean(hrs_listened)), by = cohort_period]
# Visualize the outcome variable
avg_dv_period <- data[, .(mean_hrs_listened = mean(hrs_listened)), by = c('cohort_period','period')]
ggplot(avg_dv_period, aes(fill=factor(cohort_period), y=mean_hrs_listened, x=period)) +
geom_bar(position=position_dodge2(), stat="identity") + coord_cartesian(ylim=c(79,85))+
labs(x = "Period", y = "Hours", title = 'Average music listening (hours)',
caption = 'Cohort 2 is the early treated, and cohort 3 is the late treated group.',
subtitle = 'Treatment effect heterogeneity across cohorts') +
theme(legend.position = 'bottom',
axis.title = element_text(size = 14),
axis.text = element_text(size = 12)) + scale_fill_manual(values=cbPalette) +
geom_vline(xintercept = 1.5, color = '#0072B2', lty = 5)+
geom_vline(xintercept = 2.5, color = '#D55E00', lty = 5) +
geom_text(label = 'Cohort period 2 is treated',aes(1.4,83), color = '#0072B2', angle = 90)+
geom_text(label = 'Cohort period 3 is treated',aes(2.4,83), color = '#D55E00', angle = 90) +
guides(fill=guide_legend(title="Treatment cohort period"))
# Visualize treatment effects
avg_treat_period <- data[treat == 1, .(mean_treat_effect = mean(tau_cum)), by = c('cohort_period','period')]
plot_te <- ggplot(avg_treat_period, aes(fill=factor(cohort_period), y=mean_treat_effect, x=period)) +
geom_bar(position=position_dodge2(preserve = "single"), stat="identity") +
labs(x = "Period", y = "Hours", title = 'True treatment effects (hrs)',
caption = 'Cohort 2 is the early treated and cohort 3 is the late treated group.',
subtitle = 'Treatment effect heterogeneity across cohorts') +
theme(legend.position = 'bottom',
axis.title = element_text(size = 14),
axis.text = element_text(size = 12)) + scale_fill_manual(values=cbPalette) +
guides(fill=guide_legend(title="Treatment cohort period")) +
scale_x_continuous(breaks = unique(data$period)) +
scale_y_continuous(breaks = round(unique(avg_treat_period$mean_treat_effect)))
plot_te
# A) Canonical DiD -------------------------------------------------------------
formula <- as.formula('hrs_listened ~ treat')
canonical_did <- feols(formula,
data = data, panel.id = "unit",
fixef = c("unit", "period"), cluster = "unit")
summary(canonical_did)
# Bacon Decomposition
# bacon_decomp <- bacon(formula, data, id_var="unit", time_var='period', quietly = F)
# sum(bacon_decomp$weight * bacon_decomp$estimate)
# B) DiD with post-period-cohort specific TEs ----------------------------------
# Drop periods where everyone is treated
data <- data[period < 3]
# Create dummy variables
data <- data %>%
dummy_cols(select_columns = c("cohort_period", "period"))
interact_covs <- 'cohort_period_2:period_2'
# Regression
formula <- as.formula(paste0('hrs_listened ~ ',interact_covs))
model <- feols(formula,
data = data, panel.id = "unit",
fixef = c("unit", "period"), cluster = "unit")
summary(model)
# did package by Santanna & Callaway
out <- att_gt(yname = "hrs_listened",
gname = "cohort_period",
idname = "unit",
tname = "period",
xformla = ~1,
data = data,
est_method = "reg",
control_group = 'notyettreated'
)
out
es <- aggte(out, type = "dynamic")
group_effects <- aggte(out, type = "group")
ggdid(out)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment