Last active
February 24, 2023 00:38
-
-
Save nazlialagoz/85d0713f157ffeacdc71b581a0da07d0 to your computer and use it in GitHub Desktop.
Difference-in-differences with variation in treatment timing - Analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Simulation study for the DiD article: | |
rm(list = ls()) | |
library(data.table) | |
library(fastDummies) # Create dummy variables | |
library(fixest) # Fixed-effects regression | |
library(kableExtra) # Make nice tables | |
library(bacondecomp) # Goodman-Bacon Decomposition | |
library(did) # Difference-in-differences | |
source('sim_data.R') # Import simulation function and some utilities | |
data <- sim_data() | |
# EDA and Analysis -------------------------------------------------------- | |
# Check out the data | |
select_cols <- c('unit', 'period', 'cohort_period','treat','hrs_listened') | |
kable(head(data[, ..select_cols]), 'simple') | |
kable(summary(data[, ..select_cols]), 'simple') | |
data[, .N, by = cohort_period] # group sizes | |
data[, .(mean_hrs_listened = mean(hrs_listened)), by = cohort_period] | |
# Visualize the outcome variable | |
avg_dv_period <- data[, .(mean_hrs_listened = mean(hrs_listened)), by = c('cohort_period','period')] | |
ggplot(avg_dv_period, aes(fill=factor(cohort_period), y=mean_hrs_listened, x=period)) + | |
geom_bar(position=position_dodge2(), stat="identity") + coord_cartesian(ylim=c(79,85))+ | |
labs(x = "Period", y = "Hours", title = 'Average music listening (hours)', | |
caption = 'Cohort 2 is the early treated, and cohort 3 is the late treated group.', | |
subtitle = 'Treatment effect heterogeneity across cohorts') + | |
theme(legend.position = 'bottom', | |
axis.title = element_text(size = 14), | |
axis.text = element_text(size = 12)) + scale_fill_manual(values=cbPalette) + | |
geom_vline(xintercept = 1.5, color = '#0072B2', lty = 5)+ | |
geom_vline(xintercept = 2.5, color = '#D55E00', lty = 5) + | |
geom_text(label = 'Cohort period 2 is treated',aes(1.4,83), color = '#0072B2', angle = 90)+ | |
geom_text(label = 'Cohort period 3 is treated',aes(2.4,83), color = '#D55E00', angle = 90) + | |
guides(fill=guide_legend(title="Treatment cohort period")) | |
# Visualize treatment effects | |
avg_treat_period <- data[treat == 1, .(mean_treat_effect = mean(tau_cum)), by = c('cohort_period','period')] | |
plot_te <- ggplot(avg_treat_period, aes(fill=factor(cohort_period), y=mean_treat_effect, x=period)) + | |
geom_bar(position=position_dodge2(preserve = "single"), stat="identity") + | |
labs(x = "Period", y = "Hours", title = 'True treatment effects (hrs)', | |
caption = 'Cohort 2 is the early treated and cohort 3 is the late treated group.', | |
subtitle = 'Treatment effect heterogeneity across cohorts') + | |
theme(legend.position = 'bottom', | |
axis.title = element_text(size = 14), | |
axis.text = element_text(size = 12)) + scale_fill_manual(values=cbPalette) + | |
guides(fill=guide_legend(title="Treatment cohort period")) + | |
scale_x_continuous(breaks = unique(data$period)) + | |
scale_y_continuous(breaks = round(unique(avg_treat_period$mean_treat_effect))) | |
plot_te | |
# A) Canonical DiD ------------------------------------------------------------- | |
formula <- as.formula('hrs_listened ~ treat') | |
canonical_did <- feols(formula, | |
data = data, panel.id = "unit", | |
fixef = c("unit", "period"), cluster = "unit") | |
summary(canonical_did) | |
# Bacon Decomposition | |
# bacon_decomp <- bacon(formula, data, id_var="unit", time_var='period', quietly = F) | |
# sum(bacon_decomp$weight * bacon_decomp$estimate) | |
# B) DiD with post-period-cohort specific TEs ---------------------------------- | |
# Drop periods where everyone is treated | |
data <- data[period < 3] | |
# Create dummy variables | |
data <- data %>% | |
dummy_cols(select_columns = c("cohort_period", "period")) | |
interact_covs <- 'cohort_period_2:period_2' | |
# Regression | |
formula <- as.formula(paste0('hrs_listened ~ ',interact_covs)) | |
model <- feols(formula, | |
data = data, panel.id = "unit", | |
fixef = c("unit", "period"), cluster = "unit") | |
summary(model) | |
# did package by Santanna & Callaway | |
out <- att_gt(yname = "hrs_listened", | |
gname = "cohort_period", | |
idname = "unit", | |
tname = "period", | |
xformla = ~1, | |
data = data, | |
est_method = "reg", | |
control_group = 'notyettreated' | |
) | |
out | |
es <- aggte(out, type = "dynamic") | |
group_effects <- aggte(out, type = "group") | |
ggdid(out) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment