Created
September 1, 2023 08:46
-
-
Save srosh2000/13be23a4a37e16d21d2842e704b2c60d to your computer and use it in GitHub Desktop.
Staggered DiD example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load packages ----
# The attach order is kept exactly as in the original script: several of
# these packages export functions with the same name, and the package
# attached last wins.
library(data.table)
library(dplyr)
library(tidyr)
library(zoo)
library(did)
library(lubridate)
library(googledrive)
library(ggplot2)
#### --- Load data --- ####
# Download the sample from Google Drive.
# drive_deauth() puts googledrive in a de-authorized state, so the request is
# sent without a user token; the file therefore has to be publicly readable.
drive_deauth()
file_id <- "1e-VogI4zQPi_xbSydVLH7PrgkOlWpczl"
# Pin the local file name so that the read below always opens the file that
# was just downloaded, whatever name the file carries on Drive.
data_file <- "GoodAma_sample.csv"
drive_download(as_id(file_id), path = data_file, overwrite = TRUE)
GoodAma_sample <- fread(data_file)
# SETTING UP THE DATA FOR ESTIMATION ----
# Get the minimum and maximum review dates
# NOTE(review): seq(..., by = "month") below assumes `year_month` was read as
# a Date-like column -- confirm against the CSV.
min_date <- min(GoodAma_sample$year_month)
max_date <- max(GoodAma_sample$year_month)

# Lookup table with one row per calendar month between the first and the last
# review month, numbered 1, 2, ... in chronological order
date_range <- seq(min_date, max_date, by = "month")
date_df <- data.frame(date = date_range)
date_df$period <- seq_len(nrow(date_df))

# Convert the question time to a year-month date
GoodAma_sample$question_ym <- as.Date(
  as.yearmon(GoodAma_sample$cleaned_question_time)
)

# Attach the period index of the review month. all.x = TRUE keeps every
# review row, even when its date has no match in the lookup table.
merged_df <- merge(
  GoodAma_sample, date_df,
  by.x = "year_month", by.y = "date", all.x = TRUE
)
# Attach the period index of the question month. Both inputs now carry a
# `period` column, so merge() suffixes them with .x (review) and .y (question).
# Question months outside the review window get NA here.
merged_df <- merge(
  merged_df, date_df,
  by.x = "question_ym", by.y = "date", all.x = TRUE
)

# Rename the merged columns for review and question time
names(merged_df)[names(merged_df) == "period.x"] <- "period_review"
names(merged_df)[names(merged_df) == "period.y"] <- "period_question"

# Set period_question to zero for the control group (goodr == 0); it is the
# `gname` column passed to att_gt() further down.
merged_df$period_question <- ifelse(
  merged_df$goodr == 0, 0, merged_df$period_question
)
# period_review is deliberately NOT reset for the control group. It is the
# time index (`tname`) passed to att_gt(); overwriting it with 0 would move
# every control observation into a single artificial period 0 and leave no
# control observations in any actual period.

# Change Amazon's ASIN to identify the never-treated group (otherwise `goodr`
# was the treatment group indicator).
# This step is crucial because otherwise the package cannot identify the
# control group, as both Goodreads and Amazon books are matched with the same
# ASIN.
control_rows <- which(merged_df$goodr == 0)
merged_df$asin[control_rows] <- paste0(merged_df$asin[control_rows], "1")

# Create treatment dummy: 1 for Goodreads reviews written in or after the
# question period, 0 otherwise (always 0 for the control group)
merged_df$treat <- ifelse(
  merged_df$goodr == 0,
  0,
  ifelse(merged_df$period_review >= merged_df$period_question, 1, 0)
)
# --- ESTIMATION USING NOT-YET-TREATED UNITS AS CONTROL --- #
# Keep only the Goodreads rows; the comparison group is then made up of
# Goodreads books that have not yet reached their question period.
is_goodreads <- merged_df$goodr == 1
goodreads_df <- merged_df[is_goodreads, ]

# The time index and the unit id are passed to att_gt() as numeric columns;
# each distinct ASIN is mapped to its factor level code.
goodreads_df$asin <- as.numeric(as.factor(goodreads_df$asin))
goodreads_df$period_review <- as.numeric(goodreads_df$period_review)

# Group-time average treatment effects
staggered_notyettreated <- att_gt(
  data = goodreads_df,
  yname = "rating",
  idname = "asin",
  tname = "period_review",
  gname = "period_question",
  control_group = "notyettreated",
  allow_unbalanced_panel = TRUE
)
summary(staggered_notyettreated)
# ggdid(staggered_notyettreated)  # no need to plot this right?

# Aggregate the group-time effects into an event-study (dynamic) profile
staggered_notyettreated_aggregate <- aggte(
  staggered_notyettreated,
  type = "dynamic",
  na.rm = TRUE
)
summary(staggered_notyettreated_aggregate)

# Event-study plot
staggered_notyettreated_plot <- ggdid(staggered_notyettreated_aggregate) +
  labs(
    x = "Time Relative to Q&A Adoption (in 30-day bins)",
    y = "ATT"
  )
print(staggered_notyettreated_plot)
# --- ESTIMATION USING NEVER-TREATED UNITS AS CONTROL --- #
# Work on a narrow copy of the merged data holding only the needed columns
keep_cols <- c(
  "asin", "rating", "period_review", "period_question", "treat", "fiction"
)
stag_did <- merged_df[, keep_cols]

# The time index and the unit id are passed to att_gt() as numeric columns;
# each distinct ASIN is mapped to its factor level code.
stag_did$asin <- as.numeric(as.factor(stag_did$asin))
stag_did$period_review <- as.numeric(stag_did$period_review)

# Group-time average treatment effects, clustered on the unit id
staggered_nevertreated <- att_gt(
  data = stag_did,
  yname = "rating",
  idname = "asin",
  tname = "period_review",
  gname = "period_question",
  control_group = "nevertreated",
  clustervars = "asin",
  allow_unbalanced_panel = TRUE
)
summary(staggered_nevertreated)

# Aggregate the group-time effects into an event-study (dynamic) profile
staggered_nevertreated_aggregated <- aggte(
  staggered_nevertreated,
  type = "dynamic",
  na.rm = TRUE
)
summary(staggered_nevertreated_aggregated)

# Event-study plot
staggered_nevertreated_aggregated_plot <-
  ggdid(staggered_nevertreated_aggregated) +
  labs(
    x = "Time Relative to Q&A Adoption (in 30-day bins)",
    y = "ATT"
  )
print(staggered_nevertreated_aggregated_plot)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment