Skip to content

Instantly share code, notes, and snippets.

@srosh2000
Last active June 12, 2023 11:11
Show Gist options
  • Save srosh2000/f52600b76999e88f0fe316e8f23b419e to your computer and use it in GitHub Desktop.
Save srosh2000/f52600b76999e88f0fe316e8f23b419e to your computer and use it in GitHub Desktop.
2 x 2 difference-in-difference estimation example
# Load packages
library(googledrive)
library(data.table)
library(fixest)
library(broom)
library(ggplot2)
library(dplyr)
library(zoo)
# download data from google drive
drive_deauth()
file_id_gr<- "1voW5dWUiplIzWHa-KnJx0lh9lz7h1Wzf"
file_id_am<- "1E0HG5ZCSF939rTE7gRPHUP6QFS43JXDm"
drive_download(as_id(file_id_gr), overwrite = TRUE)
drive_download(as_id(file_id_am), overwrite = TRUE)
# load data
goodreads_subset<- fread("gr_did.csv")
amazon_subset<- fread("am_did.csv")
### --- DATA PREPARATION ---###
# create dummy "qa" which takes 0 if before may 21st, 2014 and 1 for after
cutoff_date<- as.Date('2014-05-21')
amazon_subset$qa<- ifelse(amazon_subset$unixReviewTime < cutoff_date, 0, 1)
amazon_subset$goodr <- 0
amazon_subset$goodr_after<- 0
# now for goodreads
goodreads_subset$goodr <- 1
goodreads_subset$qa <- ifelse(goodreads_subset$date < cutoff_date, 0, 1)
goodreads_subset$goodr_after<- goodreads_subset$qa * goodreads_subset$goodr
# add time fixed effects to both datasets
# check which dataset has the oldest observation
amazon_subset %>% arrange(unixReviewTime) %>% head() # oldest: 1996-11-14
goodreads_subset %>% arrange(date) %>% head() # oldest: 2007-01-06
first_date<- as.Date('1996-11-14')
# compute time fixed effect as number of months since first review
amazon_subset$t<- (as.yearmon(amazon_subset$unixReviewTime) - as.yearmon(first_date))*12
goodreads_subset$t<- (as.yearmon(goodreads_subset$date) - as.yearmon(first_date))*12
# merge both datasets
GoodAma <- select (amazon_subset,-c( reviewerID))
goodreads_subset <- goodreads_subset[ , c("asin","t", "rating", "date", "year_month", "goodr", "qa", "goodr_after")]
GoodAma <- dplyr::rename(GoodAma, rating = overall)
GoodAma <- dplyr::rename(GoodAma, date = unixReviewTime)
goodreads_subset$date<- as.Date(goodreads_subset$date) # resolve error of unmatched date class attribute
goodreads_subset$year_month<- as.Date(as.yearmon(goodreads_subset$year_month)) # same as above to match classes of year_month
GoodAma$date<- as.Date(GoodAma$date)
GoodAma$year_month<- as.Date(as.yearmon(GoodAma$year_month))
GoodAma <- rbind(GoodAma, goodreads_subset, fill = TRUE) # FINAL DATASET
means<- GoodAma %>%
group_by(goodr, qa) %>%
summarize(rating=mean(rating)) %>%
ungroup()
print(means)
# add labels
library(dplyr)
# Modify the labels using mutate and case_when
means_modified <- means %>%
mutate(
goodr_label = case_when(
goodr == 0 ~ "Amazon",
goodr == 1 ~ "Goodreads"
),
qa_label = case_when(
qa == 0 ~ "Before",
qa == 1 ~ "After"
)
)
# Print the modified table
print(means_modified)
# compute treatment effect
# before-after difference for untreated, has time effect only
bef_aft_untreated <- filter(means, goodr == 0, qa == 1)$rating - filter(means, goodr == 0, qa == 0)$rating
# before-after for treated, has time and treatment effect
bef_aft_treated<- filter(means, goodr == 1, qa == 1)$rating - filter(means, goodr == 1, qa == 0)$rating
# difference-in-difference: take time +treatment effect, and remove the time effect
did<- bef_aft_treated - bef_aft_untreated
print(paste("Diff in Diff Estimate: " , did))
### --- ANALYSIS --- ###
# Check if parallel trends assumption holds by visualising
temp<- GoodAma %>% filter(goodr == 1) %>% group_by(t) %>% summarize(mean_goodreads = mean(rating), n_obs = n()) %>% filter(n_obs > 100)
temp2<- GoodAma %>% filter(goodr == 0) %>% group_by(t) %>% summarize(mean_amazon = mean(rating), n_obs = n()) %>% filter(n_obs >100)
plot_data<- temp %>% left_join(temp2, by = "t")
plot_avg_rating<- ggplot(plot_data, (aes(x = t)))+
geom_line(aes(y = mean_goodreads), color = "red")+
geom_line(aes(y = mean_amazon), color = "blue")+
geom_vline(xintercept = 210, linetype = "longdash")+
ylab("Mean Rating")+
xlab("Number of periods") +
theme_bw()+
annotate("text", x = 175, y = 4.3, label = "Amazon", size = 5.5, color = "blue")+
annotate("text", x = 164, y = 3.88, label = "Goodreads", size = 5.5, color = "red") +
annotate("text", x = 225, y = 4.12, label = "QA launch", size = 5)+
theme(plot.title = element_text(hjust = 0.5))
plot_avg_rating
# Simple linear regression
model_1<- lm(rating ~ goodr*qa, data = GoodAma)
tidy(model_1, conf.int = TRUE)
# time fixed effects only model
model_2<- feols(rating ~ goodr*qa
|
year_month,
data = GoodAma)
tidy(model_2, conf.int = TRUE)
# both time and book fixed effects model
model_3<- feols(rating ~ goodr*qa
|
year_month + asin,
data = GoodAma)
tidy(model_3, conf.int = TRUE)
# time and book fixed effects with clustered SE
model_4<- feols(rating ~ goodr*qa
|
year_month + asin,
cluster = c("asin"),
data = GoodAma)
tidy(model_4, conf.int = TRUE)
# Get all results in summary table
install.packages("modelsummary")
library(modelsummary)
models <- list(
"(1)" = model_1,
"(2)" = model_2,
"(3)" = model_3,
"(4)" = model_4)
msummary(models)
# add variable names to the output
cm = c('goodr' = 'Treated (1 = Goodreads)',
'qa' = 'After (1 = post QA)',
'goodr:qa' = 'Treatment Effect (treated x after)')
# include table notes and caption
notetable <- c(
"Notes: Model 1 is the simple regression. Model 2 includes time fixed effects. Model 3 includes time and book fixed effects. Model 4 includes time and book fixed effects and also clustered standard errors by books.
Standard errors are in parentheses.",
"***Significant at the 1 percent level.",
"**Significant at the 5 percent level.",
"*Significant at the 10 percent level.")
titletable <- 'Table 1— All DiD Estimation Results'
# get summary of results
msummary(models,
output = "table1.md",
stars = c('*' = .1, '**' = 0.05, '***' = .01),
estimate = "{estimate}",
statistic = "[{std.error}]{stars}",
coef_map = cm,
gof_omit = 'AIC|BIC|RMSE|Within|Log.Lik.|Std.Errors',
notes = notetable,
title = titletable)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment