Last active
November 2, 2022 21:50
-
-
Save BioSciEconomist/d61b75805fa0ca44f138cf13fe4a6815 to your computer and use it in GitHub Desktop.
Basic difference in differences in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# *----------------------------------------------------------------- | |
# | PROGRAM NAME: basic 2x2 DID.r | |
# | DATE: | |
# | CREATED BY: MATT BOGARD | |
# | PROJECT FILE: | |
# *---------------------------------------------------------------- | |
# | PURPOSE: simple example of DID and parallel trends | |
# *---------------------------------------------------------------- | |
rm(list=ls()) # get rid of any existing data | |
cat("\f") # clear console | |
dev.off() # clear plots | |
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation | |
library(dplyr) | |
# read toy data | |
cards <- "id steps year time treat | |
1 3700 2015 0 1 | |
1 3800 2016 0 1 | |
1 5000 2017 1 1 | |
2 3700 2015 0 1 | |
2 3800 2016 0 1 | |
2 4700 2017 1 1 | |
3 3700 2015 0 1 | |
3 3800 2016 0 1 | |
3 4450 2017 1 1 | |
4 3700 2015 0 1 | |
4 3800 2016 0 1 | |
4 4600 2017 1 1 | |
5 3700 2015 0 1 | |
5 3800 2016 0 1 | |
5 4500 2017 1 1 | |
6 3700 2015 0 1 | |
6 3800 2016 0 1 | |
6 4300 2017 1 1 | |
7 3700 2015 0 1 | |
7 3800 2016 0 1 | |
7 3800 2017 1 1 | |
8 3700 2015 0 1 | |
8 3800 2016 0 1 | |
8 4000 2017 1 1 | |
9 3700 2015 0 1 | |
9 3800 2016 0 1 | |
9 3900 2017 1 1 | |
10 3700 2015 0 1 | |
10 3800 2016 0 1 | |
10 4200 2017 1 1 | |
11 3700 2015 0 0 | |
11 3800 2016 0 0 | |
11 4300 2017 1 0 | |
12 3700 2015 0 0 | |
12 3800 2016 0 0 | |
12 4300 2017 1 0 | |
13 3700 2015 0 0 | |
13 3800 2016 0 0 | |
13 4300 2017 1 0 | |
14 3500 2015 0 0 | |
14 3600 2016 0 0 | |
14 3900 2017 1 0 | |
15 3600 2015 0 0 | |
15 3700 2016 0 0 | |
15 3700 2017 1 0 | |
16 3700 2015 0 0 | |
16 3800 2016 0 0 | |
16 3800 2017 1 0 | |
17 3650 2015 0 0 | |
17 3750 2016 0 0 | |
17 3900 2017 1 0 | |
18 3400 2015 0 0 | |
18 3500 2016 0 0 | |
18 3850 2017 1 0 | |
19 3300 2015 0 0 | |
19 3400 2016 0 0 | |
19 3825 2017 1 0 | |
20 3500 2015 0 0 | |
20 3600 2016 0 0 | |
20 3610 2017 1 0" | |
dat <- read.table(textConnection(cards), header = TRUE) | |
closeAllConnections() | |
# let's start with the basic 2x2 design | |
df <- dat[dat$year %in% c(2016,2017),] | |
# | |
# analysis - regression | |
# | |
summary(lm(steps ~ treat + time + time*treat, data = df)) | |
# | |
# clustered standard errors | |
# | |
library(multiwayvcov) | |
library(lmtest) | |
m1 <- lm(steps ~ treat + time + time*treat, data = df) # save model | |
m1.vcovCL <- cluster.vcov(m1, df$id) # get variance-covariance matrix | |
coeftest(m1, m1.vcovCL) # results with clustered standard errors | |
# | |
# estimation using a linear mixed effects model | |
# | |
library(nlme) | |
mix<-lme(steps ~ time*treat, random=(~1 | id), data = df) | |
summary(mix) | |
# | |
# explore parallel trends for groups | |
# | |
tmp <- dat%>%group_by(treat,year) %>% | |
summarize(steps = mean(steps), | |
N = n()) | |
plot(tmp$year[tmp$treat == 1], tmp$steps[tmp$treat==1], type="b", col = "green", | |
xlim=c(2015,2017), ylim=c(3000,4500),xlab="year", ylab="steps", xaxt = "n") | |
axis(1, at = 2015:2017) | |
lines(tmp$year[tmp$treat==0], tmp$steps[tmp$treat==0], col=2, type="b") | |
abline(v=2016, col="blue") | |
title(main="Outcomes by Year", sub="Green = Treatment | Red = Control") | |
#--------------------------------------- | |
# exploring cluster robust standard errors and pre-post paired designs | |
#-------------------------------------- | |
tmp <- dat[dat$year %in% c(2016,2017),] | |
tmp <- tmp[tmp$treat == 0,] | |
tmp <- tmp[order(tmp$time,tmp$id),] | |
# paired t-test | |
t.test(steps ~ time, | |
data= tmp, | |
paired=TRUE, | |
conf.level=0.95) | |
(273.5/4) #SE = 68.38 | |
t.test(steps ~ time, | |
data= tmp, | |
paired=FALSE, | |
conf.level=0.95) | |
(273.5/2.9) #SE = 94.31 | |
summary(lm(steps ~ time, data = tmp)) # SE = 93.4 | |
m1 <- lm(steps ~ time, data = tmp) # save model | |
m1.vcovCL <- cluster.vcov(m1, tmp$id) # get variance-covariance matrix | |
coeftest(m1, m1.vcovCL) # results with clustered standard errors | |
# SE = 70 similar to the SE we got from the paired t-test | |
library(nlme) | |
mix<-lme(steps ~ time, random=(~1 | id), data = tmp) | |
summary(mix) | |
# SE = 68.13 similar to the SE we got from the paired t-test | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment