Skip to content

Instantly share code, notes, and snippets.

@BioSciEconomist
Last active November 2, 2022 21:50
Show Gist options
  • Save BioSciEconomist/d61b75805fa0ca44f138cf13fe4a6815 to your computer and use it in GitHub Desktop.
Save BioSciEconomist/d61b75805fa0ca44f138cf13fe4a6815 to your computer and use it in GitHub Desktop.
Basic difference in differences in R
# *-----------------------------------------------------------------
# | PROGRAM NAME: basic 2x2 DID.r
# | DATE:
# | CREATED BY: MATT BOGARD
# | PROJECT FILE:
# *----------------------------------------------------------------
# | PURPOSE: simple example of DID and parallel trends
# *----------------------------------------------------------------
rm(list=ls()) # get rid of any existing data
cat("\f") # clear console
dev.off() # clear plots
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation
library(dplyr)
# read toy data
cards <- "id steps year time treat
1 3700 2015 0 1
1 3800 2016 0 1
1 5000 2017 1 1
2 3700 2015 0 1
2 3800 2016 0 1
2 4700 2017 1 1
3 3700 2015 0 1
3 3800 2016 0 1
3 4450 2017 1 1
4 3700 2015 0 1
4 3800 2016 0 1
4 4600 2017 1 1
5 3700 2015 0 1
5 3800 2016 0 1
5 4500 2017 1 1
6 3700 2015 0 1
6 3800 2016 0 1
6 4300 2017 1 1
7 3700 2015 0 1
7 3800 2016 0 1
7 3800 2017 1 1
8 3700 2015 0 1
8 3800 2016 0 1
8 4000 2017 1 1
9 3700 2015 0 1
9 3800 2016 0 1
9 3900 2017 1 1
10 3700 2015 0 1
10 3800 2016 0 1
10 4200 2017 1 1
11 3700 2015 0 0
11 3800 2016 0 0
11 4300 2017 1 0
12 3700 2015 0 0
12 3800 2016 0 0
12 4300 2017 1 0
13 3700 2015 0 0
13 3800 2016 0 0
13 4300 2017 1 0
14 3500 2015 0 0
14 3600 2016 0 0
14 3900 2017 1 0
15 3600 2015 0 0
15 3700 2016 0 0
15 3700 2017 1 0
16 3700 2015 0 0
16 3800 2016 0 0
16 3800 2017 1 0
17 3650 2015 0 0
17 3750 2016 0 0
17 3900 2017 1 0
18 3400 2015 0 0
18 3500 2016 0 0
18 3850 2017 1 0
19 3300 2015 0 0
19 3400 2016 0 0
19 3825 2017 1 0
20 3500 2015 0 0
20 3600 2016 0 0
20 3610 2017 1 0"
dat <- read.table(textConnection(cards), header = TRUE)
closeAllConnections()
# let's start with the basic 2x2 design
df <- dat[dat$year %in% c(2016,2017),]
#
# analysis - regression
#
summary(lm(steps ~ treat + time + time*treat, data = df))
#
# clustered standard errors
#
library(multiwayvcov)
library(lmtest)
m1 <- lm(steps ~ treat + time + time*treat, data = df) # save model
m1.vcovCL <- cluster.vcov(m1, df$id) # get variance-covariance matrix
coeftest(m1, m1.vcovCL) # results with clustered standard errors
#
# estimation using a linear mixed effects model
#
library(nlme)
mix<-lme(steps ~ time*treat, random=(~1 | id), data = df)
summary(mix)
#
# explore parallel trends for groups
#
tmp <- dat%>%group_by(treat,year) %>%
summarize(steps = mean(steps),
N = n())
plot(tmp$year[tmp$treat == 1], tmp$steps[tmp$treat==1], type="b", col = "green",
xlim=c(2015,2017), ylim=c(3000,4500),xlab="year", ylab="steps", xaxt = "n")
axis(1, at = 2015:2017)
lines(tmp$year[tmp$treat==0], tmp$steps[tmp$treat==0], col=2, type="b")
abline(v=2016, col="blue")
title(main="Outcomes by Year", sub="Green = Treatment | Red = Control")
#---------------------------------------
# exploring cluster robust standard errors and pre-post paired designs
#--------------------------------------
tmp <- dat[dat$year %in% c(2016,2017),]
tmp <- tmp[tmp$treat == 0,]
tmp <- tmp[order(tmp$time,tmp$id),]
# paired t-test
t.test(steps ~ time,
data= tmp,
paired=TRUE,
conf.level=0.95)
(273.5/4) #SE = 68.38
t.test(steps ~ time,
data= tmp,
paired=FALSE,
conf.level=0.95)
(273.5/2.9) #SE = 94.31
summary(lm(steps ~ time, data = tmp)) # SE = 93.4
m1 <- lm(steps ~ time, data = tmp) # save model
m1.vcovCL <- cluster.vcov(m1, tmp$id) # get variance-covariance matrix
coeftest(m1, m1.vcovCL) # results with clustered standard errors
# SE = 70 similar to the SE we got from the paired t-test
library(nlme)
mix<-lme(steps ~ time, random=(~1 | id), data = tmp)
summary(mix)
# SE = 68.13 similar to the SE we got from the paired t-test
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment