BioSciEconomist/basic DID.r

## basic DID.r
# *-----------------------------------------------------------------
# | PROGRAM NAME: basic 2x2 DID.r
# | DATE:
# | CREATED BY: MATT BOGARD
# | PROJECT FILE:
# *----------------------------------------------------------------
# | PURPOSE: simple example of DID and parallel trends
# *----------------------------------------------------------------

rm(list=ls()) # get rid of any existing data
cat("\f") # clear console
dev.off() # clear plots
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation

library(dplyr)

# read toy data

cards <- "id         steps      year       time       treat
1             3700      2015      0             1
1             3800      2016      0             1
1             5000      2017      1             1
2             3700      2015      0             1
2             3800      2016      0             1
2             4700      2017      1             1
3             3700      2015      0             1
3             3800      2016      0             1
3             4450      2017      1             1
4             3700      2015      0             1
4             3800      2016      0             1
4             4600      2017      1             1
5             3700      2015      0             1
5             3800      2016      0             1
5             4500      2017      1             1
6             3700      2015      0             1
6             3800      2016      0             1
6             4300      2017      1             1
7             3700      2015      0             1
7             3800      2016      0             1
7             3800      2017      1             1
8             3700      2015      0             1
8             3800      2016      0             1
8             4000      2017      1             1
9             3700      2015      0             1
9             3800      2016      0             1
9             3900      2017      1             1
10           3700      2015      0             1
10           3800      2016      0             1
10           4200      2017      1             1
11           3700      2015      0             0
11           3800      2016      0             0
11           4300      2017      1             0
12           3700      2015      0             0
12           3800      2016      0             0
12           4300      2017      1             0
13           3700      2015      0             0
13           3800      2016      0             0
13           4300      2017      1             0
14           3500      2015      0             0
14           3600      2016      0             0
14           3900      2017      1             0
15           3600      2015      0             0
15           3700      2016      0             0
15           3700      2017      1             0
16           3700      2015      0             0
16           3800      2016      0             0
16           3800      2017      1             0
17           3650      2015      0             0
17           3750      2016      0             0
17           3900      2017      1             0
18           3400      2015      0             0
18           3500      2016      0             0
18           3850      2017      1             0
19           3300      2015      0             0
19           3400      2016      0             0
19           3825      2017      1             0
20           3500      2015      0             0
20           3600      2016      0             0
20           3610      2017      1             0"
dat <- read.table(textConnection(cards), header = TRUE)
closeAllConnections()

# let's start with the basic 2x2 design

df <- dat[dat$year %in% c(2016,2017),]


#
# analysis - regression
#

summary(lm(steps ~ treat + time + time*treat, data = df))

#
# clustered standard errors
#

library(multiwayvcov)
library(lmtest)

m1 <- lm(steps ~ treat + time + time*treat, data = df) # save model
m1.vcovCL <- cluster.vcov(m1, df$id) # get variance-covariance matrix
coeftest(m1, m1.vcovCL) # results with clustered standard errors

#
# estimation using a linear mixed effects model
#

library(nlme)

mix<-lme(steps ~ time*treat, random=(~1 | id), data = df)
summary(mix)


#
# explore parallel trends for groups
#


tmp <- dat%>%group_by(treat,year) %>%
  summarize(steps = mean(steps),
            N = n())


plot(tmp$year[tmp$treat == 1], tmp$steps[tmp$treat==1], type="b", col = "green",
     xlim=c(2015,2017), ylim=c(3000,4500),xlab="year", ylab="steps", xaxt = "n")
  axis(1, at = 2015:2017)
lines(tmp$year[tmp$treat==0], tmp$steps[tmp$treat==0], col=2, type="b")
abline(v=2016, col="blue")
title(main="Outcomes by Year", sub="Green = Treatment | Red = Control")


#---------------------------------------
# exploring cluster robust standard errors and pre-post paired designs
#--------------------------------------


tmp <- dat[dat$year %in% c(2016,2017),]
tmp <- tmp[tmp$treat == 0,]
tmp <- tmp[order(tmp$time,tmp$id),]


# paired t-test


t.test(steps ~ time,
       data= tmp,
       paired=TRUE,
       conf.level=0.95)

(273.5/4)  #SE = 68.38

t.test(steps ~ time,
       data= tmp,
       paired=FALSE,
       conf.level=0.95)

(273.5/2.9) #SE = 94.31

summary(lm(steps ~ time, data = tmp)) # SE = 93.4

m1 <- lm(steps ~ time, data = tmp) # save model
m1.vcovCL <- cluster.vcov(m1, tmp$id) # get variance-covariance matrix
coeftest(m1, m1.vcovCL) # results with clustered standard errors

# SE = 70 similar to the SE we got from the paired t-test

library(nlme)

mix<-lme(steps ~ time, random=(~1 | id), data = tmp)
summary(mix)

# SE = 68.13 similar to the SE we got from the paired t-test
	# *-----------------------------------------------------------------
	# \| PROGRAM NAME: basic 2x2 DID.r
	# \| DATE:
	# \| CREATED BY: MATT BOGARD
	# \| PROJECT FILE:
	# *----------------------------------------------------------------
	# \| PURPOSE: simple example of DID and parallel trends
	# *----------------------------------------------------------------

	rm(list=ls()) # get rid of any existing data
	cat("\f") # clear console
	dev.off() # clear plots
	options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation

	library(dplyr)

	# read toy data

	cards <- "id steps year time treat
	1 3700 2015 0 1
	1 3800 2016 0 1
	1 5000 2017 1 1
	2 3700 2015 0 1
	2 3800 2016 0 1
	2 4700 2017 1 1
	3 3700 2015 0 1
	3 3800 2016 0 1
	3 4450 2017 1 1
	4 3700 2015 0 1
	4 3800 2016 0 1
	4 4600 2017 1 1
	5 3700 2015 0 1
	5 3800 2016 0 1
	5 4500 2017 1 1
	6 3700 2015 0 1
	6 3800 2016 0 1
	6 4300 2017 1 1
	7 3700 2015 0 1
	7 3800 2016 0 1
	7 3800 2017 1 1
	8 3700 2015 0 1
	8 3800 2016 0 1
	8 4000 2017 1 1
	9 3700 2015 0 1
	9 3800 2016 0 1
	9 3900 2017 1 1
	10 3700 2015 0 1
	10 3800 2016 0 1
	10 4200 2017 1 1
	11 3700 2015 0 0
	11 3800 2016 0 0
	11 4300 2017 1 0
	12 3700 2015 0 0
	12 3800 2016 0 0
	12 4300 2017 1 0
	13 3700 2015 0 0
	13 3800 2016 0 0
	13 4300 2017 1 0
	14 3500 2015 0 0
	14 3600 2016 0 0
	14 3900 2017 1 0
	15 3600 2015 0 0
	15 3700 2016 0 0
	15 3700 2017 1 0
	16 3700 2015 0 0
	16 3800 2016 0 0
	16 3800 2017 1 0
	17 3650 2015 0 0
	17 3750 2016 0 0
	17 3900 2017 1 0
	18 3400 2015 0 0
	18 3500 2016 0 0
	18 3850 2017 1 0
	19 3300 2015 0 0
	19 3400 2016 0 0
	19 3825 2017 1 0
	20 3500 2015 0 0
	20 3600 2016 0 0
	20 3610 2017 1 0"
	dat <- read.table(textConnection(cards), header = TRUE)
	closeAllConnections()

	# let's start with the basic 2x2 design

	df <- dat[dat$year %in% c(2016,2017),]


	#
	# analysis - regression
	#

	summary(lm(steps ~ treat + time + time*treat, data = df))

	#
	# clustered standard errors
	#

	library(multiwayvcov)
	library(lmtest)

	m1 <- lm(steps ~ treat + time + time*treat, data = df) # save model
	m1.vcovCL <- cluster.vcov(m1, df$id) # get variance-covariance matrix
	coeftest(m1, m1.vcovCL) # results with clustered standard errors

	#
	# estimation using a linear mixed effects model
	#

	library(nlme)

	mix<-lme(steps ~ time*treat, random=(~1 \| id), data = df)
	summary(mix)


	#
	# explore parallel trends for groups
	#


	tmp <- dat%>%group_by(treat,year) %>%
	summarize(steps = mean(steps),
	N = n())


	plot(tmp$year[tmp$treat == 1], tmp$steps[tmp$treat==1], type="b", col = "green",
	xlim=c(2015,2017), ylim=c(3000,4500),xlab="year", ylab="steps", xaxt = "n")
	axis(1, at = 2015:2017)
	lines(tmp$year[tmp$treat==0], tmp$steps[tmp$treat==0], col=2, type="b")
	abline(v=2016, col="blue")
	title(main="Outcomes by Year", sub="Green = Treatment \| Red = Control")


	#---------------------------------------
	# exploring cluster robust standard errors and pre-post paired designs
	#--------------------------------------


	tmp <- dat[dat$year %in% c(2016,2017),]
	tmp <- tmp[tmp$treat == 0,]
	tmp <- tmp[order(tmp$time,tmp$id),]


	# paired t-test


	t.test(steps ~ time,
	data= tmp,
	paired=TRUE,
	conf.level=0.95)

	(273.5/4) #SE = 68.38

	t.test(steps ~ time,
	data= tmp,
	paired=FALSE,
	conf.level=0.95)

	(273.5/2.9) #SE = 94.31

	summary(lm(steps ~ time, data = tmp)) # SE = 93.4

	m1 <- lm(steps ~ time, data = tmp) # save model
	m1.vcovCL <- cluster.vcov(m1, tmp$id) # get variance-covariance matrix
	coeftest(m1, m1.vcovCL) # results with clustered standard errors

	# SE = 70 similar to the SE we got from the paired t-test

	library(nlme)

	mix<-lme(steps ~ time, random=(~1 \| id), data = tmp)
	summary(mix)

	# SE = 68.13 similar to the SE we got from the paired t-test