Skip to content

Instantly share code, notes, and snippets.

@acarril
Last active February 24, 2021 13:24
Show Gist options
  • Save acarril/270f0fd4e5ae8fff039216c3a9aad764 to your computer and use it in GitHub Desktop.
Save acarril/270f0fd4e5ae8fff039216c3a9aad764 to your computer and use it in GitHub Desktop.
ECO539B - Homework 3
setwd("/Users/alvaro/Dropbox/Princeton/2021-Spring/539B/03-IV/hw03")
### Load libraries
library(tidyverse)
library(haven) # import .dta
library(sandwich) # vcovHC()
library(clubSandwich) # vcovCR()
library(dfadjust)
library(progress)
library(brew)
### Read and prepare data
fam <- read_dta("famine.dta")
fam <- fam %>%
mutate(
lgrain_pred_fam = lgrain_pred * famine,
lgrain_pred_invfam = lgrain_pred * (1 - famine)
)
fam_sub <- filter(fam, year >= 1953 & year <= 1965)
### Compute main regression and extract betas
reg <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = fam)
betas <- coef(reg)[2:3]
reg_sub <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = fam_sub)
betas_sub <- coef(reg_sub)[2:3]
### (a) No clustering
# (i), (ii) HC standard errors
sigma_hc0 <- diag(sqrt(vcovHC(reg, type = "HC0")[2:3, 2:3]))
sigma_hc1 <- diag(sqrt(vcovHC(reg, type = "HC1")[2:3, 2:3]))
sigma_hc2 <- diag(sqrt(vcovHC(reg, type = "HC2")[2:3, 2:3]))
sigma_hc0_sub <- diag(sqrt(vcovHC(reg_sub, type = "HC0")[2:3, 2:3]))
sigma_hc1_sub <- diag(sqrt(vcovHC(reg_sub, type = "HC1")[2:3, 2:3]))
sigma_hc2_sub <- diag(sqrt(vcovHC(reg_sub, type = "HC2")[2:3, 2:3]))
### (b) With clustering
# (i), (ii) CR standard errors
sigma_cr0 <- diag(sqrt(vcovCR(reg, cluster = fam$prov, type = "CR0")[2:3, 2:3]))
sigma_cr1 <- diag(sqrt(vcovCR(reg, cluster = fam$prov, type = "CR1")[2:3, 2:3]))
sigma_cr2 <- diag(sqrt(vcovCR(reg, cluster = fam$prov, type = "CR2")[2:3, 2:3]))
sigma_cr0_sub <- diag(sqrt(vcovCR(reg_sub, cluster = fam_sub$prov, type = "CR0")[2:3, 2:3]))
sigma_cr1_sub <- diag(sqrt(vcovCR(reg_sub, cluster = fam_sub$prov, type = "CR1")[2:3, 2:3]))
sigma_cr2_sub <- diag(sqrt(vcovCR(reg_sub, cluster = fam_sub$prov, type = "CR2")[2:3, 2:3]))
### (iii) Effective standard errors
# to compute the effective degrees of freedom, we use the package by Imbens and Kolesar
# (a) No clustering
reg_adj <- dfadjustSE(reg, IK = F)
df_eff <- reg_adj$coefficients[2:3,5]
sigma_eff <- sigma_hc2 * qt(0.975, df = df_eff) / 1.96
reg_adj_sub <- dfadjustSE(reg_sub, IK = F)
df_eff_sub <- reg_adj_sub$coefficients[2:3,5]
sigma_eff_sub <- sigma_hc2_sub * qt(0.975, df = df_eff_sub) / 1.96
# (b) With clustering
reg_cl_adj <- dfadjustSE(reg, clustervar = as.factor(fam$prov), IK = F)
df_cl_eff <- reg_cl_adj$coefficients[2:3,5]
sigma_cl_eff <- sigma_cr2 * qt(0.975, df = df_cl_eff) / 1.96
reg_cl_adj_sub <- dfadjustSE(reg_sub, clustervar = as.factor(fam_sub$prov), IK = F)
df_cl_eff_sub <- reg_cl_adj_sub$coefficients[2:3,5]
sigma_cl_eff_sub <- sigma_cr2_sub * qt(0.975, df = df_cl_eff_sub) / 1.96
### (iv), (v) Boostrap
B <- 50000
N <- dim(fam)[1]
N_sub <- dim(fam_sub)[1]
provs <- unique(fam$prov)
provs_sub <- unique(fam_sub$prov)
Nclusters <- length(provs)
Nclusters_sub <- length(provs_sub)
bs_estimates <- matrix(data = NA, nrow = B, ncol = 2)
bs_estimates_sub <- matrix(data = NA, nrow = B, ncol = 2)
bs_tstats <- matrix(data = NA, nrow = B, ncol = 2)
bs_tstats_sub <- matrix(data = NA, nrow = B, ncol = 2)
bs_estimates_c <- matrix(data = NA, nrow = B, ncol = 2)
bs_tstats_c <- matrix(data = NA, nrow = B, ncol = 2)
bs_estimates_c_sub <- matrix(data = NA, nrow = B, ncol = 2)
bs_tstats_c_sub <- matrix(data = NA, nrow = B, ncol = 2)
pb <- progress_bar$new(total = B, format = "[:bar] :current/:total (:percent)")
for (b in 1:B){
pb$tick()
# (a) No clustering
dat <- sample_n(fam, size = N, replace = TRUE)
dat_sub <- sample_n(fam_sub, size = N_sub, replace = TRUE)
bs_reg <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = dat)
bs_reg_sub <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = dat_sub)
for (c in 2:3){
bs_estimates[b, c-1] <- coef(bs_reg)[c]
bs_estimates_sub[b, c-1] <- coef(bs_reg_sub)[c]
bs_tstats[b, c-1] <- sqrt(N) * (coef(bs_reg)[c] - coef(reg)[c]) / sqrt(vcovHC(bs_reg, type = "HC1")[c, c])
bs_tstats_sub[b, c-1] <- sqrt(N_sub) * (coef(bs_reg_sub)[c] - coef(reg_sub)[c]) / sqrt(vcovHC(bs_reg_sub, type = "HC1")[c,c])
}
# (b) With clustering
provb <- sample(provs, size = Nclusters, replace = T)
provb_sub <- sample(provs_sub, size = Nclusters_sub, replace = T)
dat <- filter(fam, prov %in% provb)
dat_sub <- filter(fam_sub, prov %in% provb_sub)
bs_reg <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = dat)
bs_reg_sub <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = dat_sub)
for (c in 2:3){
bs_estimates_c [b, c-1] <- coef(bs_reg)[c]
bs_estimates_c_sub[b, c-1] <- coef(bs_reg_sub)[c]
bs_tstats_c [b, c-1] <- sqrt(N) * (coef(bs_reg)[c] - coef(reg)[c]) / sqrt(vcovCR(bs_reg, cluster = dat$prov, type = "CR1")[c,c])
bs_tstats_c_sub [b, c-1] <- sqrt(N_sub) * (coef(bs_reg_sub)[c] - coef(reg_sub)[c]) / sqrt(vcovCR(bs_reg_sub, cluster = dat_sub$prov, type = "CR1")[c, c])
}
}
# compute se (no cluster)
bs_sigma <- c(sd(bs_estimates[, 1]), sd(bs_estimates[, 2]))
names(bs_sigma) <- names(coef(reg)[2:3])
bs_sigma_sub <- c(sd(bs_estimates_sub[, 1]), sd(bs_estimates_sub[, 2]))
names(bs_sigma_sub) <- names(coef(reg_sub)[2:3])
# compute se (cluster)
bs_sigma_cl <- c(sd(bs_estimates_c[,1]), sd(bs_estimates_c[,2]))
names(bs_sigma_cl) <- names(coef(reg)[2:3])
bs_sigma_cl_sub <- c(sd(bs_estimates_c_sub[,1]), sd(bs_estimates_c_sub[,2]))
names(bs_sigma_cl_sub) <- names(coef(reg_sub)[2:3])
# confidence intervals
lb_all <- numeric(2)
ub_all <- numeric(2)
lb_sub <- numeric(2)
ub_sub <- numeric(2)
lb_all_c <- numeric(2)
ub_all_c <- numeric(2)
lb_sub_c <- numeric(2)
ub_sub_c <- numeric(2)
for (b in 1:2){
lb_all [b] = betas[b] - quantile(bs_tstats[,b], probs = 1 - 0.05 / 2) * sigma_hc1[b] / sqrt(N)
ub_all [b] = betas[b] - quantile(bs_tstats[,b], probs = 0.05 / 2) * sigma_hc1[b] / sqrt(N)
lb_sub [b] = betas_sub[b] - quantile(bs_tstats_sub[,b], probs = 1 - 0.05 / 2) * sigma_hc1_sub[b] / sqrt(N_sub)
ub_sub [b] = betas_sub[b] - quantile(bs_tstats_sub[,b], probs = 0.05 / 2) * sigma_hc1_sub[b] / sqrt(N_sub)
lb_all_c[b] = betas[b] - quantile(bs_tstats_c[,b], probs = 1 - 0.05 / 2) * sigma_cr1[b] / sqrt(N)
ub_all_c[b] = betas[b] - quantile(bs_tstats_c[,b], probs = 0.05 / 2) * sigma_cr1[b] / sqrt(N)
lb_sub_c[b] = betas_sub[b] - quantile(bs_tstats_c_sub[,b], probs = 1 - 0.05 / 2) * sigma_cr1_sub[b] / sqrt(N_sub)
ub_sub_c[b] = betas_sub[b] - quantile(bs_tstats_c_sub[,b], probs = 0.05 / 2) * sigma_cr1_sub[b] / sqrt(N_sub)
}
# sink(file = "standard_errors.tex")
# brew("SE_template.brew")
# sink(file = NULL)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment