@acarril
Last active March 10, 2021 13:58
setwd("/Users/alvaro/Dropbox/Princeton/2021-Spring/539B/03-IV/hw03")
### Load libraries
library(tidyverse)
library(haven) # import .dta
library(sandwich) # vcovHC()
library(clubSandwich) # vcovCR()
library(dfadjust) # dfadjustSE() for Imbens-Kolesár degrees-of-freedom adjustments
library(progress) # progress_bar for the bootstrap loop
library(brew) # templating for the LaTeX output at the end
### Read and prepare data
fam <- read_dta("famine.dta")
fam <- fam %>%
  mutate(
    # interactions of predicted log grain with the famine indicator
    lgrain_pred_fam = lgrain_pred * famine,
    lgrain_pred_invfam = lgrain_pred * (1 - famine)
  )
fam_sub <- filter(fam, year >= 1953 & year <= 1965)
### Compute main regression and extract betas
reg <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = fam)
betas <- coef(reg)[2:3]
reg_sub <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = fam_sub)
betas_sub <- coef(reg_sub)[2:3]
### (a) No clustering
# (i), (ii) HC standard errors
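# HC0 is White's original sandwich estimator; HC1 applies the n/(n - k)
# small-sample scaling; HC2 rescales each squared residual by 1/(1 - h_ii),
# where h_ii is the leverage of observation i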
# SEs are the square roots of the diagonal of the variance matrix; taking diag()
# before sqrt() avoids sqrt() of the (possibly negative) off-diagonal covariances
sigma_hc0 <- sqrt(diag(vcovHC(reg, type = "HC0")[2:3, 2:3]))
sigma_hc1 <- sqrt(diag(vcovHC(reg, type = "HC1")[2:3, 2:3]))
sigma_hc2 <- sqrt(diag(vcovHC(reg, type = "HC2")[2:3, 2:3]))
sigma_hc0_sub <- sqrt(diag(vcovHC(reg_sub, type = "HC0")[2:3, 2:3]))
sigma_hc1_sub <- sqrt(diag(vcovHC(reg_sub, type = "HC1")[2:3, 2:3]))
sigma_hc2_sub <- sqrt(diag(vcovHC(reg_sub, type = "HC2")[2:3, 2:3]))
### (b) With clustering
# (i), (ii) CR standard errors
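# CR0-CR2 are the cluster-robust analogues of HC0-HC2: CR0 is the plain cluster
# sandwich, CR1 adds a small-sample scaling, and CR2 is the bias-reduced
# (Bell-McCaffrey) adjustment implemented in clubSandwich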
sigma_cr0 <- sqrt(diag(vcovCR(reg, cluster = fam$prov, type = "CR0")[2:3, 2:3]))
sigma_cr1 <- sqrt(diag(vcovCR(reg, cluster = fam$prov, type = "CR1")[2:3, 2:3]))
sigma_cr2 <- sqrt(diag(vcovCR(reg, cluster = fam$prov, type = "CR2")[2:3, 2:3]))
sigma_cr0_sub <- sqrt(diag(vcovCR(reg_sub, cluster = fam_sub$prov, type = "CR0")[2:3, 2:3]))
sigma_cr1_sub <- sqrt(diag(vcovCR(reg_sub, cluster = fam_sub$prov, type = "CR1")[2:3, 2:3]))
sigma_cr2_sub <- sqrt(diag(vcovCR(reg_sub, cluster = fam_sub$prov, type = "CR2")[2:3, 2:3]))
### (iii) Effective standard errors
# effective degrees of freedom computed with the dfadjust package by Imbens and Kolesár
# (a) No clustering
reg_adj <- dfadjustSE(reg, IK = FALSE)
df_eff <- reg_adj$coefficients[2:3, 5]
# rescale the HC2 SE so a conventional ±1.96 interval has t(df_eff) coverage
sigma_eff <- sigma_hc2 * qt(0.975, df = df_eff) / 1.96
reg_adj_sub <- dfadjustSE(reg_sub, IK = FALSE)
df_eff_sub <- reg_adj_sub$coefficients[2:3, 5]
sigma_eff_sub <- sigma_hc2_sub * qt(0.975, df = df_eff_sub) / 1.96
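# Illustrative check: because the df adjustment is folded into sigma_eff, a
# conventional ±1.96 interval reproduces the t(df_eff)-based interval
ci_eff <- rbind(lb = betas - 1.96 * sigma_eff, ub = betas + 1.96 * sigma_eff)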
# (b) With clustering
reg_cl_adj <- dfadjustSE(reg, clustervar = as.factor(fam$prov), IK = FALSE)
df_cl_eff <- reg_cl_adj$coefficients[2:3, 5]
sigma_cl_eff <- sigma_cr2 * qt(0.975, df = df_cl_eff) / 1.96
reg_cl_adj_sub <- dfadjustSE(reg_sub, clustervar = as.factor(fam_sub$prov), IK = FALSE)
df_cl_eff_sub <- reg_cl_adj_sub$coefficients[2:3, 5]
sigma_cl_eff_sub <- sigma_cr2_sub * qt(0.975, df = df_cl_eff_sub) / 1.96
### (iv), (v) Bootstrap
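# Two schemes run side by side: (a) a pairs bootstrap resampling individual rows,
# and (b) a cluster (block) bootstrap resampling whole provinces. Each draw stores
# both the coefficients (for bootstrap SEs) and studentized t-statistics (for the
# percentile-t confidence intervals computed after the loop)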
B <- 50000
N <- nrow(fam)
N_sub <- nrow(fam_sub)
provs <- unique(fam$prov)
provs_sub <- unique(fam_sub$prov)
Nclusters <- length(provs)
Nclusters_sub <- length(provs_sub)
bs_estimates <- matrix(data = NA, nrow = B, ncol = 2)
bs_estimates_sub <- matrix(data = NA, nrow = B, ncol = 2)
bs_tstats <- matrix(data = NA, nrow = B, ncol = 2)
bs_tstats_sub <- matrix(data = NA, nrow = B, ncol = 2)
bs_estimates_c <- matrix(data = NA, nrow = B, ncol = 2)
bs_tstats_c <- matrix(data = NA, nrow = B, ncol = 2)
bs_estimates_c_sub <- matrix(data = NA, nrow = B, ncol = 2)
bs_tstats_c_sub <- matrix(data = NA, nrow = B, ncol = 2)
pb <- progress_bar$new(total = B, format = "[:bar] :current/:total (:percent)")
for (b in 1:B) {
  pb$tick()
  # (a) No clustering: resample individual observations (rows) with replacement
  dat <- sample_n(fam, size = N, replace = TRUE)
  dat_sub <- sample_n(fam_sub, size = N_sub, replace = TRUE)
  bs_reg <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = dat)
  bs_reg_sub <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = dat_sub)
  for (c in 2:3) {
    bs_estimates[b, c-1] <- coef(bs_reg)[c]
    bs_estimates_sub[b, c-1] <- coef(bs_reg_sub)[c]
    bs_tstats[b, c-1] <- sqrt(N) * (coef(bs_reg)[c] - coef(reg)[c]) / sqrt(vcovHC(bs_reg, type = "HC1")[c, c])
    bs_tstats_sub[b, c-1] <- sqrt(N_sub) * (coef(bs_reg_sub)[c] - coef(reg_sub)[c]) / sqrt(vcovHC(bs_reg_sub, type = "HC1")[c, c])
  }
  # (b) With clustering: resample whole provinces with replacement. Build the
  # sample cluster by cluster (filter(prov %in% provb) would drop duplicate draws)
  # and give each draw its own id so a province drawn twice counts as two clusters
  provb <- sample(provs, size = Nclusters, replace = TRUE)
  provb_sub <- sample(provs_sub, size = Nclusters_sub, replace = TRUE)
  dat <- map_dfr(seq_along(provb), ~ mutate(filter(fam, prov == provb[.x]), bs_cl = .x))
  dat_sub <- map_dfr(seq_along(provb_sub), ~ mutate(filter(fam_sub, prov == provb_sub[.x]), bs_cl = .x))
  bs_reg <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = dat)
  bs_reg_sub <- lm(ldeaths ~ lgrain_pred_fam + lgrain_pred_invfam + ltotpop + lurbpop + factor(year), data = dat_sub)
  for (c in 2:3) {
    bs_estimates_c[b, c-1] <- coef(bs_reg)[c]
    bs_estimates_c_sub[b, c-1] <- coef(bs_reg_sub)[c]
    bs_tstats_c[b, c-1] <- sqrt(N) * (coef(bs_reg)[c] - coef(reg)[c]) / sqrt(vcovCR(bs_reg, cluster = dat$bs_cl, type = "CR1")[c, c])
    bs_tstats_c_sub[b, c-1] <- sqrt(N_sub) * (coef(bs_reg_sub)[c] - coef(reg_sub)[c]) / sqrt(vcovCR(bs_reg_sub, cluster = dat_sub$bs_cl, type = "CR1")[c, c])
  }
}
# compute bootstrap se (no clustering)
bs_sigma <- apply(bs_estimates, 2, sd)
names(bs_sigma) <- names(coef(reg)[2:3])
bs_sigma_sub <- apply(bs_estimates_sub, 2, sd)
names(bs_sigma_sub) <- names(coef(reg_sub)[2:3])
# compute bootstrap se (clustering)
bs_sigma_cl <- apply(bs_estimates_c, 2, sd)
names(bs_sigma_cl) <- names(coef(reg)[2:3])
bs_sigma_cl_sub <- apply(bs_estimates_c_sub, 2, sd)
names(bs_sigma_cl_sub) <- names(coef(reg_sub)[2:3])
# percentile-t confidence intervals: the sqrt(N) scaling in the bootstrap t-stats
# cancels against the /sqrt(N) here, so these are the usual studentized intervals
lb_all <- numeric(2)
ub_all <- numeric(2)
lb_sub <- numeric(2)
ub_sub <- numeric(2)
lb_all_c <- numeric(2)
ub_all_c <- numeric(2)
lb_sub_c <- numeric(2)
ub_sub_c <- numeric(2)
for (b in 1:2) {
  lb_all[b] <- betas[b] - quantile(bs_tstats[, b], probs = 1 - 0.05 / 2) * sigma_hc1[b] / sqrt(N)
  ub_all[b] <- betas[b] - quantile(bs_tstats[, b], probs = 0.05 / 2) * sigma_hc1[b] / sqrt(N)
  lb_sub[b] <- betas_sub[b] - quantile(bs_tstats_sub[, b], probs = 1 - 0.05 / 2) * sigma_hc1_sub[b] / sqrt(N_sub)
  ub_sub[b] <- betas_sub[b] - quantile(bs_tstats_sub[, b], probs = 0.05 / 2) * sigma_hc1_sub[b] / sqrt(N_sub)
  lb_all_c[b] <- betas[b] - quantile(bs_tstats_c[, b], probs = 1 - 0.05 / 2) * sigma_cr1[b] / sqrt(N)
  ub_all_c[b] <- betas[b] - quantile(bs_tstats_c[, b], probs = 0.05 / 2) * sigma_cr1[b] / sqrt(N)
  lb_sub_c[b] <- betas_sub[b] - quantile(bs_tstats_c_sub[, b], probs = 1 - 0.05 / 2) * sigma_cr1_sub[b] / sqrt(N_sub)
  ub_sub_c[b] <- betas_sub[b] - quantile(bs_tstats_c_sub[, b], probs = 0.05 / 2) * sigma_cr1_sub[b] / sqrt(N_sub)
}
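# Illustrative: collect the full-sample percentile-t intervals for inspection;
# the _sub and clustered variants follow the same pattern
ci_boot <- data.frame(coef = names(betas), lb = lb_all, ub = ub_all)
ci_boot_cl <- data.frame(coef = names(betas), lb = lb_all_c, ub = ub_all_c)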
# sink(file = "standard_errors.tex")
# brew("SE_template.brew")
# sink(file = NULL)
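### ------------------------------------------------------------------------
### hw04 (07-SimulationInference): indirect inference in Julia for a binary
### outcome driven by a latent AR(1) index
### ------------------------------------------------------------------------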
# using Random, Distributions
using ProgressMeter
# using LaTeXTabulars, LaTeXStrings
using CSV, DataFrames
using Statistics
using LinearAlgebra
using Optim
using LatexPrint, IOCapture
using Plots, LaTeXStrings
using LaTeXTabulars, Printf
using Random
using Distributed
Random.seed!(0303)
ddc = CSV.read("07-SimulationInference/hw04/ddc.csv", DataFrame; header = ["t$t" for t in 1:10])
Y = Matrix(ddc) # n×T matrix of outcomes (rows = observations, columns = t1..t10)
n, T = size(ddc) # n observations, T = 10 periods
M = 10 # number of simulated paths per observation
"""
Compute moments and optimal weight matrix.
Input:
- Y ... T×n array of data
Output:
- π_hat ... 2×1 array of moments given data (real or simulated)
- W ... 2×2 weight matrix
"""
function computeMoments(Y; returnW = false)
    T, n = size(Y)
    """
    Compute the 2 S_i values for observation i.
    Input:
    - Y_i ... T×1 array of data corresponding to observation i
    Output:
    - S_i ... 2×1 array of S values for observation i
    """
    function compute_S_i(Y_i)
        T = size(Y_i, 1)
        S_i1 = 0
        for t in 2:T
            S_i1 = S_i1 + Y_i[t] * Y_i[t-1]
        end
        S_i2 = 0
        for t in 3:T
            S_i2 = S_i2 + Y_i[t] * Y_i[t-2]
        end
        S_i = [1/(T-1) * S_i1; 1/(T-2) * S_i2]
    end
    # Compute S_i for all i, and compute π_hat
    S = mapslices(compute_S_i, Y; dims = 1)'
    π_hat = mean(S, dims = 1)
    # Compute weight matrix
    Vxx = 1/n .* sum(S.^2, dims = 1) - π_hat.^2
    Vxy = 1/n * sum((S[:, 1] .- π_hat[1]) .* (S[:, 2] .- π_hat[2]))
    W = inv([Vxx[1] Vxy; Vxy Vxx[2]])
    # Return what's requested
    if returnW
        return π_hat', W
    else
        return π_hat'
    end
end
# computeMoments expects a T×n array (cf. the simY' call below), so pass the transpose
π_hat, W = computeMoments(Y'; returnW = true)
### Export π_hat and W hat
# Capture stdout because LatexPrint is annoying
W_latex = IOCapture.capture() do
    lap(round.(W, digits = 2))
end
π_hat_latex = IOCapture.capture() do
    lap(round.(π_hat, digits = 3))
end
# Write stdout to file
open("07-SimulationInference/hw04/W_hat.tex", "w") do io
    write(io, W_latex.output)
end
open("07-SimulationInference/hw04/pi_hat.tex", "w") do io
    write(io, π_hat_latex.output)
end
# Fix the simulation draws once (common random numbers): reusing the same ϵ and ξ
# across evaluations makes the objective below a deterministic function of ρ
ϵ = randn(M * n, T) # innovations for the M simulated paths per observation
ξ = randn(M * n)    # draws for the initial (stationary) latent index
ρ = 0.5
# Simulate the latent AR(1) index U_t = ρU_{t-1} + ϵ_t with stationary start
# U_0 ~ N(0, 1/(1-ρ²)), and the observed binary outcome Y_t = 1{U_t ≥ 0}
function simulateY(ρ, Y, ϵ, ξ)
    n, T = size(Y)
    U_pre = sqrt(1/(1 - ρ^2)) * ξ # stationary sd of the AR(1) index is 1/sqrt(1-ρ²)
    nM = 10 * n # = M·n simulated paths
    simY = Array{Int}(undef, nM, T)
    for t in 1:T
        U_t = ρ .* U_pre + ϵ[:, t]
        simY[:, t] = (U_t .>= 0)
        U_pre = U_t
    end
    return simY
end
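# Illustrative sanity check: the simulated panel has M·n rows of 0/1 outcomes
# (uses Y, ϵ, ξ, M, n, and T as defined above)
@assert size(simulateY(0.5, Y, ϵ, ξ)) == (M * n, T)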
# Indirect-inference objective: Q(ρ) = (π̂ - π̃(ρ))' Ŵ (π̂ - π̃(ρ)), where π̃(ρ)
# are the moments of data simulated at ρ (closes over Y, ϵ, ξ, π_hat, and W)
function computeQ(ρ)
    simY = simulateY(ρ, Y, ϵ, ξ)
    π_tilde = computeMoments(simY')
    Q = ((π_hat - π_tilde)' * W * (π_hat - π_tilde))[1]
end
# Compute Q over fine grid
gridStep = 0.00001
grid = 0 : gridStep : 1 - gridStep
# @showprogress Q = map(computeQ, grid)
Q = @showprogress [computeQ(ρ) for ρ in grid]
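# Note: the grid has 100,000 points and every evaluation of computeQ simulates
# M·n paths of length T, so this sweep dominates the runtime; the golden-section
# search further below reaches the minimizer with far fewer evaluations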
# Plot Q and ρ ∈ [0,1]
plot(grid, Q, label = :none, xlabel = L"\rho", ylabel = "Objective", size = (400, 300))
savefig("07-SimulationInference/hw04/Q_rho.pdf")
# Export argmin Q(ρ)
argminQ = round(grid[findmin(Q)[2]], digits = 3)
open("07-SimulationInference/hw04/argminQ.tex", "w") do io
write(io, "$argminQ")
end
# Compute ρ_II with optimizer and export results
res = @time optimize(computeQ, 0.0, 1.0, GoldenSection())
argminQ_optim = round(res.minimizer, digits = 3)
minQ_optim = @sprintf("%.3e", res.minimum)
open("07-SimulationInference/hw04/minQ_optim.tex", "w") do io
write(io, "$minQ_optim")
end
# Export Q(ρ) for different values
latex_tabular(
    "07-SimulationInference/hw04/rho_ii_values.tex",
    Tabular("lccc"),
    [Rule(:top),
     [L"\rho", argminQ_optim, 0.6, 0.1],
     # [L"\widehat{Q}(\rho)", minQ_optim, Q[60000], Q[10000]],
     [L"\widehat{Q}(\rho)", "\$7.102 \\times 10^{-4}\$", "\$1.205 \\times 10^{-3}\$", "\$1.700 \\times 10^{-1}\$"],
     Rule(:bottom)])
# Explore how sensitive results are to different draws of ϵ
S = 1000
ρOptimSim = Array{Float64}(undef, S, 2)
@showprogress for s in 1:S
    # redraw the simulation noise; `global` is needed so computeQ sees the new
    # draws when this loop runs in a script's top-level (soft) scope
    global ϵ = randn(M * n, T)
    global ξ = randn(M * n)
    res = optimize(computeQ, 0.0, 1.0, GoldenSection())
    ρOptimSim[s, :] = [res.minimizer, res.minimum]
end
histogram(ρOptimSim[:, 1], legend = :none, xlabel = L"\hat{\rho}_{II}", size = (400, 300))
ρ_II_mu = round(mean(ρOptimSim[:, 1]), digits = 3)
ρ_II_std = round(std(ρOptimSim[:, 1]), digits = 3)
annotate!(.4, 175, text(L"\mu = %$ρ_II_mu", :left, 9))
annotate!(.4, 160, text(L"\sigma = %$ρ_II_std", :left, 9))
savefig("07-SimulationInference/hw04/rhoOptimSim.pdf")