library(fixest)
library(dqrng)
library(tictoc) # For timing
data(trade, package = "fixest")
tic("Initial feols")
est = feols(Euros ~ dist_km | Destination + Origin, data = trade)
toc()
#> Initial feols: 0.023 sec elapsed
cl = trade$Origin
unique_clusters = unique(cl)
G = length(unique_clusters)
cli::cli_h3("\nLeave-cluster-out estimates")
#>
#> ── Leave-cluster-out estimates
# Leave-cluster-out estimates
tic("Estimate using feols")
for (clust in unique_clusters) {
subset = trade[cl != clust, ]
lo_est = feols(
Euros ~ dist_km | Destination + Origin,
data = subset, only.coef = TRUE
)
}
time_feols = toc()
#> Estimate using feols: 0.133 sec elapsed
tic("Estimate using est_env")
for (clust in unique(cl)) {
# Need to get env object from est each time because est_env will update the env object.
core_env = update(est, only.coef = TRUE, only.env = TRUE)
include = (cl != clust)
if (identical(core_env$weights.value, 1)) {
w = as.numeric(include)
} else {
w = core_env$weights.value * include
}
# Updates the estimation environment dropping sample
lco_est = fixest::est_env(core_env, weights = w)
}
time_est_env = toc()
#> Estimate using est_env: 0.113 sec elapsed
cli::cli_h3("VCOV CR3")
#>
#> ── VCOV CR3
tic("Using summclust")
vcov_summclust = summclust::vcov_CR3J(
est, cluster = "Origin", absorb_cluster_fixef = FALSE, return_all = TRUE, type = "CRV3J"
)
toc()
#> Using summclust: 0.127 sec elapsed
sqrt(diag(vcov_summclust$vcov))["dist_km"]
#> dist_km
#> 13039.27
# Using est_env
tic("Using est_env")
lco_ests = matrix(nrow = length(coef(est)), ncol = G)
colnames(lco_ests) = unique_clusters
core_env = update(est, only.coef = TRUE, only.env = TRUE)
w = weights(est)
if (identical(w, 1) | is.null(w)) w = rep(1, est$nobs)
for (i in seq_along(unique_clusters)) {
clust = unique_clusters[i]
assign("weights.value", w * (cl != clust), core_env)
# Updates the estimation environment dropping sample
lco_ests[,i] = fixest::est_env(core_env)
}
centered = lco_ests - rowMeans(lco_ests)
vcov_manual = (G - 1) / G * tcrossprod(centered)
toc()
#> Using est_env: 0.064 sec elapsed
sqrt(diag(vcov_manual))
#> [1] 12210.64
# Note: I think this is the same issue as before with producing different estimates than MASS::ginv
vcov_summclust$beta_jack["dist_km", ]
#> BE LU NL DE GB IE ES PT
#> -42363.10 -51098.09 -35305.88 -39604.42 -45821.12 -44839.27 -45251.81 -44883.50
#> FR AT IT DK FI SE GR
#> -41224.74 -45725.14 -44658.12 -46456.61 -46300.13 -46267.32 -45442.79
coef(feols(
Euros ~ dist_km | Destination + Origin,
data = trade[trade$Origin != "BE", ]
))
#> dist_km
#> -43576.77
lco_ests[1, ]
#> BE LU NL DE GB IE ES PT
#> -43576.77 -47062.71 -40466.98 -43388.22 -43484.18 -44381.25 -41175.06 -50354.14
#> FR AT IT DK FI SE GR
#> -43234.03 -46715.27 -47560.73 -45839.40 -51536.96 -50369.35 -48066.35
Created on 2023-11-10 with reprex v2.0.2
^^ faster