Skip to content

Instantly share code, notes, and snippets.

@kylebutts
Created November 10, 2023 16:23
Show Gist options
  • Save kylebutts/3853092f4b72bfae17a406b8ea300277 to your computer and use it in GitHub Desktop.
Save kylebutts/3853092f4b72bfae17a406b8ea300277 to your computer and use it in GitHub Desktop.
CR3 and CR3J vcov using `fixest::est_env()`
library(fixest)
library(dqrng)
library(tictoc) # For timing
data(trade, package = "fixest")

tic("Initial feols")
est = feols(Euros ~ dist_km | Destination + Origin, data = trade)
toc()
#> Initial feols: 0.023 sec elapsed


cl = trade$Origin
unique_clusters = unique(cl)
G = length(unique_clusters)

cli::cli_h3("\nLeave-cluster-out estimates")
#> 
#> ── Leave-cluster-out estimates
# Leave-cluster-out estimates
tic("Estimate using feols")

  for (clust in unique_clusters) {
    subset = trade[cl != clust, ]
    lo_est = feols(
      Euros ~ dist_km | Destination + Origin, 
      data = subset, only.coef = TRUE
    )
  }

time_feols = toc()
#> Estimate using feols: 0.133 sec elapsed

tic("Estimate using est_env")

  for (clust in unique(cl)) {
    # Need to get env object from est each time because est_env will update the env object.
    core_env = update(est, only.coef = TRUE, only.env = TRUE)
    include = (cl != clust)
    
    if (identical(core_env$weights.value, 1)) {
      w = as.numeric(include)
    } else {
      w = core_env$weights.value * include
    }

    # Updates the estimation environment dropping sample
    lco_est = fixest::est_env(core_env, weights = w)
  }

time_est_env = toc()
#> Estimate using est_env: 0.113 sec elapsed


cli::cli_h3("VCOV CR3")
#> 
#> ── VCOV CR3
tic("Using summclust")

  vcov_summclust = summclust::vcov_CR3J(
    est, cluster = "Origin", absorb_cluster_fixef = FALSE, return_all = TRUE, type = "CRV3J"
  )

toc()
#> Using summclust: 0.127 sec elapsed
sqrt(diag(vcov_summclust$vcov))["dist_km"]
#>  dist_km 
#> 13039.27

# Using est_env
tic("Using est_env")

  lco_ests = matrix(nrow = length(coef(est)), ncol = G)
  colnames(lco_ests) = unique_clusters

  core_env = update(est, only.coef = TRUE, only.env = TRUE)
  w = weights(est)
  if (identical(w, 1) | is.null(w)) w = rep(1, est$nobs)
  for (i in seq_along(unique_clusters)) {
    clust = unique_clusters[i]
    
    assign("weights.value", w * (cl != clust), core_env)

    # Updates the estimation environment dropping sample
    lco_ests[,i] = fixest::est_env(core_env)
  }
  centered = lco_ests - rowMeans(lco_ests)
  vcov_manual = (G - 1) / G * tcrossprod(centered)

toc()
#> Using est_env: 0.064 sec elapsed
sqrt(diag(vcov_manual))
#> [1] 12210.64


# Note: I think this is the same issue as before with producing different estimates than MASS::ginv
vcov_summclust$beta_jack["dist_km", ]
#>        BE        LU        NL        DE        GB        IE        ES        PT 
#> -42363.10 -51098.09 -35305.88 -39604.42 -45821.12 -44839.27 -45251.81 -44883.50 
#>        FR        AT        IT        DK        FI        SE        GR 
#> -41224.74 -45725.14 -44658.12 -46456.61 -46300.13 -46267.32 -45442.79
coef(feols(
  Euros ~ dist_km | Destination + Origin, 
  data = trade[trade$Origin != "BE", ]
))
#>   dist_km 
#> -43576.77
lco_ests[1, ]
#>        BE        LU        NL        DE        GB        IE        ES        PT 
#> -43576.77 -47062.71 -40466.98 -43388.22 -43484.18 -44381.25 -41175.06 -50354.14 
#>        FR        AT        IT        DK        FI        SE        GR 
#> -43234.03 -46715.27 -47560.73 -45839.40 -51536.96 -50369.35 -48066.35

Created on 2023-11-10 with reprex v2.0.2

@kylebutts
Copy link
Author

lco_v2 <- function(est, cl) {
  unique_clusters = unique(cl)

  core_env = update(est, only.coef = TRUE, only.env = TRUE)
  w0 <- core_env$weights.value
  if (length(w0) == 1) w0 = rep(1, length(core_env$lhs))
  
  lco_ests <- lapply(unique_clusters, function(clust) {
    w <- w0
    w[cl == clust] <- 0

    # Updates the estimation environment dropping sample
    fixest::est_env(core_env, weights = w)
  })

  ests = do.call(rbind, lco_ests)
  rownames(ests) <- paste0("leave-out: ", unique_clusters)
  return(ests)
}

bench::mark(
  lco_v2(est, cl), 
  check = FALSE
)

^^ faster

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment