`hatvalues.fixest` implementation

Fast implementation of `hatvalues.fixest`. This calculates (or approximates) the diagonal elements $P_{ii}$ of the projection matrix $X (X'X)^{-1} X'$.

For comparison, with p = 500 draws and 1M fixed effects, Kline, Saggio, and Sølvsten report ~250 seconds for the approximation (with parallelization) in their computational appendix. This implementation takes ~100 seconds without parallelization, and just 2.33 seconds when the model has only fixed effects and no covariates.
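As a quick sanity check on this definition, here is a toy base-R sketch (simulated data, independent of the implementation below) confirming that the diagonal of $X (X'X)^{-1} X'$ matches `stats::hatvalues()` on an `lm` fit:

set.seed(42)
d <- data.frame(x1 = rnorm(20), x2 = rnorm(20))
d$y <- d$x1 + rnorm(20)
fit <- lm(y ~ x1 + x2, data = d)
X <- model.matrix(fit)

# Diagonal of the hat matrix, computed directly
P_ii <- diag(X %*% solve(crossprod(X), t(X)))
all.equal(unname(P_ii), unname(hatvalues(fit)))
#> [1] TRUE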

library(data.table)
library(fixest)
library(Matrix)
library(MatrixExtra)
#> 'MatrixExtra' modifies important behaviors from 'Matrix'. See ?MatrixExtra-options.
library(tictoc)
#> 
#> Attaching package: 'tictoc'
#> The following object is masked from 'package:data.table':
#> 
#>     shift

# `hatvalues` for feols models -------------------------------------------------
hatvalues.fixest <- function(model, exact = TRUE, p = 1000) {
  
  # NOTE: `sparse_model_matrix()` is internal to did2s; as written, this relies
  # on `df` (the estimation data) existing in the calling environment
  X = did2s:::sparse_model_matrix(df, model)

  # If fixed effects only, flag
  fe_only = is.null(X$mat_RHS)
  X = cbind(X$mat_RHS, X$mat_FE)
  S_xx = crossprod(X)
  
  # If there are only fixed effects, the diagonal can be computed *super fast*
  if (fe_only == TRUE) {
    # https://stackoverflow.com/questions/39533785/how-to-compute-diagx-solvea-tx-efficiently-without-taking-matrix-i
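    # With the Cholesky factor S_xx = U'U, P_ii = x_i' (U'U)^{-1} x_i = ||U^{-T} x_i||^2,
    # so the diagonal is the squared column norms of Z = U^{-T} X', and the
    # n x n projection matrix is never formed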
    U = Matrix::chol(S_xx)
    Z = solve(t(U), t(X))
    P_ii = Matrix::colSums(Z ^ 2)
    
    return(P_ii)
    # P_ii[1:10]
    # [1] 0.2000000 0.2000000 0.2000000 0.2000000 0.2000000 0.3333333 0.3333333 0.3333333 0.1428571 0.1428571
  } 
    
  # Note: `diag(X %*% Z)` materializes the full n x n product, so this can
  # run out of memory for very large X
  if (exact == TRUE) {
    Z = solve(S_xx, t(X))
    P_ii = diag(X %*% Z)
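    # (a lighter alternative, assuming elementwise products of sparse and dense
    #  matrices behave as usual here, would be Matrix::rowSums(X * t(Z)),
    #  which avoids forming the dense n x n product)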

    return(P_ii)
    # P_ii[1:10]
    # [1] 0.3344063 0.3336252 0.3335790 0.3339978 0.3337960 0.3333515 0.3333510 0.3335557 0.3336988 0.2001362
    # mean(P_ii)
    # [1] 0.2731241
  }

  if (exact == FALSE) {
    
    n <- nrow(X)
    k <- ncol(X)

    # TODO: think about using fixest object to calculate \hat{\beta} = (X'X)^{-1} X' R_pi for i = 1, ..., p

    # Choose among three strategies based on n*p, trading speed against memory
    # Solve for Z = (X'X)^{-1} (X' R_p)
    if (n*p <= 1000000) {
      # Fastest for small n*p (avoids the explicit for loops used below)

      # Rademacher weights: rbinom draws {0,1}, so *2 - 1 maps them to {-1, +1}
      Rp <- matrix(rbinom(n * p, 1, 0.5) * 2 - 1, nrow = p, ncol = n)
      X_Rp <- tcrossprod(t(X), Rp)
      Z = solve(S_xx, X_Rp)
    
      # P_ii = 1/p || X_i' Z ||^2
      P_ii = colSums(crossprod(Z, t(X))^2) / p 

    } else if (n*p <= 10000000) {
      # More memory-efficient, but slower

      X_Rp <- matrix(0, nrow = k, ncol = p)
      for (j in 1:p) {
        Rp_j <- matrix(rbinom(n, 1, 0.5) * 2 - 1, nrow = 1, ncol = n)
        X_Rp[, j] <- tcrossprod(t(X), Rp_j)
      }
      Z = solve(S_xx, X_Rp)

      # P_ii = 1/p || X_i' Z ||^2
      # Needed for speed. Dense already, so no memory difference
      Z_dense = as.matrix(Z)

      P_ii = rep(0, nrow(X))
      for (j in 1:ncol(Z_dense)) {
        Zj = Z_dense[, j, drop = FALSE]
        P_ii = P_ii + as.numeric(X %*% Zj)^2 / p
      }

    } else {
      # Slightly slower, but very memory efficient
      
      P_ii = rep(0, nrow(X))
      for (j in 1:p) {
        Rp_j <- matrix(rbinom(n, 1, 0.5) * 2 - 1, nrow = 1, ncol = n)
        X_Rp_j <- tcrossprod(t(X), Rp_j)
        Zj <- solve(S_xx, X_Rp_j)

        P_ii = P_ii + as.numeric(X %*% Zj)^2 / p
      }
    }

    return(P_ii)

    # Checking, when p = 1000000, n = 3665 (1000 FEs)
    # P_ii[1:10]
    # [1] 0.3338723 0.3330020 0.3329582 0.3340253 0.3338307 0.3333825 0.3327888 0.3329766 0.3331747 0.2001476
    # mean(P_ii)
    # [1] 0.2731081
  }
}

# Test -------------------------------------------------------------------------

N = 1000000
var_fes = 10
var_eps = 4

# Ensure P_ii exists
N_i = pmax(2, rpois(N, 3))

# Create dataset
df = data.table(id = rep(1:N, N_i))
df[, j := 1:.N, by = id]
df[, fe_i := rnorm(1, 0, sd = sqrt(var_fes)), by = id]
df[, x := rnorm(.N, mean = fe_i, sd = 1)]
df[, y := x * 1 + fe_i + rnorm(.N, 0, sqrt(var_eps))]

# FE only model ----------------------------------------------------------------
est = feols(y ~ 0 | id, data = df)

tic()
h = hatvalues(est)
toc()
#> 2.333 sec elapsed


# Covariates -------------------------------------------------------------------
est = feols(y ~ x | id, data = df)

tic()
h_approx_1k = hatvalues(est, exact = FALSE, p = 500)
toc()
#> 100.744 sec elapsed

Created on 2023-05-08 with reprex v2.0.2

This uses the JLA approximation as described in Kline, Saggio, and Sølvsten. The `hatvalues.fixest` function has two parameters: `exact`, which determines whether to solve for $P_{ii}$ exactly or via the approximation, and `p`, which sets the number of Rademacher weight draws; larger values increase the accuracy of the estimate. Even for small values of `p`, the average $P_{ii}$ is very precise and individual deviations are small.

One thing to note is that the estimated $P_{ii}$ can be greater than 1. Kline, Saggio, and Sølvsten propose an improved estimator that fixes this, but I haven't implemented it yet.
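As a sketch of the mechanics, here is a minimal toy example of the JLA idea on a small dense matrix (plain base R; the simulated `X`, `n`, and `p` are illustrative only, not the fixest pipeline). Since $P = X(X'X)^{-1}X'$ is symmetric and idempotent, $E[(Pr)_i^2] = P_{ii}$ for a Rademacher vector $r$, so averaging the squared entries of $Pr$ over `p` draws recovers the diagonal:

set.seed(1)
n <- 50
X <- cbind(1, rnorm(n), rnorm(n))
S_xx <- crossprod(X)

# Exact diagonal, for reference
P_exact <- diag(X %*% solve(S_xx, t(X)))

# JLA: average (P r)_i^2 over p Rademacher draws
p <- 5000
acc <- numeric(n)
for (j in 1:p) {
  r <- sample(c(-1, 1), n, replace = TRUE)
  z <- solve(S_xx, crossprod(X, r))   # (X'X)^{-1} X' r
  acc <- acc + as.numeric(X %*% z)^2  # (P r)_i^2
}
P_jla <- acc / p
max(abs(P_jla - P_exact))  # small, and shrinks like 1/sqrt(p)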

# (library setup and `hatvalues.fixest` definition identical to the block above)

# Test -------------------------------------------------------------------------

N = 1000
var_fes = 10
var_eps = 4

# Ensure P_ii exists
N_i = pmax(2, rpois(N, 3))

# Create dataset
df = data.table(id = rep(1:N, N_i))
df[, j := 1:.N, by = id]
df[, fe_i := rnorm(1, 0, sd = sqrt(var_fes)), by = id]
df[, x := rnorm(.N, mean = fe_i, sd = 1)]
df[, y := x * 1 + fe_i + rnorm(.N, 0, sqrt(var_eps))]

# FE only model ----------------------------------------------------------------
est = feols(y ~ 0 | id, data = df)

tic()
h = hatvalues(est)
toc()
#> 0.048 sec elapsed


# Covariates -------------------------------------------------------------------
est = feols(y ~ x | id, data = df)

tic()
h_exact = hatvalues(est)
toc()
#> 0.532 sec elapsed
tic()
h_approx_1k = hatvalues(est, exact = FALSE, p = 1000)
toc()
#> 0.974 sec elapsed
tic()
h_approx_10k = hatvalues(est, exact = FALSE, p = 10000)
toc()
#> 9.101 sec elapsed

# How good is the approximation?
diff_1k = h_exact - h_approx_1k
mean(diff_1k)
#> [1] 0.0001043958
sd(diff_1k)
#> [1] 0.01128287

diff_10k = h_exact - h_approx_10k
mean(diff_10k)
#> [1] -0.0002276724
sd(diff_10k)
#> [1] 0.003602415
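
Note that the approximation error scales roughly like $1/\sqrt{p}$: increasing `p` tenfold shrinks the standard deviation by about $\sqrt{10} \approx 3.2$ (0.0113 to 0.0036 above).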

Created on 2023-05-08 with reprex v2.0.2

# (library setup and `hatvalues.fixest` definition identical to the block above)

# Test -------------------------------------------------------------------------

N = 100000
var_fes = 10
var_eps = 4

# Ensure P_ii exists
N_i = pmax(2, rpois(N, 3))

# Create dataset
df = data.table(id = rep(1:N, N_i))
df[, j := 1:.N, by = id]
df[, fe_i := rnorm(1, 0, sd = sqrt(var_fes)), by = id]
df[, x := rnorm(.N, mean = fe_i, sd = 1)]
df[, y := x * 1 + fe_i + rnorm(.N, 0, sqrt(var_eps))]

# FE only model ----------------------------------------------------------------
est = feols(y ~ 0 | id, data = df)

tic()
h = hatvalues(est)
toc()
#> 0.46 sec elapsed


# Covariates -------------------------------------------------------------------
est = feols(y ~ x | id, data = df)

tic()
h_approx_1k = hatvalues(est, exact = FALSE, p = 1000)
toc()
#> 22.377 sec elapsed

Created on 2023-05-08 with reprex v2.0.2
