Skip to content

Instantly share code, notes, and snippets.

@kylebutts
Created April 15, 2024 16:36
Show Gist options
  • Save kylebutts/bd3c7adbd290a99b3777a31e9a2d8c81 to your computer and use it in GitHub Desktop.
Save kylebutts/bd3c7adbd290a99b3777a31e9a2d8c81 to your computer and use it in GitHub Desktop.
Example random projections for regression
# %% 
library(wooldridge)
library(dqrng)
library(collapse)
#> collapse 2.0.10, see ?`collapse-package` or ?`collapse-documentation`
#> 
#> Attaching package: 'collapse'
#> The following object is masked from 'package:stats':
#> 
#>     D
library(tidyverse)
library(tinytable)

data(alcohol, package = "wooldridge")
nrow(alcohol)
#> [1] 9822

# %% 
f = famsize ~ age + agesq + educ + educsq + factor(employ)
X = model.matrix(f, data = alcohol)
y = as.matrix(eval(f[[2]], envir = alcohol))

# %% 
ols = function(X, y) {
  beta = solve(crossprod(X), crossprod(X, y))
  as.data.frame(t(beta))
}
(beta = ols(X, y))
#>   (Intercept)       age        agesq        educ       educsq factor(employ)1
#> 1   -1.882748 0.2758294 -0.003414645 -0.06979463 0.0004057171        0.255854

Random projection exmample:

# %% 
ols_random <- function(X, y, m, draw_func = dqrng::dqrnorm) {
  N = nrow(X)
  Rp = matrix(draw_func(n = m * N), nrow = m, ncol = N)
  
  # Much lower dimension: 
  # m x K matrix
  Xp = Rp %*% X
  # m x 1 vector
  yp = Rp %*% y

  beta = solve(crossprod(Xp), crossprod(Xp, yp))
  as.data.frame(t(beta))
}
ols_random(X, y, m = 100) 
#>   (Intercept)       age        agesq       educ     educsq factor(employ)1
#> 1   -1.592284 0.3789481 -0.004851475 -0.3490212 0.01034057       0.2321395

Averaging over 1000 random projection simulations

# %% 
set.seed(20240415)
reps = purrr::map_dfr(1:1000, function(i) {
  ols_random(X, y, m = 100)
})

results = bind_rows(beta, colMeans(reps)) |>
  mutate(estimator = c("Original", "Random Projections"), .before = 1)

tt(results)
estimator (Intercept) age agesq educ educsq factor(employ)1
Original -1.882748 0.2758294 -0.003414645 -0.06979463 0.0004057171 0.2558540
Random Projections -1.941984 0.2809203 -0.003486083 -0.07316458 0.0005421148 0.2497102

Created on 2024-04-15 with reprex v2.1.0

# %%
library(wooldridge)
library(dqrng)
library(collapse)
library(tidyverse)
library(tinytable)
data(alcohol, package = "wooldridge")
nrow(alcohol)
# %%
f = famsize ~ age + agesq + educ + educsq + factor(employ)
X = model.matrix(f, data = alcohol)
y = as.matrix(eval(f[[2]], envir = alcohol))
# %%
ols = function(X, y) {
beta = solve(crossprod(X), crossprod(X, y))
as.data.frame(t(beta))
}
(beta = ols(X, y))
#' Random projection exmample:
# %%
ols_random <- function(X, y, m, draw_func = dqrng::dqrnorm) {
N = nrow(X)
Rp = matrix(draw_func(n = m * N), nrow = m, ncol = N)
# Much lower dimension:
# m x K matrix
Xp = Rp %*% X
# m x 1 vector
yp = Rp %*% y
beta = solve(crossprod(Xp), crossprod(Xp, yp))
as.data.frame(t(beta))
}
ols_random(X, y, m = 100)
#' Averaging over 1000 random projection simulations
# %%
set.seed(20240415)
reps = purrr::map_dfr(1:1000, function(i) {
ols_random(X, y, m = 100)
})
results = bind_rows(beta, colMeans(reps)) |>
mutate(estimator = c("Original", "Random Projections"), .before = 1)
tt(results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment