# %%
library(wooldridge)
library(dqrng)
library(collapse)
#> collapse 2.0.10, see ?`collapse-package` or ?`collapse-documentation`
#>
#> Attaching package: 'collapse'
#> The following object is masked from 'package:stats':
#>
#> D
library(tidyverse)
library(tinytable)
data(alcohol, package = "wooldridge")
nrow(alcohol)
#> [1] 9822
# %%
f = famsize ~ age + agesq + educ + educsq + factor(employ)
X = model.matrix(f, data = alcohol)
y = as.matrix(eval(f[[2]], envir = alcohol))
# %%
ols = function(X, y) {
beta = solve(crossprod(X), crossprod(X, y))
as.data.frame(t(beta))
}
(beta = ols(X, y))
#> (Intercept) age agesq educ educsq factor(employ)1
#> 1 -1.882748 0.2758294 -0.003414645 -0.06979463 0.0004057171 0.255854
Random projection exmample:
# %%
ols_random <- function(X, y, m, draw_func = dqrng::dqrnorm) {
N = nrow(X)
Rp = matrix(draw_func(n = m * N), nrow = m, ncol = N)
# Much lower dimension:
# m x K matrix
Xp = Rp %*% X
# m x 1 vector
yp = Rp %*% y
beta = solve(crossprod(Xp), crossprod(Xp, yp))
as.data.frame(t(beta))
}
ols_random(X, y, m = 100)
#> (Intercept) age agesq educ educsq factor(employ)1
#> 1 -1.592284 0.3789481 -0.004851475 -0.3490212 0.01034057 0.2321395
Averaging over 1000 random projection simulations
# %%
set.seed(20240415)
reps = purrr::map_dfr(1:1000, function(i) {
ols_random(X, y, m = 100)
})
results = bind_rows(beta, colMeans(reps)) |>
mutate(estimator = c("Original", "Random Projections"), .before = 1)
tt(results)
estimator | (Intercept) | age | agesq | educ | educsq | factor(employ)1 |
---|---|---|---|---|---|---|
Original | -1.882748 | 0.2758294 | -0.003414645 | -0.06979463 | 0.0004057171 | 0.2558540 |
Random Projections | -1.941984 | 0.2809203 | -0.003486083 | -0.07316458 | 0.0005421148 | 0.2497102 |
Created on 2024-04-15 with reprex v2.1.0