Skip to content

Instantly share code, notes, and snippets.

@johnmyleswhite
Created March 26, 2013 19:06
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save johnmyleswhite/5248212 to your computer and use it in GitHub Desktop.
Save johnmyleswhite/5248212 to your computer and use it in GitHub Desktop.
The Joys of Sparsity: Forward Stagewise Regression
# Generate (x, y) data with a sparse set of active predictors
# prob controls the frequency of predictors having zero effect
function simulate_date(n::Integer, p::Integer, prob::Real)
x = randn(n, p)
beta = randn(p)
for j in 1:p
if rand() < prob
beta[j] = 0.0
end
end
y = x * beta
for i in 1:n
y[i] = y[i] + 0.01 * randn()
end
return x, y, beta
end
# Estimate sparse model coefficients
function forward_stagewise(x::Matrix, y::Vector, max_itr::Integer, tolerance::Real)
# Maintain parameter space
n, p = size(x)
# Maintain a vector of residuals
r = y
# Mainain a vector of inferred coefficients
beta = zeros(p)
# Select a "step-size"
# This determines digits of precision
epsilon = 0.01
# Monitor convergence
old_norm, new_norm = Inf, Inf
converged = false
# Count iterations
itr = 0
# Iterate until convergence
while !converged && itr < max_itr
itr += 1
# Find the predictor maximally correlated with residuals
max_correlation = 0.0
best_predictor = 0
for j in 1:p
current_correlation = abs(cor(r, x[:, j]))
if current_correlation > max_correlation
best_predictor = j
max_correlation = current_correlation
end
end
# Determine the coefficient update
delta = epsilon * sign(dot(x[:, best_predictor], r))
beta[best_predictor] = beta[best_predictor] + delta
# Update residuals
for i in 1:n
r[i] = r[i] - delta * x[i, best_predictor]
end
# Assess convergence
old_norm = new_norm
new_norm = norm(r)
# This can cycle
if abs(old_norm - new_norm) < tolerance || old_norm < new_norm
converged = true
end
# Show trace information
if mod(itr, 100) == 0
@printf "Iteration: %d\n" itr
@printf "Previous Residual Norm: %0.4f\n" old_norm
@printf "Current Residual Norm: %0.4f\n" new_norm
end
end
# Return estimate coefficients
return beta
end
srand(2)
x, y, beta = simulate_date(1_000, 100, 0.75)
beta_hat = forward_stagewise(x, y, 10_000, 1e-4)
beta_ols = inv(x'x) * x' * y
@printf "beta: %s,%s\n" beta[1:4] beta[(end - 3):end]
@printf "beta ~LASSO: %s,%s\n" beta_hat[1:4] beta_hat[(end - 3):end]
@printf "beta OLS: %s\n %s\n" beta_ols[1:4] beta_ols[(end - 3):end]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment