Toy coordinate descent for lasso regression
# Toy coordinate descent for Lasso regression
import numpy as np
import warnings


def _normalise(X, y):
    # Standardize the columns of X and center y; the scale factors are
    # returned so the fitted coefficients can be mapped back afterwards
    Xbar = X.mean(axis=0)
    Xstd = X.std(axis=0)
    ybar = y.mean(axis=0)
    ystd = y.std()
    X_norm = (X - Xbar) / Xstd
    y_cent = y - ybar
    return X_norm, y_cent, Xstd, Xbar, ybar, ystd


def _soft_threshold(x, value):
    # Elementwise soft-thresholding operator: sign(x) * max(|x| - value, 0)
    return np.sign(x) * np.maximum(np.abs(x) - value, 0)


def coordinate_descent(X, y, alpha=0, alpha_weights=None, tol=1e-5, maxiter=1000):
    """Run coordinate descent on the objective

        1/(2n) * ||y - w0 - Xw||^2_2 + P(alpha, w)

    where

        P(alpha, w) = alpha * sum_{i=1}^{p} alpha_weights[i] * |w_i|

    i.e. the Lasso where each l1 penalty coordinate is weighted according to
    `alpha_weights`.

    Fitting is done on normalized data, but coefficients are returned
    on the original scale.

    Parameters
    ----------
    X : ndarray, shape (n_observations, n_features)
    y : ndarray, shape (n_observations,)
    alpha : float
        Overall l1 penalty strength
    alpha_weights : ndarray, shape (n_features,)
        Per-coordinate weights on the l1 penalty

    Returns
    -------
    w_rescaled : array, shape (n_features,)
        Estimated coefficients on the original scale
    intercept : float
        Estimated (unpenalized) intercept on the original scale
    w : array, shape (n_features,)
        Estimated coefficients on the normalized scale

    Reference:
    Hastie, T., Tibshirani, R., & Wainwright, M. (2015). Statistical learning
    with sparsity: the lasso and generalizations. CRC Press.
    """
    n, p = X.shape
    w = np.zeros(p)
    wp = w.copy()
    converged = False

    X, y, Xstd, Xbar, ybar, ystd = _normalise(X, y)

    if alpha_weights is None:
        alpha_weights = np.ones(p)
    # Rescale the penalty weights so they sum to p, the number of features
    # (copy rather than modify the caller's array in place)
    alpha_weights = alpha_weights * (1.0 * p / alpha_weights.sum())

    # Residuals for the current coefficient vector (all zeros at the start)
    r = y - X @ w

    for n_iter in range(maxiter):
        for i in range(p):
            x = X[:, i]
            w_old = w[i]
            # Univariate least-squares solution for coordinate i on the
            # partial residual, then soft-threshold it for the l1 penalty
            w_ols = w_old + np.dot(x, r) / n
            w[i] = _soft_threshold(w_ols, alpha * alpha_weights[i])
            # Update residuals to reflect the new value of w[i]
            r += (w_old - w[i]) * x
        converged = np.linalg.norm(wp - w) < tol
        if converged:
            break
        wp = w.copy()

    if not converged:
        warnings.warn("Coordinate descent did not converge. You might want "
                      "to increase `maxiter` or decrease `tol`")

    # Map the coefficients back to the original scale of X and recover the
    # (unpenalized) intercept
    w_rescaled = w / Xstd
    intercept = ybar - np.sum(Xbar * w_rescaled)

    return w_rescaled, intercept, w
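# --- Usage sketch (not in the original gist; illustrative assumptions) ------
# Quick sanity check of `coordinate_descent` on synthetic data, optionally
# compared against scikit-learn's Lasso, which minimizes the same
# 1/(2n)-scaled objective when `alpha_weights` is left at its default.
# Because this implementation standardizes X internally and sklearn does not,
# the coefficients only agree approximately (closely here, since the columns
# of the simulated X already have standard deviation ~1).
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    n_obs, n_feat = 200, 5
    X = rng.randn(n_obs, n_feat)
    true_w = np.array([1.5, 0.0, -2.0, 0.0, 0.5])
    y = X @ true_w + 0.1 * rng.randn(n_obs)

    w_hat, intercept, _ = coordinate_descent(X, y, alpha=0.1)
    print("coordinate_descent:", w_hat, intercept)

    try:
        from sklearn.linear_model import Lasso
        lasso = Lasso(alpha=0.1).fit(X, y)
        print("sklearn Lasso:     ", lasso.coef_, lasso.intercept_)
    except ImportError:
        pass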