-
-
Save mathurinm/975c5a6fe8a3a5723f4d0ca8844d570c to your computer and use it in GitHub Desktop.
Solving L1-regularized problems with l-bfgs-b
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""l-bfgs-b L1-Logistic Regression solver""" | |
# Author: Vlad Niculae <vlad@vene.ro> | |
# Suggested by Mathieu Blondel | |
from __future__ import division, print_function | |
import numpy as np | |
from scipy.optimize import fmin_l_bfgs_b | |
from sklearn.base import BaseEstimator, ClassifierMixin | |
from sklearn.utils.extmath import safe_sparse_dot, log_logistic | |
from sklearn.utils.fixes import expit | |
def _l1_logistic_loss_grad(w_extended, X, y, alpha): | |
_, n_features = X.shape | |
w = w_extended[:n_features] - w_extended[n_features:] | |
yz = y * safe_sparse_dot(X, w) | |
# Logistic loss is the negative of the log of the logistic function. | |
out = -np.sum(log_logistic(yz)) | |
# out += .5 * alpha * np.dot(w, w) # L2 | |
out += alpha * w_extended.sum() # L1, w_extended is non-negative | |
z = expit(yz) | |
z0 = (z - 1) * y | |
grad = safe_sparse_dot(X.T, z0) | |
grad = np.concatenate([grad, -grad]) | |
# grad += alpha * w # L2 | |
grad += alpha # L1 | |
return out, grad | |
class LbfgsL1Logistic(BaseEstimator, ClassifierMixin):
    """L1-regularized logistic regression solved with L-BFGS-B.

    Solves the same objective as sklearn.linear_model.LogisticRegression
    with an L1 penalty, by splitting the weights into non-negative
    positive and negative parts and box-constraining both at zero.

    Parameters
    ----------
    tol : float, default: 1e-3
        Convergence tolerance (pgtol) passed to L-BFGS-B.
    alpha : float, default: 1.0
        The amount of regularization to use.
    """

    def __init__(self, tol=1e-3, alpha=1.0):
        self.tol = tol
        self.alpha = alpha

    def fit(self, X, y):
        """Fit the model to X and labels y (expected in {-1, +1})."""
        n_samples, n_features = X.shape
        # Optimize over the non-negative split vector [w_pos, w_neg].
        x0 = np.zeros(2 * n_features)
        box = [(0, None)] * (2 * n_features)
        w_opt, _, _ = fmin_l_bfgs_b(_l1_logistic_loss_grad, x0=x0,
                                    fprime=None, pgtol=self.tol,
                                    bounds=box,
                                    args=(X, y, self.alpha))
        # Recombine the split halves into the signed coefficient vector.
        self.coef_ = w_opt[:n_features] - w_opt[n_features:]
        return self

    def predict(self, X):
        """Predict labels as the sign of the linear decision value."""
        return np.sign(safe_sparse_dot(X, self.coef_))
if __name__ == '__main__':
    from scipy.spatial.distance import jaccard
    from sklearn.linear_model import LogisticRegression
    from time import time

    # Generate data with a known sparsity pattern; seeded so the demo is
    # reproducible run-to-run.
    rng = np.random.RandomState(0)
    n_samples, n_features, n_relevant = 100, 80, 20
    X = rng.randn(n_samples, n_features)
    X /= np.linalg.norm(X, axis=1)[:, np.newaxis]
    true_coef = np.zeros(n_features)
    # choice(..., replace=False) guarantees exactly n_relevant distinct
    # nonzero positions; randint sampled with replacement and could
    # silently produce fewer.
    nonzero_idx = rng.choice(n_features, size=n_relevant, replace=False)
    true_coef[nonzero_idx] = rng.randn(n_relevant)
    y = np.dot(X, true_coef) + rng.randn(n_samples) * 0.01

    # classification, note: y must be {-1, +1}
    y = np.sign(y)

    C = 1.0

    # Run this solver
    t0 = time()
    lasso_1 = LbfgsL1Logistic(alpha=1. / C, tol=1e-8).fit(X, y)
    t0 = time() - t0
    # Compare supports with != 0 so that negative true coefficients are
    # counted as part of the support ("> 0" silently dropped them).
    print("l-bfgs-b: time = {:.4f}s acc = {:.8f} ||w - w_true|| = {:.6f} "
          "Jacc. sparsity = {:.2f}".format(t0, lasso_1.score(X, y),
          np.linalg.norm(lasso_1.coef_ - true_coef),
          jaccard(true_coef != 0, lasso_1.coef_ != 0)))

    # run liblinear; the solver must be requested explicitly because
    # recent scikit-learn defaults to lbfgs, which rejects penalty='l1'.
    t0 = time()
    lasso_2 = LogisticRegression(penalty='l1', C=C, tol=1e-8,
                                 solver='liblinear',
                                 fit_intercept=False).fit(X, y)
    t0 = time() - t0
    print("liblinear: time = {:.4f}s acc = {:.8f} ||w - w_true|| = {:.6f} "
          "Jacc. sparsity = {:.2f}".format(t0, lasso_2.score(X, y),
          np.linalg.norm(lasso_2.coef_ - true_coef),
          jaccard(true_coef != 0, lasso_2.coef_ != 0)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""l-bfgs-b Lasso solver""" | |
# Author: Vlad Niculae <vlad@vene.ro> | |
# Suggested by Mathieu Blondel | |
from __future__ import division, print_function | |
import numpy as np | |
from scipy.optimize import fmin_l_bfgs_b | |
from sklearn.base import BaseEstimator, RegressorMixin | |
from sklearn.utils.extmath import safe_sparse_dot | |
class LbfgsLasso(BaseEstimator, RegressorMixin):
    """Lasso regression solved with L-BFGS-B.

    Solves the same objective as sklearn.linear_model.Lasso (scaled by a
    constant factor of 2 * n_samples, which leaves the minimizer
    unchanged) by splitting the coefficients into non-negative positive
    and negative parts.

    Parameters
    ----------
    tol : float, default: 1e-3
        Convergence tolerance (pgtol) for L-BFGS-B.
    alpha : float, default: 1.0
        The amount of regularization to use.
    """

    def __init__(self, tol=1e-3, alpha=1.0):
        self.tol = tol
        self.alpha = alpha

    def fit(self, X, y):
        n_samples, n_features = X.shape

        def objective(v, *args):
            # v is the non-negative split [w_pos, w_neg]; the signed
            # coefficients are w = w_pos - w_neg, and v.sum() equals
            # abs(w).sum() at any optimum.
            w = v[:n_features] - v[n_features:]
            residual = safe_sparse_dot(X, w) - y
            # The 2 * alpha * n_samples scaling matches the sklearn
            # Lasso objective up to a constant factor.
            return (np.sum(residual ** 2) +
                    (2 * self.alpha * n_samples) * v.sum())

        def gradient(v, *args):
            # Gradient w.r.t. [w_pos, w_neg] is [dloss, -dloss] plus the
            # constant derivative of the (linear) L1 term.
            w = v[:n_features] - v[n_features:]
            dloss = 2 * safe_sparse_dot(X.T, safe_sparse_dot(X, w) - y)
            g = np.concatenate([dloss, -dloss])
            g += 2 * self.alpha * n_samples
            return g

        x0 = np.zeros(2 * n_features)
        v_opt, _, _ = fmin_l_bfgs_b(objective, x0=x0, fprime=gradient,
                                    pgtol=self.tol,
                                    bounds=[(0, None)] * (2 * n_features))
        # Recombine the split halves into the signed coefficient vector.
        self.coef_ = v_opt[:n_features] - v_opt[n_features:]
        return self

    def predict(self, X):
        """Return the linear predictions X . coef_."""
        return safe_sparse_dot(X, self.coef_)
if __name__ == '__main__':
    from scipy.spatial.distance import jaccard
    from sklearn.linear_model import Lasso
    from time import time

    # Generate data with a known sparsity pattern; seeded so the demo is
    # reproducible run-to-run.
    rng = np.random.RandomState(0)
    n_samples, n_features, n_relevant = 100, 80, 20
    X = rng.randn(n_samples, n_features)
    X /= np.linalg.norm(X, axis=1)[:, np.newaxis]
    true_coef = np.zeros(n_features)
    # choice(..., replace=False) guarantees exactly n_relevant distinct
    # nonzero positions; randint sampled with replacement and could
    # silently produce fewer.
    nonzero_idx = rng.choice(n_features, size=n_relevant, replace=False)
    true_coef[nonzero_idx] = rng.randn(n_relevant)
    y = np.dot(X, true_coef) + rng.randn(n_samples) * 0.01

    # Run this solver
    t0 = time()
    lasso_1 = LbfgsLasso(alpha=0.001, tol=1e-8).fit(X, y)
    t0 = time() - t0
    # Compare supports with != 0 so that negative true coefficients are
    # counted as part of the support ("> 0" silently dropped them).
    print("LBFGSB: time = {:.4f}s R2 = {:.8f} ||w - w_true|| = {:.6f} "
          "Jacc. sparsity = {:.2f}".format(t0, lasso_1.score(X, y),
          np.linalg.norm(lasso_1.coef_ - true_coef),
          jaccard(true_coef != 0, lasso_1.coef_ != 0)))

    # Run coordinate descent Lasso from scikit-learn
    t0 = time()
    lasso_2 = Lasso(alpha=0.001, fit_intercept=False, tol=1e-8).fit(X, y)
    t0 = time() - t0
    print("CD: time = {:.4f}s R2 = {:.8f} ||w - w_true|| = {:.6f} "
          "Jacc. sparsity = {:.2f}".format(t0, lasso_2.score(X, y),
          np.linalg.norm(lasso_2.coef_ - true_coef),
          jaccard(true_coef != 0, lasso_2.coef_ != 0)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment