-
-
Save ibayer/3188143 to your computer and use it in GitHub Desktop.
Strong rules for the lasso and elastic net — coordinate-descent implementations with strong-rule screening, plus tests against scikit-learn.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Generalized linear models via coordinate descent | |
Author: Fabian Pedregosa <fabian@fseoane.net> | |
""" | |
import numpy as np | |
from scipy import linalg | |
MAX_ITER = 100


def enet_coordinate_descent(X, y, alpha, rho, warm_start=None, max_iter=MAX_ITER):
    """Solve the elastic net by naive cyclic coordinate descent.

    Minimizes 0.5 * ||y - X beta||^2 + l1_reg * ||beta||_1 + 0.5 * l2-type term,
    with l1_reg = alpha * rho * n_samples and l2_reg = alpha * (1 - rho) * n_samples.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
    y : ndarray, shape (n_samples,)
    alpha : float, overall regularization strength (scaled by n_samples).
    rho : float, l1 ratio: rho=1 is the lasso, rho=0 pure ridge.
    warm_start : ndarray or None, initial coefficients (copied, not mutated).
    max_iter : int, number of full passes over the features.

    Returns
    -------
    beta : ndarray, shape (n_features,)
    """
    n_samples = X.shape[0]
    l1_reg = alpha * rho * n_samples
    l2_reg = alpha * (1.0 - rho) * n_samples
    if warm_start is not None:
        # fix: copy so the caller's array is not silently mutated in place
        beta = warm_start.copy()
    else:
        # fix: np.float was deprecated and removed in numpy >= 1.24; use builtin float
        beta = np.zeros(X.shape[1], dtype=float)
    # (removed dead statement `alpha = alpha * X.shape[0]`: alpha was never
    #  read again -- l1_reg / l2_reg above already carry the n_samples scaling)
    for _ in range(max_iter):
        for i in range(X.shape[1]):
            bb = beta.copy()
            bb[i] = 0.
            # partial residual correlation for coordinate i
            residual = np.dot(X[:, i], y - np.dot(X, bb).T)
            # soft-thresholding update with the l2 term in the denominator
            beta[i] = np.sign(residual) * np.fmax(np.abs(residual) - l1_reg, 0) \
                / (np.dot(X[:, i], X[:, i]) + l2_reg)
    return beta
def shrinkage(X, y, l1_reg, l2_reg, beta, ever_active_set, max_iter):
    """Cyclic coordinate descent restricted to the features in *ever_active_set*.

    Returns an updated copy of *beta*; coordinates outside the active set
    keep their input values.  *beta* itself is never modified.
    """
    coef = beta.copy()
    for _ in range(max_iter):
        for j in ever_active_set:
            coef[j] = 0
            # correlation of feature j with the partial residual
            partial = np.dot(X[:, j], y - np.dot(X, coef).T)
            shrunk = np.fmax(np.abs(partial) - l1_reg, 0)
            coef[j] = np.sign(partial) * shrunk / (np.dot(X[:, j], X[:, j]) + l2_reg)
    return coef
def enet_path(X, y, alphas, rho, max_iter=MAX_ITER, verbose=False):
    """Compute the elastic-net regularization path using strong screening rules.

    The screening strategy is described in "Strong rules for discarding
    predictors in lasso-type problems" (Tibshirani et al.).

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
    y : ndarray, shape (n_samples,)
    alphas : sequence of regularization parameters; sorted decreasing internally.
    rho : float, l1 ratio of the elastic-net penalty.
    max_iter : int, iteration budget for each coordinate-descent solve.
    verbose : bool, print the evolution of the screening / active sets.

    Returns
    -------
    beta : ndarray, shape (len(alphas), n_features)
        One coefficient vector per alpha.  WARNING: does not compute intercept.

    NOTE(review): nesting below is reconstructed from a flattened source; the
    full-feature KKT recheck is placed inside the violation branch -- confirm.
    """
    # fix: np.float was deprecated and removed in numpy >= 1.24
    beta = np.zeros((len(alphas), X.shape[1]), dtype=float)
    alphas = np.sort(alphas)[::-1]  # make sure alphas are properly ordered
    n_samples, n_features = X.shape
    alphas_scaled = np.array(alphas) * n_samples
    ever_active_set = np.arange(n_features).tolist()
    for k, a in enumerate(alphas_scaled):
        if k > 0:
            # .. sequential strong rule: screen using the previous solution ..
            tmp = np.dot(X.T, y - np.dot(X, beta[k - 1]))
            tmp = np.abs(tmp)
            strong_set = tmp >= rho * (2 * alphas_scaled[k] - alphas_scaled[k - 1])
            strong_set = np.where(strong_set)[0].tolist()
        else:
            # .. basic strong rule for the first (largest) alpha ..
            tmp = np.abs(np.dot(X.T, y))
            alpha_max = np.max(tmp)  # hoisted: same value as before, computed once
            strong_set = tmp >= rho * (2 * alphas_scaled[k] - alpha_max)
            strong_set = np.where(strong_set)[0].tolist()
            # NOTE(review): placement inside the k == 0 branch reconstructed
            # from flattened indentation -- confirm against the original gist
            ever_active_set = strong_set
        if verbose:
            # fix: Python 2 print statements converted to the print() function
            print('Strong set ', strong_set)
            print('Ever active set ', ever_active_set)
        l1_reg = a * rho
        l2_reg = a * (1.0 - rho)
        # solve restricted to the current active set
        beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], ever_active_set, max_iter)
        # check the KKT conditions on the strong set
        kkt_violations = False
        for i in strong_set:
            tmp = np.dot(X[:, i], y - np.dot(X, beta[k]))
            s = np.sign(beta[k, i])
            if beta[k, i] != 0 and not np.allclose(tmp, s * l1_reg + l2_reg * beta[k, i], rtol=1e-2):
                # fix: removed a leftover `import ipdb; ipdb.set_trace()`
                # debugger breakpoint that was here
                if i not in ever_active_set:
                    ever_active_set.append(i)
                kkt_violations = True
            if beta[k, i] == 0 and abs(tmp) >= np.abs(l1_reg):
                if i not in ever_active_set:
                    ever_active_set.append(i)
                kkt_violations = True
        if not kkt_violations:
            # passed KKT for all variables in strong set, we're done
            ever_active_set = np.where(beta[k] != 0)[0].tolist()
            if verbose:
                print('no KKT violated on strong set')
        else:
            # .. recompute with the enlarged active set ..
            if verbose:
                print('KKT violated on strong set')
            beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], ever_active_set, max_iter)
            # .. check KKT on all predictors ..
            kkt_violations = False
            for i in range(n_features):
                tmp = np.dot(X[:, i], y - np.dot(X, beta[k]))
                s = np.sign(beta[k, i])
                if beta[k, i] != 0 and not np.allclose(tmp, s * l1_reg + l2_reg * beta[k, i], rtol=1e-2):
                    if i not in ever_active_set:
                        ever_active_set.append(i)
                    kkt_violations = True
                if beta[k, i] == 0 and abs(tmp) >= np.abs(l1_reg):
                    if i not in ever_active_set:
                        ever_active_set.append(i)
                    kkt_violations = True
            if not kkt_violations:
                # passed KKT for all variables, we're done
                ever_active_set = np.where(beta[k] != 0)[0].tolist()
                # fix: this print was unguarded, ignoring the verbose flag
                if verbose:
                    print('no KKT violated on full set of features')
            else:
                if verbose:
                    print('KKT violated on full set of features')
                beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], ever_active_set, max_iter)
    return beta
def check_kkt_enet(Xr, coef, l1_reg, l2_reg, tol=1e-3):
    """Check the elastic-net KKT conditions.

    Xr : X'(y - X coef)
    """
    active = coef != 0
    l2_penalty = l2_reg * coef
    # stationarity on the active coordinates: gradient matches the subgradient
    stationarity = np.all(
        np.abs(Xr[active] - (np.sign(coef[active]) * l1_reg + l2_penalty[active])) < tol)
    # inactive coordinates: correlation bounded by the l1 penalty
    subgradient = np.all(np.abs(Xr[~active] / (l1_reg + l2_penalty[~active])) <= 1)
    return stationarity and subgradient
if __name__ == '__main__':
    np.random.seed(0)
    from sklearn import datasets

    # small real regression problem as a smoke test
    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target
    rho = 0.8
    alphas = np.linspace(.1, 1., 10)[::-1]
    coef = enet_path(X, y, alphas, rho=rho, verbose=True)
    n_samples = X.shape[0]
    # the KKT conditions must hold at every point of the path
    for idx, alpha in enumerate(alphas):
        Xr = np.dot(X.T, y - np.dot(X, coef[idx]))
        l1_reg = alpha * rho * n_samples
        l2_reg = alpha * (1.0 - rho) * n_samples
        assert check_kkt_enet(Xr, coef[idx], l1_reg, l2_reg)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Generalized linear models via coordinate descent | |
Author: Fabian Pedregosa <fabian@fseoane.net> | |
""" | |
import numpy as np | |
from scipy import linalg | |
MAX_ITER = 100


def l1_coordinate_descent(X, y, alpha, warm_start=None, max_iter=MAX_ITER):
    """Solve the lasso by naive cyclic coordinate descent.

    Minimizes 0.5 * ||y - X beta||^2 + alpha * n_samples * ||beta||_1.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
    y : ndarray, shape (n_samples,)
    alpha : float, regularization strength (scaled by n_samples below).
    warm_start : ndarray or None, initial coefficients (copied, not mutated).
    max_iter : int, number of full passes over the features.

    Returns
    -------
    beta : ndarray, shape (n_features,)
    """
    if warm_start is not None:
        # fix: copy so the caller's array is not silently mutated in place
        beta = warm_start.copy()
    else:
        # fix: np.float was deprecated and removed in numpy >= 1.24
        beta = np.zeros(X.shape[1], dtype=float)
    alpha = alpha * X.shape[0]  # the effective penalty is alpha * n_samples
    for _ in range(max_iter):
        for i in range(X.shape[1]):
            bb = beta.copy()
            bb[i] = 0.
            # partial residual correlation for coordinate i
            residual = np.dot(X[:, i], y - np.dot(X, bb).T)
            # soft-thresholding update
            beta[i] = np.sign(residual) * np.fmax(np.abs(residual) - alpha, 0) \
                / np.dot(X[:, i], X[:, i])
    return beta
def shrinkage(X, y, alpha, beta, ever_active_set, max_iter):
    """Lasso coordinate descent over only the features in *ever_active_set*.

    Returns an updated copy of *beta*; features outside the active set keep
    their input values, and *beta* itself is never modified.
    """
    coef = beta.copy()
    for _ in range(max_iter):
        for j in ever_active_set:
            coef[j] = 0
            # correlation of feature j with the partial residual
            partial = np.dot(X[:, j], y - np.dot(X, coef).T)
            denom = np.dot(X[:, j], X[:, j])
            coef[j] = np.sign(partial) * np.fmax(np.abs(partial) - alpha, 0) / denom
    return coef
def l1_path(X, y, alphas, max_iter=MAX_ITER, verbose=False):
    """Compute the lasso regularization path using strong screening rules.

    The screening strategy is described in "Strong rules for discarding
    predictors in lasso-type problems" (Tibshirani et al.).

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
    y : ndarray, shape (n_samples,)
    alphas : sequence of regularization parameters; sorted decreasing internally.
    max_iter : int, iteration budget for each coordinate-descent solve.
    verbose : bool, print the evolution of the screening / active sets.

    Returns
    -------
    beta : ndarray, shape (len(alphas), n_features)
        One coefficient vector per alpha.  WARNING: does not compute intercept.

    NOTE(review): nesting below is reconstructed from a flattened source; the
    full-feature KKT recheck is placed inside the violation branch -- confirm.
    """
    # fix: np.float was deprecated and removed in numpy >= 1.24
    beta = np.zeros((len(alphas), X.shape[1]), dtype=float)
    alphas = np.sort(alphas)[::-1]  # make sure alphas are properly ordered
    n_samples, n_features = X.shape
    alphas_scaled = np.array(alphas) * n_samples
    for k, a in enumerate(alphas_scaled):
        if k > 0:
            # .. sequential strong rule: screen using the previous solution ..
            tmp = np.dot(X.T, y - np.dot(X, beta[k - 1]))
            tmp = np.abs(tmp)
            strong_set = tmp >= 2 * alphas_scaled[k] - alphas_scaled[k - 1]
            strong_set = np.where(strong_set)[0].tolist()
        else:
            # .. basic strong rule for the first (largest) alpha ..
            tmp = np.abs(np.dot(X.T, y))
            alpha_max = np.max(tmp)  # hoisted: same value as before, computed once
            strong_set = tmp >= 2 * alphas_scaled[k] - alpha_max
            strong_set = np.where(strong_set)[0].tolist()
            # NOTE(review): placement inside the k == 0 branch reconstructed
            # from flattened indentation; later iterations then reuse the
            # ever-active set carried over from the previous alpha -- confirm
            ever_active_set = strong_set
        if verbose:
            # fix: Python 2 print statements converted to the print() function
            print('Strong active set ', strong_set)
            print('Ever active set ', ever_active_set)
        # solve restricted to the current active set
        beta[k] = shrinkage(X, y, a, beta[k], ever_active_set, max_iter)
        # check the KKT conditions on the strong set
        kkt_violations = False
        for i in strong_set:
            tmp = np.dot(X[:, i], y - np.dot(X, beta[k]))
            # NOTE(review): unlike the enet variant, the signed correlation is
            # compared to |alpha| without matching sign(beta) -- a sign mismatch
            # only costs an extra refit, but confirm this was intended
            if beta[k, i] != 0 and not np.allclose(tmp, np.abs(alphas_scaled[k])):
                if i not in ever_active_set:
                    ever_active_set.append(i)
                kkt_violations = True
            if beta[k, i] == 0 and abs(tmp) >= np.abs(alphas_scaled[k]):
                if i not in ever_active_set:
                    ever_active_set.append(i)
                kkt_violations = True
        if not kkt_violations:
            # passed KKT for all variables in strong active set, we're done
            ever_active_set = np.where(beta[k] != 0)[0].tolist()
            if verbose:
                print('no KKT violated on strong active set')
        else:
            # .. recompute with the enlarged active set ..
            if verbose:
                print('KKT violated on strong active set')
            beta[k] = shrinkage(X, y, a, beta[k], ever_active_set, max_iter)
            # .. check KKT on all predictors ..
            kkt_violations = False
            for i in range(n_features):
                tmp = np.dot(X[:, i], y - np.dot(X, beta[k]))
                if beta[k, i] != 0 and not np.allclose(tmp, np.abs(alphas_scaled[k])):
                    if i not in ever_active_set:
                        ever_active_set.append(i)
                    kkt_violations = True
                if beta[k, i] == 0 and abs(tmp) >= np.abs(alphas_scaled[k]):
                    if i not in ever_active_set:
                        ever_active_set.append(i)
                    kkt_violations = True
            if not kkt_violations:
                # passed KKT for all variables, we're done
                ever_active_set = np.where(beta[k] != 0)[0].tolist()
                # fix: this print was unguarded, ignoring the verbose flag
                if verbose:
                    print('no KKT violated on full active set')
            else:
                if verbose:
                    print('KKT violated on full active set')
                beta[k] = shrinkage(X, y, a, beta[k], ever_active_set, max_iter)
    return beta
def check_kkt_lasso(xr, coef, penalty, tol=1e-3):
    """Check the lasso KKT conditions.

    xr : X'(y - X coef)
    """
    active = coef != 0
    # stationarity on the active coordinates: gradient equals the subgradient
    stationarity = np.all(np.abs(xr[active] - np.sign(coef[active]) * penalty) < tol)
    # inactive coordinates: correlation bounded by the penalty
    subgradient = np.all(np.abs(xr[~active] / penalty) <= 1)
    return stationarity and subgradient
if __name__ == '__main__':
    np.random.seed(0)
    from sklearn import datasets

    # small real regression problem as a smoke test
    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target
    alphas = np.linspace(.1, 2., 10)[::-1]
    coef = l1_path(X, y, alphas, verbose=True)
    # the KKT conditions must hold at every point of the path
    n_samples = X.shape[0]
    for idx in range(len(alphas)):
        Xr = np.dot(X.T, y - np.dot(X, coef[idx]))
        assert check_kkt_lasso(Xr, coef[idx], n_samples * alphas[idx])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import strong_rule_enet as enet | |
import strong_rule_lasso as lasso | |
import numpy as np | |
from sklearn import datasets | |
from sklearn import linear_model | |
from numpy.testing import assert_array_almost_equal, assert_almost_equal, \ | |
assert_equal | |
from sklearn.datasets.samples_generator import make_regression | |
from sklearn.linear_model import lasso_path | |
from sklearn.linear_model import enet_path | |
from sklearn.linear_model.base import center_data | |
# Deterministic test runs: fix the global numpy RNG seed up front.
np.random.seed(0)
# Shared fixture: the diabetes regression dataset from scikit-learn.
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target
# Default iteration budget; individual tests shadow this with larger values.
MAX_ITER = 100
def test_lasso_coordinate_descent():
    """Naive lasso CD should match scikit-learn's Lasso on a tiny problem."""
    max_iter = 10000
    X, y = make_regression(n_samples=5, n_features=3, n_informative=2,
                           random_state=0)
    reference = linear_model.Lasso(alpha=0.5, tol=1e-10, max_iter=max_iter,
                                   fit_intercept=False)
    reference.fit(X, y)
    coef = lasso.l1_coordinate_descent(X, y, 0.5, max_iter=max_iter)
    assert_array_almost_equal(coef, reference.coef_, 2)
def test_enet_coordinate_descent():
    """Naive enet CD should match scikit-learn's ElasticNet on a tiny problem."""
    max_iter = 1000
    X, y = make_regression(n_samples=5, n_features=3, n_informative=2,
                           random_state=0)
    reference = linear_model.ElasticNet(alpha=0.5, rho=0.7, max_iter=max_iter,
                                        fit_intercept=False)
    reference.fit(X, y)
    coef_sr_ = enet.enet_coordinate_descent(X, y, alpha=0.5, rho=0.7,
                                            max_iter=max_iter)
    assert_array_almost_equal(coef_sr_, reference.coef_, 2)
def test_lasso_path():
    """Strong-rule lasso path should agree with scikit-learn's lasso_path."""
    MAX_ITER = 1000
    X, y = make_regression(n_samples=10, n_features=20, n_informative=5,
                           random_state=0)
    X, y, _, _, _ = center_data(X, y, fit_intercept=False, normalize=True)
    alphas = np.linspace(.2, 150., 5)
    coefs_sr_ = lasso.l1_path(X, y, alphas, verbose=True, max_iter=MAX_ITER)
    # fix: Python 2 print statement converted to the print() function
    # (single argument, so output is identical on Python 2 and 3)
    print(coefs_sr_)
    # compare with sklearn
    models = lasso_path(X, y, tol=1e-8, alphas=alphas, fit_intercept=False,
                        normalize=False, copy_X=True, max_iter=MAX_ITER)
    coefs_skl_ = np.array([m.coef_ for m in models])
    assert_array_almost_equal(coefs_sr_[1], coefs_skl_[1], 5)
    assert_array_almost_equal(coefs_sr_[4], coefs_skl_[4], 5)
def test_enet_path():
    """Strong-rule enet path should agree with scikit-learn's enet_path."""
    max_iter = 1000
    X, y = make_regression(n_samples=100, n_features=50, n_informative=20,
                           random_state=0)
    rho = 0.9
    alphas = np.linspace(.2, 150., 5)
    coefs_sr_ = enet.enet_path(X, y, alphas, rho, max_iter=max_iter, verbose=True)
    # compare with sklearn
    models = enet_path(X, y, tol=1e-8, alphas=alphas, rho=rho,
                       fit_intercept=False, normalize=False, copy_X=True,
                       max_iter=max_iter)
    coefs_skl_ = np.array([m.coef_ for m in models])
    assert_array_almost_equal(coefs_sr_[1], coefs_skl_[1], 5)
    assert_array_almost_equal(coefs_sr_[4], coefs_skl_[4], 5)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment