Skip to content

Instantly share code, notes, and snippets.

@ibayer
Forked from agramfort/gist:3181189
Created July 27, 2012 13:50
Show Gist options
  • Save ibayer/3188143 to your computer and use it in GitHub Desktop.
strong rules lasso and enet
# -*- coding: utf-8 -*-
"""
Generalized linear models via coordinate descent.

Elastic-net solver with "strong rules" screening of predictors.
Author: Fabian Pedregosa <fabian@fseoane.net>
"""
import numpy as np
from scipy import linalg  # NOTE(review): appears unused in this module -- confirm before removing
MAX_ITER = 100


def enet_coordinate_descent(X, y, alpha, rho, warm_start=None, max_iter=MAX_ITER):
    """Solve the elastic net by naive cyclic coordinate descent.

    Each sweep applies the soft-threshold update
    ``beta_i = S(x_i'(y - X beta_{-i}), l1_reg) / (x_i'x_i + l2_reg)``
    where the penalties are scaled by ``n_samples`` (scikit-learn
    convention): ``l1_reg = alpha * rho * n_samples`` and
    ``l2_reg = alpha * (1 - rho) * n_samples``.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
    y : ndarray, shape (n_samples,)
    alpha : float, overall regularization strength.
    rho : float in [0, 1], l1/l2 mixing (rho=1 is the lasso).
    warm_start : ndarray or None, initial coefficients.
        NOTE(review): when given, the array is updated in place and also
        returned -- confirm callers expect that.
    max_iter : int, number of full coordinate sweeps.

    Returns
    -------
    beta : ndarray, shape (n_features,)
    """
    n_samples = X.shape[0]
    l1_reg = alpha * rho * n_samples
    l2_reg = alpha * (1.0 - rho) * n_samples
    if warm_start is not None:
        beta = warm_start
    else:
        beta = np.zeros(X.shape[1], dtype=np.float64)
    # BUG FIX: the original rescaled `alpha` here, but the rescaled value
    # was never read again -- dead code, removed.
    for _ in range(max_iter):
        for i in range(X.shape[1]):
            # partial residual correlation with feature i excluded
            bb = beta.copy()
            bb[i] = 0.
            residual = np.dot(X[:, i], y - np.dot(X, bb).T)
            beta[i] = np.sign(residual) * np.fmax(np.abs(residual) - l1_reg, 0) \
                / (np.dot(X[:, i], X[:, i]) + l2_reg)
    return beta
def shrinkage(X, y, l1_reg, l2_reg, beta, ever_active_set, max_iter):
    """Cyclic coordinate descent restricted to `ever_active_set`.

    Runs `max_iter` sweeps of elastic-net soft-threshold updates over the
    given feature indices only, starting from a copy of `beta` (the input
    array is left untouched), and returns the updated coefficient vector.
    """
    coef = beta.copy()
    for _sweep in range(max_iter):
        for j in ever_active_set:
            coef[j] = 0
            xj = X[:, j]
            corr = np.dot(xj, y - np.dot(X, coef))
            numer = np.fmax(np.abs(corr) - l1_reg, 0)
            denom = np.dot(xj, xj) + l2_reg
            coef[j] = np.sign(corr) * numer / denom
    return coef
def enet_path(X, y, alphas, rho, max_iter=MAX_ITER, verbose=False):
    """Compute the elastic-net path over a decreasing sequence of alphas.

    Screening follows "Strong rules for discarding predictors in
    lasso-type problems" (Tibshirani et al., 2012): at each alpha most
    predictors are discarded by the (sequential) strong rule, the
    restricted problem is solved by `shrinkage`, and the KKT conditions
    are re-checked -- first on the strong set, then on all features --
    adding violators back to the active set until none remain.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
    y : ndarray, shape (n_samples,)
    alphas : sequence of regularization parameters (sorted internally
        into decreasing order).
    rho : float, l1/l2 mixing parameter (rho=1 is the lasso).
    max_iter : int, coordinate sweeps per `shrinkage` call.
    verbose : bool, print progress information.

    Returns
    -------
    beta : ndarray, shape (len(alphas), n_features), one row per alpha.

    WARNING: does not compute an intercept.
    """
    n_samples, n_features = X.shape
    beta = np.zeros((len(alphas), n_features), dtype=np.float64)
    alphas = np.sort(alphas)[::-1]  # make sure alphas are properly ordered
    alphas_scaled = np.array(alphas) * n_samples

    def _kkt_violations(indices, beta_k, l1_reg, l2_reg, ever_active_set):
        """Return True iff a KKT condition fails on `indices`.

        Violating features are appended to `ever_active_set` in place.
        """
        violations = False
        for i in indices:
            tmp = np.dot(X[:, i], y - np.dot(X, beta_k))
            s = np.sign(beta_k[i])
            # active feature: correlation must equal the subgradient value
            if beta_k[i] != 0 and not np.allclose(
                    tmp, s * l1_reg + l2_reg * beta_k[i], rtol=1e-2):
                if i not in ever_active_set:
                    ever_active_set.append(i)
                violations = True
            # inactive feature: correlation must stay below the l1 penalty
            if beta_k[i] == 0 and abs(tmp) >= np.abs(l1_reg):
                if i not in ever_active_set:
                    ever_active_set.append(i)
                violations = True
        return violations

    for k, a in enumerate(alphas_scaled):
        if k > 0:
            # .. sequential strong rule, warm-started from beta[k - 1] ..
            tmp = np.abs(np.dot(X.T, y - np.dot(X, beta[k - 1])))
            strong_set = tmp >= rho * (2 * alphas_scaled[k] - alphas_scaled[k - 1])
        else:
            # .. basic strong rule for the first alpha ..
            tmp = np.abs(np.dot(X.T, y))
            alpha_max = np.max(tmp)
            strong_set = tmp >= rho * (2 * alphas_scaled[k] - alpha_max)
        strong_set = np.where(strong_set)[0].tolist()
        ever_active_set = strong_set
        if verbose:
            print('Strong set ', strong_set)
            print('Ever active set ', ever_active_set)
        l1_reg = a * rho
        l2_reg = a * (1.0 - rho)
        # solve for the current active set
        beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], ever_active_set, max_iter)
        # check KKT in the strong set
        # BUG FIX: the original dropped into ipdb (import ipdb; ipdb.set_trace())
        # on the first violation here -- leftover debugging, removed.
        if not _kkt_violations(strong_set, beta[k], l1_reg, l2_reg, ever_active_set):
            # passed KKT for all variables in the strong set, we're done
            ever_active_set = np.where(beta[k] != 0)[0].tolist()
            if verbose:
                print('no KKT violated on strong set')
        else:
            if verbose:
                print('KKT violated on strong set')
            # .. recompute with the enlarged active set ..
            beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], ever_active_set, max_iter)
            # .. then check KKT on all predictors ..
            if not _kkt_violations(range(n_features), beta[k], l1_reg, l2_reg,
                                   ever_active_set):
                ever_active_set = np.where(beta[k] != 0)[0].tolist()
                if verbose:  # BUG FIX: this print was unconditional
                    print('no KKT violated on full set of features')
            else:
                if verbose:
                    print('KKT violated on full set of features')
                beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], ever_active_set, max_iter)
    return beta
def check_kkt_enet(Xr, coef, l1_reg, l2_reg, tol=1e-3):
    """Verify the elastic-net KKT optimality conditions.

    Xr : X'(y - X coef)

    Active coordinates must satisfy
    ``Xr_j == sign(coef_j) * l1_reg + l2_reg * coef_j`` within `tol`;
    inactive coordinates must have ``|Xr_j| <= l1_reg``.
    """
    active = coef != 0
    ridge_term = l2_reg * coef
    stationarity_ok = np.all(
        np.abs(Xr[active] - (np.sign(coef[active]) * l1_reg + ridge_term[active]))
        < tol)
    return stationarity_ok and np.all(
        np.abs(Xr[~active] / (l1_reg + ridge_term[~active])) <= 1)
if __name__ == '__main__':
    np.random.seed(0)
    from sklearn import datasets

    diabetes = datasets.load_diabetes()
    X, y = diabetes.data, diabetes.target
    rho = 0.8
    # decreasing grid, as required by enet_path
    alphas = np.linspace(.1, 1., 10)[::-1]
    coef = enet_path(X, y, alphas, rho=rho, verbose=True)
    n_samples = X.shape[0]
    # verify optimality of every point on the path
    for a, c in zip(alphas, coef):
        Xr = np.dot(X.T, y - np.dot(X, c))
        l1_reg = a * rho * n_samples
        l2_reg = a * (1.0 - rho) * n_samples
        assert check_kkt_enet(Xr, c, l1_reg, l2_reg)
# -*- coding: utf-8 -*-
"""
Generalized linear models via coordinate descent.

Lasso solver with "strong rules" screening of predictors.
Author: Fabian Pedregosa <fabian@fseoane.net>
"""
import numpy as np
from scipy import linalg  # NOTE(review): appears unused in this module -- confirm before removing
MAX_ITER = 100


def l1_coordinate_descent(X, y, alpha, warm_start=None, max_iter=MAX_ITER):
    """Solve the lasso by naive cyclic coordinate descent.

    `alpha` is rescaled by ``n_samples`` (scikit-learn convention); each
    sweep applies the soft-threshold update
    ``beta_i = S(x_i'(y - X beta_{-i}), alpha) / (x_i'x_i)``.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
    y : ndarray, shape (n_samples,)
    alpha : float, l1 regularization strength (before n_samples scaling).
    warm_start : ndarray or None, initial coefficients.
        NOTE(review): when given, the array is updated in place and also
        returned -- confirm callers expect that.
    max_iter : int, number of full coordinate sweeps.

    Returns
    -------
    beta : ndarray, shape (n_features,)
    """
    if warm_start is not None:
        beta = warm_start
    else:
        # np.float was removed from NumPy; use the explicit dtype
        beta = np.zeros(X.shape[1], dtype=np.float64)
    alpha = alpha * X.shape[0]  # scale the penalty by n_samples
    for _ in range(max_iter):
        for i in range(X.shape[1]):
            bb = beta.copy()
            bb[i] = 0.
            residual = np.dot(X[:, i], y - np.dot(X, bb).T)
            beta[i] = np.sign(residual) * np.fmax(np.abs(residual) - alpha, 0) \
                / np.dot(X[:, i], X[:, i])
    return beta
def shrinkage(X, y, alpha, beta, ever_active_set, max_iter):
    """Cyclic coordinate descent restricted to `ever_active_set`.

    Runs `max_iter` sweeps of lasso soft-threshold updates over the given
    feature indices only, starting from a copy of `beta` (the input array
    is left untouched), and returns the updated coefficient vector.
    `alpha` is used as-is (callers pass the already-scaled penalty).
    """
    coef = beta.copy()
    for _sweep in range(max_iter):
        for j in ever_active_set:
            coef[j] = 0
            xj = X[:, j]
            corr = np.dot(xj, y - np.dot(X, coef))
            coef[j] = (np.sign(corr)
                       * np.fmax(np.abs(corr) - alpha, 0)
                       / np.dot(xj, xj))
    return coef
def l1_path(X, y, alphas, max_iter=MAX_ITER, verbose=False):
    """Compute the lasso path over a decreasing sequence of alphas.

    Screening follows "Strong rules for discarding predictors in
    lasso-type problems" (Tibshirani et al., 2012): at each alpha most
    predictors are discarded by the (sequential) strong rule, the
    restricted problem is solved by `shrinkage`, and the KKT conditions
    are re-checked -- first on the strong set, then on all features --
    adding violators back to the active set until none remain.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
    y : ndarray, shape (n_samples,)
    alphas : sequence of regularization parameters (sorted internally
        into decreasing order).
    max_iter : int, coordinate sweeps per `shrinkage` call.
    verbose : bool, print progress information.

    Returns
    -------
    beta : ndarray, shape (len(alphas), n_features), one row per alpha.

    WARNING: does not compute an intercept.
    """
    n_samples, n_features = X.shape
    beta = np.zeros((len(alphas), n_features), dtype=np.float64)
    alphas = np.sort(alphas)[::-1]  # make sure alphas are properly ordered
    alphas_scaled = np.array(alphas) * n_samples

    def _kkt_violations(indices, beta_k, a, ever_active_set):
        """Return True iff a KKT condition fails on `indices`.

        Violating features are appended to `ever_active_set` in place.
        """
        violations = False
        for i in indices:
            tmp = np.dot(X[:, i], y - np.dot(X, beta_k))
            # active feature: correlation must sit on the penalty boundary
            if beta_k[i] != 0 and not np.allclose(tmp, np.abs(a)):
                if i not in ever_active_set:
                    ever_active_set.append(i)
                violations = True
            # inactive feature: correlation must stay below the penalty
            if beta_k[i] == 0 and abs(tmp) >= np.abs(a):
                if i not in ever_active_set:
                    ever_active_set.append(i)
                violations = True
        return violations

    for k, a in enumerate(alphas_scaled):
        if k > 0:
            # .. sequential strong rule, warm-started from beta[k - 1] ..
            tmp = np.abs(np.dot(X.T, y - np.dot(X, beta[k - 1])))
            strong_set = tmp >= 2 * alphas_scaled[k] - alphas_scaled[k - 1]
        else:
            # .. basic strong rule for the first alpha ..
            tmp = np.abs(np.dot(X.T, y))
            alpha_max = np.max(tmp)
            strong_set = tmp >= 2 * alphas_scaled[k] - alpha_max
        strong_set = np.where(strong_set)[0].tolist()
        ever_active_set = strong_set
        if verbose:
            print('Strong active set ', strong_set)
            print('Ever active set ', ever_active_set)
        # solve for the current active set
        beta[k] = shrinkage(X, y, a, beta[k], ever_active_set, max_iter)
        # check KKT in the strong set
        if not _kkt_violations(strong_set, beta[k], a, ever_active_set):
            # passed KKT for all variables in the strong set, we're done
            ever_active_set = np.where(beta[k] != 0)[0].tolist()
            if verbose:
                print('no KKT violated on strong active set')
        else:
            if verbose:
                print('KKT violated on strong active set')
            # .. recompute with the enlarged active set ..
            beta[k] = shrinkage(X, y, a, beta[k], ever_active_set, max_iter)
            # .. then check KKT on all predictors ..
            if not _kkt_violations(range(n_features), beta[k], a, ever_active_set):
                ever_active_set = np.where(beta[k] != 0)[0].tolist()
                if verbose:  # BUG FIX: this print was unconditional
                    print('no KKT violated on full active set')
            else:
                if verbose:
                    print('KKT violated on full active set')
                beta[k] = shrinkage(X, y, a, beta[k], ever_active_set, max_iter)
    return beta
def check_kkt_lasso(xr, coef, penalty, tol=1e-3):
    """Verify the lasso KKT optimality conditions.

    xr : X'(y - X coef)

    Active coordinates must satisfy ``xr_j == sign(coef_j) * penalty``
    within `tol`; inactive coordinates must have ``|xr_j| <= penalty``.
    """
    active = coef != 0
    stationarity_ok = np.all(
        np.abs(xr[active] - np.sign(coef[active]) * penalty) < tol)
    return stationarity_ok and np.all(np.abs(xr[~active] / penalty) <= 1)
if __name__ == '__main__':
    np.random.seed(0)
    from sklearn import datasets

    diabetes = datasets.load_diabetes()
    X, y = diabetes.data, diabetes.target
    # decreasing grid, as required by l1_path
    alphas = np.linspace(.1, 2., 10)[::-1]
    coef = l1_path(X, y, alphas, verbose=True)
    scale = X.shape[0]
    # verify optimality of every point on the path
    for a, c in zip(alphas, coef):
        Xr = np.dot(X.T, y - np.dot(X, c))
        assert check_kkt_lasso(Xr, c, scale * a)
import strong_rule_enet as enet
import strong_rule_lasso as lasso
import numpy as np
from sklearn import datasets
from sklearn import linear_model
from numpy.testing import assert_array_almost_equal, assert_almost_equal, \
assert_equal
from sklearn.datasets.samples_generator import make_regression
from sklearn.linear_model import lasso_path
from sklearn.linear_model import enet_path
from sklearn.linear_model.base import center_data
# .. shared fixtures: deterministic RNG and the diabetes regression data ..
np.random.seed(0)
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target
# Default sweep budget; the individual tests shadow this with their own value.
MAX_ITER = 100
def test_lasso_coordinate_descent():
    """Naive lasso CD should agree with sklearn's Lasso on a tiny problem."""
    n_iter = 10000
    X, y = make_regression(n_samples=5, n_features=3, n_informative=2,
                           random_state=0)
    reference = linear_model.Lasso(alpha=0.5, tol=1e-10, max_iter=n_iter,
                                   fit_intercept=False)
    reference.fit(X, y)
    coef = lasso.l1_coordinate_descent(X, y, 0.5, max_iter=n_iter)
    assert_array_almost_equal(coef, reference.coef_, 2)
def test_enet_coordinate_descent():
    """Naive enet CD should agree with sklearn's ElasticNet on a tiny problem."""
    n_iter = 1000
    X, y = make_regression(n_samples=5, n_features=3, n_informative=2,
                           random_state=0)
    # NOTE(review): `rho` is the old sklearn name for l1_ratio -- this
    # targets an old sklearn release; confirm before upgrading.
    reference = linear_model.ElasticNet(alpha=0.5, rho=0.7, max_iter=n_iter,
                                        fit_intercept=False)
    reference.fit(X, y)
    coef_sr_ = enet.enet_coordinate_descent(X, y, alpha=0.5, rho=0.7,
                                            max_iter=n_iter)
    assert_array_almost_equal(coef_sr_, reference.coef_, 2)
def test_lasso_path():
    """Strong-rule lasso path should match sklearn's lasso_path."""
    MAX_ITER = 1000
    X, y = make_regression(n_samples=10, n_features=20, n_informative=5,
                           random_state=0)
    X, y, _, _, _ = center_data(X, y, fit_intercept=False, normalize=True)
    alphas = np.linspace(.2, 150., 5)
    coefs_sr_ = lasso.l1_path(X, y, alphas, verbose=True, max_iter=MAX_ITER)
    print(coefs_sr_)  # BUG FIX: was a Python 2 print statement
    # compare with sklearn
    models = lasso_path(X, y, tol=1e-8, alphas=alphas, fit_intercept=False,
                        normalize=False, copy_X=True, max_iter=MAX_ITER)
    coefs_skl_ = np.array([m.coef_ for m in models])
    assert_array_almost_equal(coefs_sr_[1], coefs_skl_[1], 5)
    assert_array_almost_equal(coefs_sr_[4], coefs_skl_[4], 5)
def test_enet_path():
    """Strong-rule enet path should match sklearn's enet_path."""
    n_iter = 1000
    X, y = make_regression(n_samples=100, n_features=50, n_informative=20,
                           random_state=0)
    rho = 0.9
    alphas = np.linspace(.2, 150., 5)
    coefs_sr_ = enet.enet_path(X, y, alphas, rho, max_iter=n_iter, verbose=True)
    # compare with sklearn (NOTE(review): `rho` is the old name for l1_ratio)
    models = enet_path(X, y, tol=1e-8, alphas=alphas, rho=rho,
                       fit_intercept=False, normalize=False, copy_X=True,
                       max_iter=n_iter)
    coefs_skl_ = np.array([m.coef_ for m in models])
    assert_array_almost_equal(coefs_sr_[1], coefs_skl_[1], 5)
    assert_array_almost_equal(coefs_sr_[4], coefs_skl_[4], 5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment