eickenberg/coordinate_descent.py

## coordinate_descent.py
# Coordinate descent algorithm for Lasso in pure python
# Goal: evaluate speed up potential when using dynamic screening
# rules every few coordinate sweeps, on all types of design
# See Bonnefoy et al., 2014, https://hal.inria.fr/hal-00880787v4
# Author: Michael Eickenberg, michael.eickenberg@nsup.org
import numpy as np

def screen_dictionary(X, residual, y, alpha,
                      current_support=None,
                      current_radius=None,
                      abs_XTy_over_alpha=None,
                      X_column_norms=None,
                      verbose=0):

    if abs_XTy_over_alpha is None:
        abs_XTy_over_alpha = np.abs(X.T.dot(y) / alpha)

    if current_support is None:
        current_support = np.arange(len(X.T))

    if X_column_norms is None:
        X_column_norms = np.sqrt((X ** 2).sum(0))

    infty_norm_scale_factor = np.abs(X.T.dot(residual)).max()
    within_constraints_scale_factor = (np.dot(residual, y) /
                                       (np.dot(residual, residual) * alpha))
    scale_factor_mu = max(-1. / infty_norm_scale_factor,
                           min(1. / infty_norm_scale_factor,
                               within_constraints_scale_factor))
    new_radius = np.linalg.norm(scale_factor_mu * residual - y / alpha)

    if verbose > 9999:
        print "Scale factors mu\ninfty: %1.5f\nwithin: %1.5f\nall: %1.5f" % (
            infty_norm_scale_factor, within_constraints_scale_factor,
            scale_factor_mu)
        print "Feasibility %1.5f" % (
            np.abs(X.T.dot(scale_factor_mu * residual)).max())
    if current_radius is None:
        current_radius = np.linalg.norm(y / alpha -
                                        y / infty_norm_scale_factor)
    if verbose > 999:
        print "Current radius: %1.2f, new radius %1.2f" % (current_radius,
                                                           new_radius)

    if new_radius < current_radius:
        current_radius = new_radius

    support_change = (abs_XTy_over_alpha[current_support] >
                      1. - current_radius * X_column_norms)
    new_support = current_support[support_change]

    return new_support, current_radius, abs_XTy_over_alpha, X_column_norms


def lasso_cd(X, y, alpha, screen_every=10, max_iter=1000, verbose=0):

    n_samples, n_features = X.shape
    residual = y.copy()

    w = np.zeros(n_features)
    X_norm = (X ** 2).sum(0)

    features = np.arange(n_features)
    abs_XTy_over_alpha = None
    X_column_norms = None
    current_radius = None

    for i in xrange(max_iter):
        print i
        if i % screen_every == 0:
            old_num_features = len(features)
            features, current_radius, abs_XTy_over_alpha, X_column_norms = \
                screen_dictionary(
                X, residual, y, alpha, current_support=features,
                current_radius=current_radius,
                abs_XTy_over_alpha=abs_XTy_over_alpha,
                X_column_norms=X_column_norms,
                verbose=verbose)
            if verbose > 99:
                print "Screened to %d from %d of a total of %d" % (
                    len(features), old_num_features, X.shape[1])
                print "Radius %1.5f" % current_radius
        for c in features:
            if X_norm[c] == 0:
                continue

            x_c = X[:, c]
            residual += w[c] * x_c
            dot_prod = x_c.dot(residual)
            soft_thresholded = max(0, np.abs(dot_prod) -
                                   n_samples * alpha
                                      ) * np.sign(dot_prod)
            w[c] = soft_thresholded / X_norm[c]
            residual -= w[c] * x_c

    return w


if __name__ == "__main__":
    n_samples, n_features = 50, 1000
    n_active = 20
    alpha = .001

    rng = np.random.RandomState(42)

    X = rng.randn(n_samples, n_features)
    X = X / np.sqrt((X ** 2).sum(0))
    w_support = rng.permutation(n_features)[:n_active]
    w = np.zeros(n_features)
    w[w_support] = rng.randn(n_active)

    noise_level = .001
    noise = rng.randn(n_samples) * noise_level
    y = X.dot(w) + noise
    y = y / np.sqrt((y ** 2).sum())

    # X and y normalized because required by dynamic screening rules

    from sklearn.linear_model import Lasso
    lasso = Lasso(fit_intercept=False, alpha=alpha)
    lasso.fit(X, y)
    coef = lasso.coef_
    cd_coef = lasso_cd(X, y, alpha, max_iter=100, verbose=10000)
	# Coordinate descent algorithm for Lasso in pure python
	# Goal: evaluate speed up potential when using dynamic screening
	# rules every few coordinate sweeps, on all types of design
	# See Bonnefoy et al., 2014, https://hal.inria.fr/hal-00880787v4
	# Author: Michael Eickenberg, michael.eickenberg@nsup.org
	import numpy as np

	def screen_dictionary(X, residual, y, alpha,
	current_support=None,
	current_radius=None,
	abs_XTy_over_alpha=None,
	X_column_norms=None,
	verbose=0):

	if abs_XTy_over_alpha is None:
	abs_XTy_over_alpha = np.abs(X.T.dot(y) / alpha)

	if current_support is None:
	current_support = np.arange(len(X.T))

	if X_column_norms is None:
	X_column_norms = np.sqrt((X ** 2).sum(0))

	infty_norm_scale_factor = np.abs(X.T.dot(residual)).max()
	within_constraints_scale_factor = (np.dot(residual, y) /
	(np.dot(residual, residual) * alpha))
	scale_factor_mu = max(-1. / infty_norm_scale_factor,
	min(1. / infty_norm_scale_factor,
	within_constraints_scale_factor))
	new_radius = np.linalg.norm(scale_factor_mu * residual - y / alpha)

	if verbose > 9999:
	print "Scale factors mu\ninfty: %1.5f\nwithin: %1.5f\nall: %1.5f" % (
	infty_norm_scale_factor, within_constraints_scale_factor,
	scale_factor_mu)
	print "Feasibility %1.5f" % (
	np.abs(X.T.dot(scale_factor_mu * residual)).max())
	if current_radius is None:
	current_radius = np.linalg.norm(y / alpha -
	y / infty_norm_scale_factor)
	if verbose > 999:
	print "Current radius: %1.2f, new radius %1.2f" % (current_radius,
	new_radius)

	if new_radius < current_radius:
	current_radius = new_radius

	support_change = (abs_XTy_over_alpha[current_support] >
	1. - current_radius * X_column_norms)
	new_support = current_support[support_change]

	return new_support, current_radius, abs_XTy_over_alpha, X_column_norms


	def lasso_cd(X, y, alpha, screen_every=10, max_iter=1000, verbose=0):

	n_samples, n_features = X.shape
	residual = y.copy()

	w = np.zeros(n_features)
	X_norm = (X ** 2).sum(0)

	features = np.arange(n_features)
	abs_XTy_over_alpha = None
	X_column_norms = None
	current_radius = None

	for i in xrange(max_iter):
	print i
	if i % screen_every == 0:
	old_num_features = len(features)
	features, current_radius, abs_XTy_over_alpha, X_column_norms = \
	screen_dictionary(
	X, residual, y, alpha, current_support=features,
	current_radius=current_radius,
	abs_XTy_over_alpha=abs_XTy_over_alpha,
	X_column_norms=X_column_norms,
	verbose=verbose)
	if verbose > 99:
	print "Screened to %d from %d of a total of %d" % (
	len(features), old_num_features, X.shape[1])
	print "Radius %1.5f" % current_radius
	for c in features:
	if X_norm[c] == 0:
	continue

	x_c = X[:, c]
	residual += w[c] * x_c
	dot_prod = x_c.dot(residual)
	soft_thresholded = max(0, np.abs(dot_prod) -
	n_samples * alpha
	) * np.sign(dot_prod)
	w[c] = soft_thresholded / X_norm[c]
	residual -= w[c] * x_c

	return w


	if __name__ == "__main__":
	n_samples, n_features = 50, 1000
	n_active = 20
	alpha = .001

	rng = np.random.RandomState(42)

	X = rng.randn(n_samples, n_features)
	X = X / np.sqrt((X ** 2).sum(0))
	w_support = rng.permutation(n_features)[:n_active]
	w = np.zeros(n_features)
	w[w_support] = rng.randn(n_active)

	noise_level = .001
	noise = rng.randn(n_samples) * noise_level
	y = X.dot(w) + noise
	y = y / np.sqrt((y ** 2).sum())

	# X and y normalized because required by dynamic screening rules

	from sklearn.linear_model import Lasso
	lasso = Lasso(fit_intercept=False, alpha=alpha)
	lasso.fit(X, y)
	coef = lasso.coef_
	cd_coef = lasso_cd(X, y, alpha, max_iter=100, verbose=10000)