Toy coordinate descent for lasso regression
# Toy coordinate descent for Lasso regression
import numpy as np
import warnings


def _normalise(X, y):
    # Standardize the columns of X and center y; the scale factors are
    # returned so the fitted coefficients can be mapped back afterwards
    Xbar = X.mean(axis=0)
    Xstd = X.std(axis=0)
    ybar = y.mean(axis=0)
    ystd = y.std()
    X_norm = (X - Xbar) / Xstd
    y_cent = y - ybar
    return X_norm, y_cent, Xstd, Xbar, ybar, ystd


def _soft_threshold(x, value):
    # Elementwise soft-thresholding operator: sign(x) * max(|x| - value, 0)
    return np.sign(x) * np.maximum(np.abs(x) - value, 0)


def coordinate_descent(X, y, alpha=0, alpha_weights=None, tol=1e-5, maxiter=1000):
    """Run coordinate descent on the objective

        1/(2n) * ||y - w0 - Xw||^2_2 + P(alpha, w)

    where

        P(alpha, w) = alpha * sum_{i=1}^{p} alpha_weights[i] * |w_i|

    i.e. the Lasso where each l1 penalty coordinate is weighted according to
    `alpha_weights`.

    Fitting is done on normalized data, but coefficients are returned
    on the original scale.

    Parameters
    ----------
    X : ndarray, shape (n_observations, n_features)
    y : ndarray, shape (n_observations,)
    alpha : float
        Overall l1 penalty strength
    alpha_weights : ndarray, shape (n_features,)
        Per-coordinate weights on the l1 penalty

    Returns
    -------
    w_rescaled : array, shape (n_features,)
        Estimated coefficients on the original scale
    intercept : float
        Estimated (unpenalized) intercept on the original scale
    w : array, shape (n_features,)
        Estimated coefficients on the normalized scale

    Reference:
    Hastie, T., Tibshirani, R., & Wainwright, M. (2015). Statistical learning
    with sparsity: the lasso and generalizations. CRC Press.
    """
    n, p = X.shape
    w = np.zeros(p)
    wp = w.copy()
    converged = False

    X, y, Xstd, Xbar, ybar, ystd = _normalise(X, y)

    if alpha_weights is None:
        alpha_weights = np.ones(p)
    # Rescale the penalty weights so they sum to p, the number of features
    # (copy rather than modify the caller's array in place)
    alpha_weights = alpha_weights * (1.0 * p / alpha_weights.sum())

    # Residuals for the current coefficient vector (all zeros at the start)
    r = y - X @ w

    for n_iter in range(maxiter):
        for i in range(p):
            x = X[:, i]
            w_old = w[i]
            # Univariate least-squares solution for coordinate i on the
            # partial residual, then soft-threshold it for the l1 penalty
            w_ols = w_old + np.dot(x, r) / n
            w[i] = _soft_threshold(w_ols, alpha * alpha_weights[i])
            # Update residuals to reflect the new value of w[i]
            r += (w_old - w[i]) * x
        converged = np.linalg.norm(wp - w) < tol
        if converged:
            break
        wp = w.copy()

    if not converged:
        warnings.warn("Coordinate descent did not converge. You might want "
                      "to increase `maxiter` or decrease `tol`")

    # Map the coefficients back to the original scale of X and recover the
    # (unpenalized) intercept
    w_rescaled = w / Xstd
    intercept = ybar - np.sum(Xbar * w_rescaled)

    return w_rescaled, intercept, w
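# --- Usage sketch (not in the original gist; illustrative assumptions) ------
# Quick sanity check of `coordinate_descent` on synthetic data, optionally
# compared against scikit-learn's Lasso, which minimizes the same
# 1/(2n)-scaled objective when `alpha_weights` is left at its default.
# Because this implementation standardizes X internally and sklearn does not,
# the coefficients only agree approximately (closely here, since the columns
# of the simulated X already have standard deviation ~1).
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    n_obs, n_feat = 200, 5
    X = rng.randn(n_obs, n_feat)
    true_w = np.array([1.5, 0.0, -2.0, 0.0, 0.5])
    y = X @ true_w + 0.1 * rng.randn(n_obs)

    w_hat, intercept, _ = coordinate_descent(X, y, alpha=0.1)
    print("coordinate_descent:", w_hat, intercept)

    try:
        from sklearn.linear_model import Lasso
        lasso = Lasso(alpha=0.1).fit(X, y)
        print("sklearn Lasso:     ", lasso.coef_, lasso.intercept_)
    except ImportError:
        pass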