An implementation of Sparse Additive Models in Coordinate Descent
# Author Michael Eickenberg <michael.eickenberg@nsup.org>, Fabian Pedregosa
# Coded in 2012, another era, pure python, no guarantees for 1000% correctness or speed
# requires loess.py
import numpy as np
from loess import lowess
VERBOSE = 100
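
# loess.py is assumed to expose lowess(x, y, f=..., iter=...) returning the
# smoothed values at x, as the call below relies on. If loess.py is not at
# hand, a minimal drop-in sketch can be built on statsmodels (an assumption,
# not part of the original gist):
#
# from statsmodels.nonparametric.smoothers_lowess import lowess as sm_lowess
#
# def lowess(x, y, f=2. / 3., iter=3):
#     # frac is the smoothing fraction, it the number of robustifying passes
#     return sm_lowess(y, x, frac=f, it=iter, return_sorted=False)
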
def spam_coord_descent(X, y,
                       alpha=1.,
                       max_iter=100,
                       f_init=None,
                       loess_f=2. / 3.,
                       loess_iter=3):
"""Implementation of SpAM by Ravikumar et al.
Coordinate Descent using a LOESS to fit the residues
X: Data matrix of size n x p, where n = n_samples, p = n_features
y: Target of fitting
f_j_init: Initialization of the f_js, is set to 0 if not specified
Specify it for a warm start
loess_f: LOESS algorithm parameter, larger f, smoother curve
loess_iter: number of iterations in LOESS algorithm
"""
    num_samples, num_features = X.shape
    inv_sqrt_num_samples = 1. / np.sqrt(num_samples)

    outer_iteration_counter = 0

    # initialize the additive components, warm-starting from f_init if given
    if f_init is None:
        f = np.zeros([num_samples, num_features])
    else:
        f = f_init.copy()

    # calculate the residual
    R = y - f.sum(1)
    while outer_iteration_counter < max_iter:
        outer_iteration_counter += 1
        if VERBOSE > 9:
            print("In outer iteration %d" % outer_iteration_counter)
        for j in range(num_features):
            if VERBOSE > 99:
                print("Processing feature %d for the %d. time"
                      % (j, outer_iteration_counter))
            # update the residual in place by adding back feature j
            R += f[:, j]
            # fit a LOESS to this residual as a function of the feature
            P_j = lowess(X[:, j], R, f=loess_f, iter=loess_iter)
            # estimate the norm using the simple,
            # biased version from the paper (eq. 15)
            s_j = inv_sqrt_num_samples * np.sqrt((P_j ** 2).sum())
            # soft threshold: shrink the component or zero it out entirely
            soft_thresh = 1. - alpha / s_j
            if soft_thresh > 0:
                f_j = soft_thresh * P_j
                f_j = f_j - f_j.mean()  # center each fitted component
                f[:, j] = f_j
                R -= f_j
            else:
                f[:, j] = 0.
    return f
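
# A hedged usage sketch, not part of the original gist (spam_path and its
# alpha grid are illustrative names): warm-starting along a decreasing grid
# of alphas reuses each fit as f_init for the next call, which typically
# converges faster than a cold start at every alpha.
def spam_path(X, y, alphas=(10., 3., 1., 0.3)):
    fits = {}
    f = None
    for alpha in sorted(alphas, reverse=True):
        f = spam_coord_descent(X, y, alpha=alpha, f_init=f, max_iter=50)
        fits[alpha] = f.copy()
    return fits
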
if __name__ == "__main__":
    np.random.seed(42)

    num_samples = 50
    features = [
        lambda x: 2 * x * x,
        lambda x: 10. * x,
        lambda x: 20. * (1. / (1 + np.exp(-(5. * x)))),
        lambda x: np.exp(x),
    ]
    num_true_features = len(features)
    num_fake_features = 16
    num_features = num_true_features + num_fake_features

    X = np.random.randn(num_samples, num_features)
    y = np.hstack([feature(X[:, i])[:, np.newaxis]
                   for i, feature in enumerate(features)]).sum(1)
    X = (X - X.mean()) / X.std()
    y = y - y.mean()

    f = spam_coord_descent(X, y)
    import pylab as pl

    # top row: the true feature functions; bottom row: the recovered fits
    pl.figure()
    for i in range(num_true_features):
        pl.subplot(2, num_true_features, i + 1)
        pl.plot(X[:, i], features[i](X[:, i]), "rx")
        pl.subplot(2, num_true_features, i + 1 + num_true_features)
        pl.plot(X[:, i], f[:, i], "b+")

    # overview of all fitted components; pruned features show up as zeros
    pl.figure()
    pl.imshow(f, interpolation="nearest")
    pl.hot()
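
    # Hedged diagnostic sketch (not in the original gist): a feature counts
    # as selected when its fitted component is not identically zero; with the
    # defaults above, the four true features should carry the largest norms.
    norms = np.sqrt((f ** 2).mean(axis=0))
    print("per-feature norms:", np.round(norms, 2))
    print("selected features:", np.where(norms > 0)[0])

    pl.show()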