@fabianp · Last active August 29, 2015
Sparse Additive Models (SpAM) in Python
"""
This module implements the Lowess function for nonparametric regression.
Functions:
lowess Fit a smooth nonparametric regression curve to a scatterplot.
For more information, see
William S. Cleveland: "Robust locally weighted regression and smoothing
scatterplots", Journal of the American Statistical Association, December 1979,
volume 74, number 368, pp. 829-836.
William S. Cleveland and Susan J. Devlin: "Locally weighted regression: An
approach to regression analysis by local fitting", Journal of the American
Statistical Association, September 1988, volume 83, number 403, pp. 596-610.
"""
from math import ceil
import numpy as np
from scipy import linalg
def lowess(x, y, f=2. / 3., iter=3):
    """lowess(x, y, f=2./3., iter=3) -> yest

    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a
    scatterplot.  The arrays x and y contain an equal number of elements;
    each pair (x[i], y[i]) defines a data point in the scatterplot.  The
    function returns the estimated (smooth) values of y.

    The smoothing span is given by f.  A larger value for f will result in
    a smoother curve.  The number of robustifying iterations is given by
    iter.  The function will run faster with a smaller number of
    iterations."""
    n = len(x)
    r = min(int(ceil(f * n)), n - 1)  # guard against f == 1
    # local bandwidth: distance of each point to its r-th nearest neighbour
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3  # tricube kernel; column i = weights around x[i]
    yest = np.zeros(n)
    delta = np.ones(n)
    for iteration in range(iter):
        for i in range(n):
            # weighted linear least squares fit around x[i]
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]
        # downweight points with large residuals (bisquare weights)
        # to robustify the next pass
        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2
    return yest
if __name__ == '__main__':
    import math
    n = 100
    # noisy sine wave as test data
    x = np.linspace(0, 2 * math.pi, n)
    y = np.sin(x) + 0.3 * np.random.randn(n)
    f = 0.25
    yest = lowess(x, y, f=f, iter=3)

    import pylab as pl
    pl.clf()
    pl.plot(x, y, label='y noisy')
    pl.plot(x, yest, label='y pred')
    pl.legend()
    pl.show()
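The SpAM module below imports `make_smoothing_matrix` from a `my_loess` module that is not included in this gist, which is one reason the code does not run as posted (see the comment at the bottom). Here is a minimal sketch of what such a function might look like, assuming the first file above is saved as `loess.py` (it is imported as `from loess import lowess` below) and this sketch as `my_loess.py`; it returns the n x n linear smoother S with `yest = S.dot(y)` for one non-robust pass of tricube-weighted local linear regression, consistent with `lowess` above:

import numpy as np
from math import ceil
from scipy import linalg


def make_smoothing_matrix(x, kernel_params=None):
    """Hypothetical stand-in for the missing my_loess.make_smoothing_matrix.

    Returns the n x n matrix S such that S.dot(y) equals one (non-robust)
    local linear regression pass of y on x with tricube weights, matching
    the weighting scheme used in lowess() above."""
    if kernel_params is None:
        kernel_params = {}
    f = kernel_params.get("f_fraction", 2. / 3.)
    n = len(x)
    r = min(int(ceil(f * n)), n - 1)
    # local bandwidth: distance to the r-th nearest neighbour
    h = np.array([np.sort(np.abs(x - x[i]))[r] for i in range(n)])
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3  # tricube kernel; column i = weights around x[i]
    S = np.empty((n, n))
    for i in range(n):
        wi = w[:, i]
        # the weighted least squares line at x[i], written as a linear
        # map of y:  yest[i] = [1, x[i]] . A^{-1} B y
        B = np.vstack([wi, wi * x])
        A = np.array([[wi.sum(), (wi * x).sum()],
                      [(wi * x).sum(), (wi * x * x).sum()]])
        S[i] = np.array([1., x[i]]).dot(linalg.solve(A, B))
    return S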
"""
Implements the Sparse additive models described in Ravikumar, Pradeep D., et al.
"SpAM: Sparse Additive Models." NIPS. 2007.
"""
import numpy as np
from scipy import linalg
from loess import lowess
from joblib import Parallel, delayed, Memory
VERBOSE = 10
def spam_coord_descent(X, y, alpha=1., max_iter=10000, tol=1e-8, f_init=None,
                       loess_f=2. / 3., loess_iter=3,
                       pre_calculate_smoothing_matrix=True,
                       mem=Memory(location=None),
                       smoothing_matrices_n_jobs=-1):
    """Implementation of SpAM by Ravikumar et al.

    Coordinate descent, using a LOESS smoother to fit the residuals.

    Parameters
    ----------
    X : ndarray
        Data matrix of size n x p, where n = n_samples, p = n_features
    y : ndarray
        Fitting target
    alpha : float
        Regularization strength; larger values drive more component
        functions f_j to zero
    f_init : ndarray
        Initialization of the f_j, set to 0 if not specified.
        Specify it for a warm start.
    loess_f : float
        LOESS algorithm parameter; a larger f gives a smoother curve
    loess_iter : int
        Number of robustifying iterations in the LOESS algorithm

    Returns
    -------
    f : ndarray of shape (n_samples, n_features)
        Column j holds the fitted component function f_j evaluated at
        X[:, j], i.e. [f_1(X_1), f_2(X_2), ..., f_p(X_p)]
    """
    X = np.asanyarray(X)
    y = np.asanyarray(y)
    n_samples, n_features = X.shape
    inv_sqrt_n_samples = 1. / np.sqrt(n_samples)

    # initialize some variables
    outer_iteration_counter = 0
    # if f_init is None, start all component functions at 0
    if f_init is None:
        f = np.zeros([n_samples, n_features], dtype=X.dtype)
    else:
        f = f_init.copy()

    if pre_calculate_smoothing_matrix:
        import my_loess  # not included in the gist; see the sketch above
        if VERBOSE > 9:
            print("Precalculating smoothing matrices")
        make_smoothing_matrix = mem.cache(my_loess.make_smoothing_matrix)
        # one n x n linear smoother per feature, computed in parallel
        smoothing_matrices = Parallel(n_jobs=smoothing_matrices_n_jobs)(
            delayed(make_smoothing_matrix)(
                X[:, j], kernel_params={"f_fraction": loess_f})
            for j in range(n_features))

    # initial residual
    R = y - f.sum(axis=1)
    while outer_iteration_counter < max_iter:
        R_old = R.copy()
        outer_iteration_counter += 1
        if VERBOSE > 9:
            print("In outer iteration %d" % outer_iteration_counter)
        for j in range(n_features):
            if VERBOSE > 99:
                print("Processing feature %d for the %d-th time"
                      % (j, outer_iteration_counter))
            # update the residual in place by adding back feature j
            R += f[:, j]
            # fit a LOESS to this residual as a function of the feature
            if pre_calculate_smoothing_matrix:
                P_j = np.dot(smoothing_matrices[j], R)
            else:
                P_j = lowess(X[:, j], R, f=loess_f, iter=loess_iter)
            # estimate the norm using the simple, biased version
            # from eq. (15) of the paper
            s_j = inv_sqrt_n_samples * np.sqrt((P_j ** 2).sum())
            # soft threshold
            soft_thresh = 1. - alpha / s_j
            if soft_thresh > 0:
                f_j = soft_thresh * P_j
                f_j -= f_j.mean()  # recentre to mean zero
                f[:, j] = f_j
                R[:] -= f_j
            else:
                # component is thresholded away entirely
                f[:, j] = 0.
        # stop when the residual barely changes between outer iterations
        max_inc = linalg.norm(R - R_old)
        if max_inc < tol / 2:
            break
        # .. dual gap (work in progress) ..
        if False:  # max_inc < rtol * np.amax(w_new):
            group_norm = alpha * np.sum([linalg.norm(f[:, j], 2)
                                         for j in range(f.shape[1])])
            # norm_Anu = [linalg.norm(np.dot(H[g], residual))
            #             for g in group_labels]
            # if np.any(norm_Anu > C):
            #     nnu = residual * np.min(C / norm_Anu)
            # else:
            #     nnu = residual
            # primal_obj = .5 * np.dot(residual, residual) + group_norm
            # dual_obj = -.5 * np.dot(nnu, nnu) - np.dot(nnu, y)
            # dual_gap = primal_obj - dual_obj
            # if verbose:
            #     print('Relative error: %s' % (dual_gap / dual_obj))
            # if np.abs(dual_gap / dual_obj) < rtol:
            #     break
    return f
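# A minimal usage sketch (not from the original gist; data and parameter
# values are illustrative): fit SpAM on toy data where only the first
# feature matters, then check which components survive the soft threshold.
# The lowess path is used so that only loess.py is required.
def _spam_usage_sketch():
    rng = np.random.RandomState(0)
    X = rng.randn(60, 5)
    y = np.sin(3 * X[:, 0])
    y -= y.mean()
    f = spam_coord_descent(X, y, alpha=.1, max_iter=50,
                           pre_calculate_smoothing_matrix=False)
    # indices of the selected (non-zero) component functions
    print([j for j in range(X.shape[1]) if np.abs(f[:, j]).max() > 0])
# (call _spam_usage_sketch() by hand to try it)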
def plot_spam_polynomial():
    """Baseline: fit the same kind of data with a group lasso on a cubic
    polynomial basis per feature."""
    rng = np.random.RandomState(42)
    n_samples = 200
    # the true additive components, each applied to one feature
    features = [
        lambda x: 2 * x * x,
        lambda x: 10. * x,
        lambda x: 20. * (1. / (1 + np.exp(-(5. * x)))),
        lambda x: np.exp(x)
    ]
    n_true_features = len(features)
    n_fake_features = 16
    n_features = n_true_features + n_fake_features
    # X = 4 * (rng.rand(n_samples, n_features) - .5)
    X = rng.randn(n_samples, n_features)
    y = np.hstack([feature(
        X[:, (i + 1) * n_features // n_true_features - 1])[:, np.newaxis]
        for i, feature in enumerate(features)]).sum(axis=1)
    X -= X.mean(axis=0)
    X /= X.std(axis=0)
    y -= y.mean()

    from group_lasso import group_lasso  # separate module, not in this gist
    # expand each feature into a cubic polynomial basis (x, x**2, x**3)
    # and group the three basis columns of each feature together
    X1 = np.empty((X.shape[0], X.shape[1] * 3))
    groups = []
    for j in range(X.shape[1]):
        X1[:, 3 * j] = X[:, j]
        X1[:, 3 * j + 1] = X[:, j] ** 2
        X1[:, 3 * j + 2] = X[:, j] ** 3
        groups += [j] * 3
    w = group_lasso(X1, y, .8, groups)

    import pylab as pl
    pl.figure()
    for i in range(n_true_features):
        j = (i + 1) * n_features // n_true_features - 1
        # top row: the true component function
        pl.subplot(2, n_true_features, i + 1)
        pl.plot(X[:, j], features[i](X[:, j]), "rx")
        pl.axis('tight')
        # bottom row: the polynomial recovered by the group lasso
        pl.subplot(2, n_true_features, i + 1 + n_true_features)
        x = X[:, j]
        pl.plot(x, w[3 * j] * x + w[3 * j + 1] * (x ** 2)
                + w[3 * j + 2] * (x ** 3), "b+")
        pl.axis('tight')
        if i in (0, 3):
            pl.ylim(bottom=-.5)
    pl.show()
if __name__ == "__main__":
# plot_spam_polynomial()
rng = np.random.RandomState(42)
n_samples = 100
features = [
lambda x: 2 * x * x,
lambda x: 10. * x,
lambda x: 20. * (1. / (1 + np.exp(-(5. * x)))),
lambda x: np.exp(x)
]
n_true_features = len(features)
n_fake_features = 16
n_features = n_true_features + n_fake_features
# X = 4 * (rng.rand(n_samples, n_features) - .5)
X = rng.randn(n_samples, n_features)
# y = np.hstack([feature(
# X[:, (i + 1) * n_features / n_true_features - 1])[:, np.newaxis]
# for i, feature in enumerate(features)]).sum(axis=1)
non_univariate_features = [
lambda x1, x2: np.exp((x1 + x2) / np.sqrt(2.))
]
n_true_features = len(non_univariate_features)
n_features = n_true_features + n_fake_features
y = np.hstack([
feature(X[:, (i + 1) * n_features / (n_true_features + 1) - 2],
X[:, (i + 1) * n_features / (n_true_features + 1) - 1])[:, np.newaxis]
for i, feature in enumerate(non_univariate_features)]).sum(axis=1)
X -= X.mean(axis=0)
X /= X.std(axis=0)
y -= y.mean()
f = spam_coord_descent(X, y, alpha=1., loess_f=.6)
import pylab as pl
pl.figure()
# for i in range(n_true_features):
# pl.subplot(2, n_true_features, i + 1)
# pl.plot(X[:, (i + 1) * n_features / n_true_features - 1],
# features[i](X[:, (i + 1) * n_features / n_true_features - 1]),
# "rx")
# pl.subplot(2, n_true_features, i + 1 + n_true_features)
# pl.plot(X[:, (i + 1) * n_features / n_true_features - 1],
# f_r1[:, (i + 1) * n_features / n_true_features - 1], "b+")
for i in range(n_true_features):
pl.subplot(2, n_true_features * 3, 3 * i + 1)
pl.plot(X[:, (i + 1) * n_features / (n_true_features + 1) - 1],
non_univariate_features[i](
X[:, (i + 1) * n_features / (n_true_features + 1) - 2],
X[:, (i + 1) * n_features / (n_true_features + 1) - 1]
), "rx")
pl.xlabel("x2")
pl.ylabel("f(x1, x2)")
pl.subplot(2, n_true_features * 3, 3 * i + 2)
pl.plot(X[:, (i + 1) * n_features / (n_true_features + 1) - 2],
non_univariate_features[i](
X[:, (i + 1) * n_features / (n_true_features + 1) - 2],
X[:, (i + 1) * n_features / (n_true_features + 1) - 1]
), "rx")
pl.xlabel("x1")
pl.ylabel("f(x1, x2)")
pl.subplot(2, n_true_features * 3, 3 * i + 3)
pl.plot(1./np.sqrt(2.) * (X[:, (i + 1) * n_features / (n_true_features + 1) - 2]
+ X[:, (i + 1) * n_features / (n_true_features + 1) - 1]),
non_univariate_features[i](
X[:, (i + 1) * n_features / (n_true_features + 1) - 2],
X[:, (i + 1) * n_features / (n_true_features + 1) - 1]
), "rx")
pl.xlabel("x1 + x2")
pl.ylabel("f(x1, x2)")
pl.subplot(2, n_true_features * 3, 3 * i + 1 + 3 * n_true_features)
pl.plot(X[:, (i + 1) * n_features / (n_true_features + 1) - 1],
f[:, (i + 1) * n_features / (n_true_features + 1) - 1]
+f[:, (i + 1) * n_features / (n_true_features + 1) - 2], "b+")
pl.xlabel("x2")
pl.ylabel("f_2(x2)")
pl.subplot(2, n_true_features * 3, 3 * i + 2 + 3 * n_true_features)
pl.plot(X[:, (i + 1) * n_features / (n_true_features + 1) - 2],
f[:, (i + 1) * n_features / (n_true_features + 1) - 2]
+f[:, (i + 1) * n_features / (n_true_features + 1) - 1], "b+")
pl.xlabel("x1")
pl.ylabel("f_1(x1)")
pl.subplot(2, n_true_features * 3, 3 * i + 3 + 3 * n_true_features)
pl.plot(X[:, (i + 1) * n_features / (n_true_features + 1) - 2]
+X[:, (i + 1) * n_features / (n_true_features + 1) - 1],
f[:, (i + 1) * n_features / (n_true_features + 1) - 2]
+f[:, (i + 1) * n_features / (n_true_features + 1) - 1], "b+")
pl.xlabel("x1 + x2")
pl.ylabel("f_1(x1) + f_2(x2)")
pl.tight_layout()
pl.figure()
pl.imshow(np.abs(f), interpolation="nearest")
# pl.colorbar()
pl.hot()
pl.show()
@agramfort commented:

this code does not run.