@fabianp · Last active August 29, 2015
Sparse Additive Models (SpAM) in Python
"""
This module implements the Lowess function for nonparametric regression.
Functions:
lowess Fit a smooth nonparametric regression curve to a scatterplot.
For more information, see
William S. Cleveland: "Robust locally weighted regression and smoothing
scatterplots", Journal of the American Statistical Association, December 1979,
volume 74, number 368, pp. 829-836.
William S. Cleveland and Susan J. Devlin: "Locally weighted regression: An
approach to regression analysis by local fitting", Journal of the American
Statistical Association, September 1988, volume 83, number 403, pp. 596-610.
"""
from math import ceil
import numpy as np
from scipy import linalg
def lowess(x, y, f=2. / 3., iter=3):
    """lowess(x, y, f=2./3., iter=3) -> yest

    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a
    scatterplot.  The arrays x and y contain an equal number of elements;
    each pair (x[i], y[i]) defines a data point in the scatterplot.  The
    function returns the estimated (smooth) values of y.

    The smoothing span is given by f.  A larger value for f will result in
    a smoother curve.  The number of robustifying iterations is given by
    iter.  The function will run faster with a smaller number of
    iterations."""
    n = len(x)
    r = min(int(ceil(f * n)), n - 1)  # guard against f == 1
    # local bandwidth: distance of each point to its r-th nearest neighbour
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3  # tricube kernel; column i = weights around x[i]
    yest = np.zeros(n)
    delta = np.ones(n)
    for iteration in range(iter):
        for i in range(n):
            # weighted linear least squares fit around x[i]
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]
        # downweight points with large residuals (bisquare weights)
        # to robustify the next pass
        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2
    return yest
if __name__ == '__main__':
    import math
    n = 100
    # noisy sine wave as test data
    x = np.linspace(0, 2 * math.pi, n)
    y = np.sin(x) + 0.3 * np.random.randn(n)
    f = 0.25
    yest = lowess(x, y, f=f, iter=3)

    import pylab as pl
    pl.clf()
    pl.plot(x, y, label='y noisy')
    pl.plot(x, yest, label='y pred')
    pl.legend()
    pl.show()
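The SpAM module below imports `make_smoothing_matrix` from a `my_loess` module that is not included in this gist, which is one reason the code does not run as posted (see the comment at the bottom). Here is a minimal sketch of what such a function might look like, assuming the first file above is saved as `loess.py` (it is imported as `from loess import lowess` below) and this sketch as `my_loess.py`; it returns the n x n linear smoother S with `yest = S.dot(y)` for one non-robust pass of tricube-weighted local linear regression, consistent with `lowess` above:

import numpy as np
from math import ceil
from scipy import linalg


def make_smoothing_matrix(x, kernel_params=None):
    """Hypothetical stand-in for the missing my_loess.make_smoothing_matrix.

    Returns the n x n matrix S such that S.dot(y) equals one (non-robust)
    local linear regression pass of y on x with tricube weights, matching
    the weighting scheme used in lowess() above."""
    if kernel_params is None:
        kernel_params = {}
    f = kernel_params.get("f_fraction", 2. / 3.)
    n = len(x)
    r = min(int(ceil(f * n)), n - 1)
    # local bandwidth: distance to the r-th nearest neighbour
    h = np.array([np.sort(np.abs(x - x[i]))[r] for i in range(n)])
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3  # tricube kernel; column i = weights around x[i]
    S = np.empty((n, n))
    for i in range(n):
        wi = w[:, i]
        # the weighted least squares line at x[i], written as a linear
        # map of y:  yest[i] = [1, x[i]] . A^{-1} B y
        B = np.vstack([wi, wi * x])
        A = np.array([[wi.sum(), (wi * x).sum()],
                      [(wi * x).sum(), (wi * x * x).sum()]])
        S[i] = np.array([1., x[i]]).dot(linalg.solve(A, B))
    return S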
"""
Implements the Sparse additive models described in Ravikumar, Pradeep D., et al.
"SpAM: Sparse Additive Models." NIPS. 2007.
"""
import numpy as np
from scipy import linalg
from loess import lowess
from joblib import Parallel, delayed, Memory
VERBOSE = 10
def spam_coord_descent(X, y, alpha=1., max_iter=10000, tol=1e-8, f_init=None,
                       loess_f=2. / 3., loess_iter=3,
                       pre_calculate_smoothing_matrix=True,
                       mem=Memory(location=None),
                       smoothing_matrices_n_jobs=-1):
    """Implementation of SpAM by Ravikumar et al.

    Coordinate descent, using a LOESS smoother to fit the residuals.

    Parameters
    ----------
    X : ndarray
        Data matrix of size n x p, where n = n_samples, p = n_features
    y : ndarray
        Fitting target
    alpha : float
        Regularization strength; larger values drive more component
        functions f_j to zero
    f_init : ndarray
        Initialization of the f_j, set to 0 if not specified.
        Specify it for a warm start.
    loess_f : float
        LOESS algorithm parameter; a larger f gives a smoother curve
    loess_iter : int
        Number of robustifying iterations in the LOESS algorithm

    Returns
    -------
    f : ndarray of shape (n_samples, n_features)
        Column j holds the fitted component function f_j evaluated at
        X[:, j], i.e. [f_1(X_1), f_2(X_2), ..., f_p(X_p)]
    """
    X = np.asanyarray(X)
    y = np.asanyarray(y)
    n_samples, n_features = X.shape
    inv_sqrt_n_samples = 1. / np.sqrt(n_samples)

    # initialize some variables
    outer_iteration_counter = 0
    # if f_init is None, start all component functions at 0
    if f_init is None:
        f = np.zeros([n_samples, n_features], dtype=X.dtype)
    else:
        f = f_init.copy()

    if pre_calculate_smoothing_matrix:
        import my_loess  # not included in the gist; see the sketch above
        if VERBOSE > 9:
            print("Precalculating smoothing matrices")
        make_smoothing_matrix = mem.cache(my_loess.make_smoothing_matrix)
        # one n x n linear smoother per feature, computed in parallel
        smoothing_matrices = Parallel(n_jobs=smoothing_matrices_n_jobs)(
            delayed(make_smoothing_matrix)(
                X[:, j], kernel_params={"f_fraction": loess_f})
            for j in range(n_features))

    # initial residual
    R = y - f.sum(axis=1)
    while outer_iteration_counter < max_iter:
        R_old = R.copy()
        outer_iteration_counter += 1
        if VERBOSE > 9:
            print("In outer iteration %d" % outer_iteration_counter)
        for j in range(n_features):
            if VERBOSE > 99:
                print("Processing feature %d for the %d-th time"
                      % (j, outer_iteration_counter))
            # update the residual in place by adding back feature j
            R += f[:, j]
            # fit a LOESS to this residual as a function of the feature
            if pre_calculate_smoothing_matrix:
                P_j = np.dot(smoothing_matrices[j], R)
            else:
                P_j = lowess(X[:, j], R, f=loess_f, iter=loess_iter)
            # estimate the norm using the simple, biased version
            # from eq. (15) of the paper
            s_j = inv_sqrt_n_samples * np.sqrt((P_j ** 2).sum())
            # soft threshold
            soft_thresh = 1. - alpha / s_j
            if soft_thresh > 0:
                f_j = soft_thresh * P_j
                f_j -= f_j.mean()  # recentre to mean zero
                f[:, j] = f_j
                R[:] -= f_j
            else:
                # component is thresholded away entirely
                f[:, j] = 0.
        # stop when the residual barely changes between outer iterations
        max_inc = linalg.norm(R - R_old)
        if max_inc < tol / 2:
            break
        # .. dual gap (work in progress) ..
        if False:  # max_inc < rtol * np.amax(w_new):
            group_norm = alpha * np.sum([linalg.norm(f[:, j], 2)
                                         for j in range(f.shape[1])])
            # norm_Anu = [linalg.norm(np.dot(H[g], residual))
            #             for g in group_labels]
            # if np.any(norm_Anu > C):
            #     nnu = residual * np.min(C / norm_Anu)
            # else:
            #     nnu = residual
            # primal_obj = .5 * np.dot(residual, residual) + group_norm
            # dual_obj = -.5 * np.dot(nnu, nnu) - np.dot(nnu, y)
            # dual_gap = primal_obj - dual_obj
            # if verbose:
            #     print('Relative error: %s' % (dual_gap / dual_obj))
            # if np.abs(dual_gap / dual_obj) < rtol:
            #     break
    return f
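# A minimal usage sketch (not from the original gist; data and parameter
# values are illustrative): fit SpAM on toy data where only the first
# feature matters, then check which components survive the soft threshold.
# The lowess path is used so that only loess.py is required.
def _spam_usage_sketch():
    rng = np.random.RandomState(0)
    X = rng.randn(60, 5)
    y = np.sin(3 * X[:, 0])
    y -= y.mean()
    f = spam_coord_descent(X, y, alpha=.1, max_iter=50,
                           pre_calculate_smoothing_matrix=False)
    # indices of the selected (non-zero) component functions
    print([j for j in range(X.shape[1]) if np.abs(f[:, j]).max() > 0])
# (call _spam_usage_sketch() by hand to try it)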
def plot_spam_polynomial():
    """Baseline: fit the same kind of data with a group lasso on a cubic
    polynomial basis per feature."""
    rng = np.random.RandomState(42)
    n_samples = 200
    # the true additive components, each applied to one feature
    features = [
        lambda x: 2 * x * x,
        lambda x: 10. * x,
        lambda x: 20. * (1. / (1 + np.exp(-(5. * x)))),
        lambda x: np.exp(x)
    ]
    n_true_features = len(features)
    n_fake_features = 16
    n_features = n_true_features + n_fake_features
    # X = 4 * (rng.rand(n_samples, n_features) - .5)
    X = rng.randn(n_samples, n_features)
    y = np.hstack([feature(
        X[:, (i + 1) * n_features // n_true_features - 1])[:, np.newaxis]
        for i, feature in enumerate(features)]).sum(axis=1)
    X -= X.mean(axis=0)
    X /= X.std(axis=0)
    y -= y.mean()

    from group_lasso import group_lasso  # separate module, not in this gist
    # expand each feature into a cubic polynomial basis (x, x**2, x**3)
    # and group the three basis columns of each feature together
    X1 = np.empty((X.shape[0], X.shape[1] * 3))
    groups = []
    for j in range(X.shape[1]):
        X1[:, 3 * j] = X[:, j]
        X1[:, 3 * j + 1] = X[:, j] ** 2
        X1[:, 3 * j + 2] = X[:, j] ** 3
        groups += [j] * 3
    w = group_lasso(X1, y, .8, groups)

    import pylab as pl
    pl.figure()
    for i in range(n_true_features):
        j = (i + 1) * n_features // n_true_features - 1
        # top row: the true component function
        pl.subplot(2, n_true_features, i + 1)
        pl.plot(X[:, j], features[i](X[:, j]), "rx")
        pl.axis('tight')
        # bottom row: the polynomial recovered by the group lasso
        pl.subplot(2, n_true_features, i + 1 + n_true_features)
        x = X[:, j]
        pl.plot(x, w[3 * j] * x + w[3 * j + 1] * (x ** 2)
                + w[3 * j + 2] * (x ** 3), "b+")
        pl.axis('tight')
        if i in (0, 3):
            pl.ylim(bottom=-.5)
    pl.show()
if __name__ == "__main__":
# plot_spam_polynomial()
rng = np.random.RandomState(42)
n_samples = 100
features = [
lambda x: 2 * x * x,
lambda x: 10. * x,
lambda x: 20. * (1. / (1 + np.exp(-(5. * x)))),
lambda x: np.exp(x)
]
n_true_features = len(features)
n_fake_features = 16
n_features = n_true_features + n_fake_features
# X = 4 * (rng.rand(n_samples, n_features) - .5)
X = rng.randn(n_samples, n_features)
# y = np.hstack([feature(
# X[:, (i + 1) * n_features / n_true_features - 1])[:, np.newaxis]
# for i, feature in enumerate(features)]).sum(axis=1)
non_univariate_features = [
lambda x1, x2: np.exp((x1 + x2) / np.sqrt(2.))
]
n_true_features = len(non_univariate_features)
n_features = n_true_features + n_fake_features
y = np.hstack([
feature(X[:, (i + 1) * n_features / (n_true_features + 1) - 2],
X[:, (i + 1) * n_features / (n_true_features + 1) - 1])[:, np.newaxis]
for i, feature in enumerate(non_univariate_features)]).sum(axis=1)
X -= X.mean(axis=0)
X /= X.std(axis=0)
y -= y.mean()
f = spam_coord_descent(X, y, alpha=1., loess_f=.6)
import pylab as pl
pl.figure()
# for i in range(n_true_features):
# pl.subplot(2, n_true_features, i + 1)
# pl.plot(X[:, (i + 1) * n_features / n_true_features - 1],
# features[i](X[:, (i + 1) * n_features / n_true_features - 1]),
# "rx")
# pl.subplot(2, n_true_features, i + 1 + n_true_features)
# pl.plot(X[:, (i + 1) * n_features / n_true_features - 1],
# f_r1[:, (i + 1) * n_features / n_true_features - 1], "b+")
for i in range(n_true_features):
pl.subplot(2, n_true_features * 3, 3 * i + 1)
pl.plot(X[:, (i + 1) * n_features / (n_true_features + 1) - 1],
non_univariate_features[i](
X[:, (i + 1) * n_features / (n_true_features + 1) - 2],
X[:, (i + 1) * n_features / (n_true_features + 1) - 1]
), "rx")
pl.xlabel("x2")
pl.ylabel("f(x1, x2)")
pl.subplot(2, n_true_features * 3, 3 * i + 2)
pl.plot(X[:, (i + 1) * n_features / (n_true_features + 1) - 2],
non_univariate_features[i](
X[:, (i + 1) * n_features / (n_true_features + 1) - 2],
X[:, (i + 1) * n_features / (n_true_features + 1) - 1]
), "rx")
pl.xlabel("x1")
pl.ylabel("f(x1, x2)")
pl.subplot(2, n_true_features * 3, 3 * i + 3)
pl.plot(1./np.sqrt(2.) * (X[:, (i + 1) * n_features / (n_true_features + 1) - 2]
+ X[:, (i + 1) * n_features / (n_true_features + 1) - 1]),
non_univariate_features[i](
X[:, (i + 1) * n_features / (n_true_features + 1) - 2],
X[:, (i + 1) * n_features / (n_true_features + 1) - 1]
), "rx")
pl.xlabel("x1 + x2")
pl.ylabel("f(x1, x2)")
pl.subplot(2, n_true_features * 3, 3 * i + 1 + 3 * n_true_features)
pl.plot(X[:, (i + 1) * n_features / (n_true_features + 1) - 1],
f[:, (i + 1) * n_features / (n_true_features + 1) - 1]
+f[:, (i + 1) * n_features / (n_true_features + 1) - 2], "b+")
pl.xlabel("x2")
pl.ylabel("f_2(x2)")
pl.subplot(2, n_true_features * 3, 3 * i + 2 + 3 * n_true_features)
pl.plot(X[:, (i + 1) * n_features / (n_true_features + 1) - 2],
f[:, (i + 1) * n_features / (n_true_features + 1) - 2]
+f[:, (i + 1) * n_features / (n_true_features + 1) - 1], "b+")
pl.xlabel("x1")
pl.ylabel("f_1(x1)")
pl.subplot(2, n_true_features * 3, 3 * i + 3 + 3 * n_true_features)
pl.plot(X[:, (i + 1) * n_features / (n_true_features + 1) - 2]
+X[:, (i + 1) * n_features / (n_true_features + 1) - 1],
f[:, (i + 1) * n_features / (n_true_features + 1) - 2]
+f[:, (i + 1) * n_features / (n_true_features + 1) - 1], "b+")
pl.xlabel("x1 + x2")
pl.ylabel("f_1(x1) + f_2(x2)")
pl.tight_layout()
pl.figure()
pl.imshow(np.abs(f), interpolation="nearest")
# pl.colorbar()
pl.hot()
pl.show()
@agramfort commented:

this code does not run.