agramfort/lowess.py

## lowess.py
"""
This module implements the Lowess function for nonparametric regression.

Functions:
lowess Fit a smooth nonparametric regression curve to a scatterplot.

For more information, see

William S. Cleveland: "Robust locally weighted regression and smoothing
scatterplots", Journal of the American Statistical Association, December 1979,
volume 74, number 368, pp. 829-836.

William S. Cleveland and Susan J. Devlin: "Locally weighted regression: An
approach to regression analysis by local fitting", Journal of the American
Statistical Association, September 1988, volume 83, number 403, pp. 596-610.
"""

# Authors: Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
#
# License: BSD (3-clause)

from math import ceil
import numpy as np
from scipy import linalg


def lowess(x, y, f=2. / 3., iter=3):
    """lowess(x, y, f=2./3., iter=3) -> yest

    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a scatterplot.
    The arrays x and y contain an equal number of elements; each pair
    (x[i], y[i]) defines a data point in the scatterplot. The function returns
    the estimated (smooth) values of y.

    The smoothing span is given by f. A larger value for f will result in a
    smoother curve. The number of robustifying iterations is given by iter. The
    function will run faster with a smaller number of iterations.
    """
    n = len(x)
    r = int(ceil(f * n))
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3
    yest = np.zeros(n)
    delta = np.ones(n)
    for iteration in range(iter):
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]

        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2

    return yest

if __name__ == '__main__':
    import math
    n = 100
    x = np.linspace(0, 2 * math.pi, n)
    y = np.sin(x) + 0.3 * np.random.randn(n)

    f = 0.25
    yest = lowess(x, y, f=f, iter=3)

    import pylab as pl
    pl.clf()
    pl.plot(x, y, label='y noisy')
    pl.plot(x, yest, label='y pred')
    pl.legend()
    pl.show()
	"""
	This module implements the Lowess function for nonparametric regression.

	Functions:
	lowess Fit a smooth nonparametric regression curve to a scatterplot.

	For more information, see

	William S. Cleveland: "Robust locally weighted regression and smoothing
	scatterplots", Journal of the American Statistical Association, December 1979,
	volume 74, number 368, pp. 829-836.

	William S. Cleveland and Susan J. Devlin: "Locally weighted regression: An
	approach to regression analysis by local fitting", Journal of the American
	Statistical Association, September 1988, volume 83, number 403, pp. 596-610.
	"""

	# Authors: Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
	#
	# License: BSD (3-clause)

	from math import ceil
	import numpy as np
	from scipy import linalg


	def lowess(x, y, f=2. / 3., iter=3):
	"""lowess(x, y, f=2./3., iter=3) -> yest

	Lowess smoother: Robust locally weighted regression.
	The lowess function fits a nonparametric regression curve to a scatterplot.
	The arrays x and y contain an equal number of elements; each pair
	(x[i], y[i]) defines a data point in the scatterplot. The function returns
	the estimated (smooth) values of y.

	The smoothing span is given by f. A larger value for f will result in a
	smoother curve. The number of robustifying iterations is given by iter. The
	function will run faster with a smaller number of iterations.
	"""
	n = len(x)
	r = int(ceil(f * n))
	h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
	w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
	w = (1 - w 3) 3
	yest = np.zeros(n)
	delta = np.ones(n)
	for iteration in range(iter):
	for i in range(n):
	weights = delta * w[:, i]
	b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
	A = np.array([[np.sum(weights), np.sum(weights * x)],
	[np.sum(weights * x), np.sum(weights * x * x)]])
	beta = linalg.solve(A, b)
	yest[i] = beta[0] + beta[1] * x[i]

	residuals = y - yest
	s = np.median(np.abs(residuals))
	delta = np.clip(residuals / (6.0 * s), -1, 1)
	delta = (1 - delta 2) 2

	return yest

	if __name__ == '__main__':
	import math
	n = 100
	x = np.linspace(0, 2 * math.pi, n)
	y = np.sin(x) + 0.3 * np.random.randn(n)

	f = 0.25
	yest = lowess(x, y, f=f, iter=3)

	import pylab as pl
	pl.clf()
	pl.plot(x, y, label='y noisy')
	pl.plot(x, yest, label='y pred')
	pl.legend()
	pl.show()