# vene/siegel.py

## siegel.py
# Author: Vlad Niculae <vlad@vene.ro>
# License: 2-clause BSD

"""2D implementation of the robust Siegel Repeated Median slope estimator

This estimator tolerates corruption of up to 50% of the input points in either
the X or the Y dimension.

Vectorized implementation, and a naive implementation for sanity-check.
"""
import numpy as np


def siegel_slow(x, y):
    """Naive O(n^2) Siegel repeated median slope, used as a sanity check.

    For every point ``i`` the median of the slopes to all other points is
    taken, and the median of those per-point medians is returned.

    Parameters
    ----------
    x, y : array-like
        1-D sequences of coordinates; ``y`` must be at least as long as
        ``x``.

    Returns
    -------
    slope : float
        The repeated median slope.  ``nan`` when fewer than two points are
        given.
    """
    # Cast to float so integer input is never floor-divided (Python 2) and
    # so a zero denominator yields inf/nan instead of ZeroDivisionError.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n_obs = len(x)
    medians = []
    # Coincident x values divide by zero; silence the floating-point
    # warnings while the pairwise slopes are computed.
    with np.errstate(all='ignore'):
        for i in range(n_obs):
            slopes_i = [(y[j] - y[i]) / (x[j] - x[i])
                        for j in range(n_obs) if j != i]
            # nanmedian skips the undefined 0/0 slopes of duplicated
            # points, matching what siegel_fast does.
            medians.append(np.nanmedian(slopes_i))
    return np.median(medians)


def siegel_fast(x, y):
    """Vectorized Siegel repeated median slope estimator.

    Builds the full matrix of pairwise slopes, takes the median of each
    column (ignoring the undefined 0/0 entries such as the diagonal), and
    returns the median of those per-point medians.

    Parameters
    ----------
    x, y : array-like
        1-D sequences of coordinates with the same length.

    Returns
    -------
    slope : float
        The repeated median slope.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same shape.
    """
    # Cast to float so integer arrays are not floor-divided under Python 2.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.shape != y.shape:
        # Without this check a length-1 input would broadcast silently.
        raise ValueError("x and y must have the same shape, got %s and %s"
                         % (x.shape, y.shape))
    # slopes computation reused from scipy.stats.theilslopes
    deltax = x[:, np.newaxis] - x
    deltay = y[:, np.newaxis] - y
    # The diagonal (and any duplicated point) is 0/0; suppress the warnings
    # and let nanmedian drop the resulting NaNs.  errstate restores the
    # previous floating-point error settings on exit.
    with np.errstate(all='ignore'):
        slopes = deltay / deltax
    return np.median(np.nanmedian(slopes, axis=0))


if __name__ == '__main__':
    # Smoke test: print the slow and fast estimates side by side on
    # increasingly corrupted data.  print() is called with a single argument
    # so the output is the same on Python 2 and Python 3.

    # no noise
    x = np.random.randn(10)
    y = 10 + 3 * x
    print(siegel_slow(x, y))  # should be 3
    print(siegel_fast(x, y))

    # one gross outlier in x and one in y
    y_noise = y.copy()
    x_noise = x.copy()
    x_noise[0] *= 100.0
    y_noise[-1] *= -100.0
    print(siegel_slow(x_noise, y_noise))  # should also be 3
    print(siegel_fast(x_noise, y_noise))

    # outliers plus small Gaussian noise on every coordinate
    y_noise += 0.1 * np.random.randn(10)
    x_noise += 0.1 * np.random.randn(10)
    print(siegel_slow(x_noise, y_noise))  # should also be close to 3
    print(siegel_fast(x_noise, y_noise))
	# Author: Vlad Niculae <vlad@vene.ro>
	# License: 2-clause BSD

	"""2D implementation of the robust Siegel Repeated Median slope estimator

	This estimator tolerates corruption of up to 50% of the input points in either
	the X or the Y dimension.

	Vectorized implementation, and a naive implementation for sanity-check.
	"""
	import numpy as np


	def siegel_slow(x, y):
		"""Naive O(n^2) Siegel repeated median slope, used as a sanity check.

		For every point ``i`` the median of the slopes to all other points is
		taken, and the median of those per-point medians is returned.

		Parameters
		----------
		x, y : array-like
			1-D sequences of coordinates; ``y`` must be at least as long as
			``x``.

		Returns
		-------
		slope : float
			The repeated median slope.  ``nan`` when fewer than two points
			are given.
		"""
		# Cast to float so integer input is never floor-divided (Python 2)
		# and a zero denominator yields inf/nan, not ZeroDivisionError.
		x = np.asarray(x, dtype=float)
		y = np.asarray(y, dtype=float)
		n_obs = len(x)
		medians = []
		# Coincident x values divide by zero; silence the floating-point
		# warnings while the pairwise slopes are computed.
		with np.errstate(all='ignore'):
			for i in range(n_obs):
				slopes_i = []
				for j in range(n_obs):
					if i == j:
						continue
					slopes_i.append((y[j] - y[i]) / (x[j] - x[i]))
				# nanmedian skips the undefined 0/0 slopes of duplicated
				# points.
				medians.append(np.nanmedian(slopes_i))
		return np.median(medians)


	def siegel_fast(x, y):
		"""Vectorized Siegel repeated median slope estimator.

		Builds the full matrix of pairwise slopes, takes the median of each
		column (ignoring the undefined 0/0 entries such as the diagonal), and
		returns the median of those per-point medians.

		Parameters
		----------
		x, y : array-like
			1-D sequences of coordinates with the same length.

		Returns
		-------
		slope : float
			The repeated median slope.

		Raises
		------
		ValueError
			If ``x`` and ``y`` do not have the same shape.
		"""
		# Cast to float so integer arrays are not floor-divided on Python 2.
		x = np.asarray(x, dtype=float)
		y = np.asarray(y, dtype=float)
		if x.shape != y.shape:
			# Without this check a length-1 input would broadcast silently.
			raise ValueError("x and y must have the same shape, got %s and %s"
							 % (x.shape, y.shape))
		# slopes computation reused from scipy.stats.theilslopes
		deltax = x[:, np.newaxis] - x
		deltay = y[:, np.newaxis] - y
		# The diagonal (and any duplicated point) is 0/0; suppress the
		# warnings and let nanmedian drop the resulting NaNs.
		with np.errstate(all='ignore'):
			slopes = deltay / deltax
		return np.median(np.nanmedian(slopes, axis=0))


	if __name__ == '__main__':
		# Smoke test: print the slow and fast estimates side by side on
		# increasingly corrupted data.  print() is called with a single
		# argument so the output is the same on Python 2 and Python 3.

		# no noise
		x = np.random.randn(10)
		y = 10 + 3 * x
		print(siegel_slow(x, y))  # should be 3
		print(siegel_fast(x, y))

		# one gross outlier in x and one in y
		y_noise = y.copy()
		x_noise = x.copy()
		x_noise[0] *= 100.0
		y_noise[-1] *= -100.0
		print(siegel_slow(x_noise, y_noise))  # should also be 3
		print(siegel_fast(x_noise, y_noise))

		# outliers plus small Gaussian noise on every coordinate
		y_noise += 0.1 * np.random.randn(10)
		x_noise += 0.1 * np.random.randn(10)
		print(siegel_slow(x_noise, y_noise))  # should also be close to 3
		print(siegel_fast(x_noise, y_noise))