# josef-pkt/vectorized_percentile.py

## vectorized_percentile.py
import numpy as np
from numpy import asarray, add, rollaxis, sort, arange

def percentile(a, q, limit=None, interpolation='linear', axis=None,
               out=None, overwrite_input=False):
    """
    Compute the qth percentile of the data along the specified axis.

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array.
    q : array_like in the range of [0,100]
        Percentile(s) to compute, between 0 and 100 inclusive. If `q` is
        an array, its dimensions are added at the start of the result.
    limit : tuple, optional
        Tuple of two scalars ``(lower, upper)``.  Values outside this
        closed range are omitted from the calculation.  None (the default)
        includes all values.  The filtering is done with a boolean mask,
        which flattens `a`; with `limit` the percentile is therefore taken
        over the remaining values as one flat sequence, so it is only
        meaningful together with ``axis=None`` or 1-D input.
    interpolation : {'linear', 'lower', 'higher', 'midpoint'}, optional
        Method used when the desired quantile lies between two data
        points `i` and `j`:

          * linear: `i + (j - i) * fraction`, where `fraction` is the
            fractional part of the index surrounded by `i` and `j`.
          * lower: `i`.
          * higher: `j`.
          * midpoint: `(i + j) / 2`.
    axis : int, optional
        Axis along which the percentiles are computed. The default (None)
        is to compute the percentile over a flattened version of the array.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape as the expected output; values are cast to
        its dtype on assignment.
    overwrite_input : bool, optional
        If True, sort `a` in place instead of sorting a copy, which saves
        memory when the contents of `a` need not be preserved.  Treat the
        input as undefined afterwards.  When `a` is not already an ndarray,
        or when `limit` is given, a temporary array is sorted instead and
        the caller's object is left untouched.  Default is False.

    Returns
    -------
    percentile : scalar or ndarray
        The shape is ``q.shape`` followed by the shape of `a` with `axis`
        removed; a scalar is returned when that shape is empty.  If `out`
        is given, that array is returned instead.  Integer input produces
        a floating-point result.

    Raises
    ------
    ValueError
        If any `q` lies outside [0, 100] (NaN included), if
        `interpolation` is not one of the supported methods, or if `out`
        does not have the shape of the result.

    See Also
    --------
    mean, median

    Notes
    -----
    Given a vector V of length N, the qth percentile of V is the qth ranked
    value in a sorted copy of V.  A weighted average of the two nearest
    neighbors is used if the normalized ranking does not match q exactly.
    The same as the median if ``q=50``, the same as the minimum if ``q=0``
    and the same as the maximum if ``q=100``.

    Examples
    --------
    >>> a = np.array([[10, 7, 4], [3, 2, 1]])
    >>> float(percentile(a, 50))
    3.5
    >>> percentile(a, 50, axis=0).tolist()
    [6.5, 4.5, 2.5]
    >>> percentile(a, 50, axis=1).tolist()
    [7.0, 2.0]
    >>> float(percentile(a, 50, interpolation='lower'))
    3.0

    >>> m = np.zeros(3)
    >>> percentile(a, 50, axis=0, out=m) is m
    True
    >>> m.tolist()
    [6.5, 4.5, 2.5]

    >>> b = a.copy()
    >>> percentile(b, 50, axis=1, overwrite_input=True).tolist()
    [7.0, 2.0]
    >>> b.tolist()
    [[4, 7, 10], [1, 2, 3]]

    """
    # Validate the arguments before any sorting so that an invalid call
    # cannot modify `a` when `overwrite_input` is set.
    if interpolation not in ('linear', 'lower', 'higher', 'midpoint'):
        raise ValueError("interpolation can only be 'linear', 'lower', "
                         "'higher' or 'midpoint'")

    q = asarray(q) / 100.0
    # Written as a positive test so that NaN is rejected as well.
    if not ((q >= 0) & (q <= 1)).all():
        raise ValueError("percentile must be either in the range [0,100]")

    a = asarray(a)

    # `is not None` instead of truthiness: an ndarray `limit` must not be
    # evaluated as a boolean.  An empty tuple still means "no limit".
    if limit is not None and len(limit) > 0:
        # Boolean-mask indexing returns a flattened copy of the kept values.
        a = a[(limit[0] <= a) & (a <= limit[1])]

    if overwrite_input:
        if axis is None:
            ordered = a.ravel()
            ordered.sort()
        else:
            a.sort(axis=axis)
            ordered = a
    else:
        ordered = sort(a, axis=axis)
    if axis is None:
        axis = 0

    # Move the reduced axis to the front; the axes of `q` replace it.
    ordered = rollaxis(ordered, axis, 0)

    # Fractional position of each requested percentile in the sorted data.
    # Because 0 <= q <= 1, both neighbours stay within [0, last], so
    # q == 100 no longer indexes one element past the end.
    last = ordered.shape[0] - 1
    position = q * last
    below = np.floor(position).astype(np.intp)
    above = np.ceil(position).astype(np.intp)

    if interpolation == 'lower':
        # `* 1.0` promotes integer data to float, as for the other methods.
        result = ordered[below] * 1.0
    elif interpolation == 'higher':
        result = ordered[above] * 1.0
    elif interpolation == 'midpoint':
        # Halve before adding so integer data cannot overflow in the sum.
        result = ordered[below] * 0.5 + ordered[above] * 0.5
    else:
        # Append singleton axes so the weights broadcast against the
        # remaining data axes.
        frac = position - below
        frac = np.reshape(frac, np.shape(frac) + (1,) * (ordered.ndim - 1))
        result = ordered[below] * (1.0 - frac) + ordered[above] * frac

    if out is not None:
        if np.shape(out) != np.shape(result):
            raise ValueError("out has shape %s, expected %s"
                             % (np.shape(out), np.shape(result)))
        out[...] = result
        return out

    if np.ndim(result) == 0:
        # Unwrap a 0-d array so a scalar request yields a scalar.
        result = result[()]
    return result
	import numpy as np
	from numpy import asarray, add, rollaxis, sort, arange

	def percentile(a, q, limit=None, interpolation='linear', axis=None,
	               out=None, overwrite_input=False):
	    """
	    Compute the qth percentile of the data along the specified axis.

	    Parameters
	    ----------
	    a : array_like
	        Input array or object that can be converted to an array.
	    q : array_like in the range of [0,100]
	        Percentile(s) to compute, between 0 and 100 inclusive. If `q` is
	        an array, its dimensions are added at the start of the result.
	    limit : tuple, optional
	        Tuple of two scalars ``(lower, upper)``.  Values outside this
	        closed range are omitted from the calculation.  None (the default)
	        includes all values.  The filtering is done with a boolean mask,
	        which flattens `a`; with `limit` the percentile is therefore taken
	        over the remaining values as one flat sequence, so it is only
	        meaningful together with ``axis=None`` or 1-D input.
	    interpolation : {'linear', 'lower', 'higher', 'midpoint'}, optional
	        Method used when the desired quantile lies between two data
	        points `i` and `j`:

	          * linear: `i + (j - i) * fraction`, where `fraction` is the
	            fractional part of the index surrounded by `i` and `j`.
	          * lower: `i`.
	          * higher: `j`.
	          * midpoint: `(i + j) / 2`.
	    axis : int, optional
	        Axis along which the percentiles are computed. The default (None)
	        is to compute the percentile over a flattened version of the array.
	    out : ndarray, optional
	        Alternative output array in which to place the result. It must
	        have the same shape as the expected output; values are cast to
	        its dtype on assignment.
	    overwrite_input : bool, optional
	        If True, sort `a` in place instead of sorting a copy, which saves
	        memory when the contents of `a` need not be preserved.  Treat the
	        input as undefined afterwards.  When `a` is not already an ndarray,
	        or when `limit` is given, a temporary array is sorted instead and
	        the caller's object is left untouched.  Default is False.

	    Returns
	    -------
	    percentile : scalar or ndarray
	        The shape is ``q.shape`` followed by the shape of `a` with `axis`
	        removed; a scalar is returned when that shape is empty.  If `out`
	        is given, that array is returned instead.  Integer input produces
	        a floating-point result.

	    Raises
	    ------
	    ValueError
	        If any `q` lies outside [0, 100] (NaN included), if
	        `interpolation` is not one of the supported methods, or if `out`
	        does not have the shape of the result.

	    See Also
	    --------
	    mean, median

	    Notes
	    -----
	    Given a vector V of length N, the qth percentile of V is the qth ranked
	    value in a sorted copy of V.  A weighted average of the two nearest
	    neighbors is used if the normalized ranking does not match q exactly.
	    The same as the median if ``q=50``, the same as the minimum if ``q=0``
	    and the same as the maximum if ``q=100``.

	    Examples
	    --------
	    >>> a = np.array([[10, 7, 4], [3, 2, 1]])
	    >>> float(percentile(a, 50))
	    3.5
	    >>> percentile(a, 50, axis=0).tolist()
	    [6.5, 4.5, 2.5]
	    >>> percentile(a, 50, axis=1).tolist()
	    [7.0, 2.0]
	    >>> float(percentile(a, 50, interpolation='lower'))
	    3.0

	    >>> m = np.zeros(3)
	    >>> percentile(a, 50, axis=0, out=m) is m
	    True
	    >>> m.tolist()
	    [6.5, 4.5, 2.5]

	    >>> b = a.copy()
	    >>> percentile(b, 50, axis=1, overwrite_input=True).tolist()
	    [7.0, 2.0]
	    >>> b.tolist()
	    [[4, 7, 10], [1, 2, 3]]

	    """
	    # Validate the arguments before any sorting so that an invalid call
	    # cannot modify `a` when `overwrite_input` is set.
	    if interpolation not in ('linear', 'lower', 'higher', 'midpoint'):
	        raise ValueError("interpolation can only be 'linear', 'lower', "
	                         "'higher' or 'midpoint'")

	    q = asarray(q) / 100.0
	    # Written as a positive test so that NaN is rejected as well.
	    if not ((q >= 0) & (q <= 1)).all():
	        raise ValueError("percentile must be either in the range [0,100]")

	    a = asarray(a)

	    # `is not None` instead of truthiness: an ndarray `limit` must not be
	    # evaluated as a boolean.  An empty tuple still means "no limit".
	    if limit is not None and len(limit) > 0:
	        # Boolean-mask indexing returns a flattened copy of the kept values.
	        a = a[(limit[0] <= a) & (a <= limit[1])]

	    if overwrite_input:
	        if axis is None:
	            ordered = a.ravel()
	            ordered.sort()
	        else:
	            a.sort(axis=axis)
	            ordered = a
	    else:
	        ordered = sort(a, axis=axis)
	    if axis is None:
	        axis = 0

	    # Move the reduced axis to the front; the axes of `q` replace it.
	    ordered = rollaxis(ordered, axis, 0)

	    # Fractional position of each requested percentile in the sorted data.
	    # Because 0 <= q <= 1, both neighbours stay within [0, last], so
	    # q == 100 no longer indexes one element past the end.
	    last = ordered.shape[0] - 1
	    position = q * last
	    below = np.floor(position).astype(np.intp)
	    above = np.ceil(position).astype(np.intp)

	    if interpolation == 'lower':
	        # `* 1.0` promotes integer data to float, as for the other methods.
	        result = ordered[below] * 1.0
	    elif interpolation == 'higher':
	        result = ordered[above] * 1.0
	    elif interpolation == 'midpoint':
	        # Halve before adding so integer data cannot overflow in the sum.
	        result = ordered[below] * 0.5 + ordered[above] * 0.5
	    else:
	        # Append singleton axes so the weights broadcast against the
	        # remaining data axes.
	        frac = position - below
	        frac = np.reshape(frac, np.shape(frac) + (1,) * (ordered.ndim - 1))
	        result = ordered[below] * (1.0 - frac) + ordered[above] * frac

	    if out is not None:
	        if np.shape(out) != np.shape(result):
	            raise ValueError("out has shape %s, expected %s"
	                             % (np.shape(out), np.shape(result)))
	        out[...] = result
	        return out

	    if np.ndim(result) == 0:
	        # Unwrap a 0-d array so a scalar request yields a scalar.
	        result = result[()]
	    return result