# alessandrocucci/statistics.py

## statistics.py
"""
A very basic statistics module.

This is a cut-down subset of the ``statistics`` module from Python 3, for Python 2.6+ development.

==================  =============================================
Function            Description
==================  =============================================
mean                Arithmetic mean (average) of data.
variance            Sample variance of data.
stdev               Sample standard deviation of data.
==================  =============================================

Calculate the arithmetic mean ("the average") of data:
>>> mean([-1.0, 2.5, 3.25, 5.75])
2.625

Calculate the standard deviation of sample data:
>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75])
4.38961843444...

If you have previously calculated the mean, you can pass it as the optional
second argument to the two "spread" functions (``variance`` and ``stdev``) to
avoid recalculating it:
>>> data = [1, 2, 2, 4, 4, 4, 5, 6]
>>> mu = mean(data)
>>> variance(data, mu)
2.8571428571428572


Exceptions
----------
A single exception is defined: StatisticsError is a subclass of ValueError.

"""

from __future__ import division
import math


# Names exported by a star-import of this module; the helper
# ``sum_of_squares`` is defined below but is not listed here.
__all__ = ['StatisticsError', 'stdev', 'variance', 'mean']


# === Exceptions ===
class StatisticsError(ValueError):
    """Raised by ``mean`` and ``variance`` when given too few data points.

    Subclasses ``ValueError``, so ``except ValueError`` handlers catch it too.
    """


# === Measures of central tendency (averages) ===
def mean(data):
    """
    Compute the arithmetic mean ("the average") of ``data``.

    ``data`` may be any object supporting ``len()`` and iteration, or a
    one-shot iterator; an iterator is copied into a tuple first so that it
    can be both counted and summed.

    >>> mean([1, 2, 3, 4, 4])
    2.8

    Raises StatisticsError if ``data`` is empty.
    """
    # An iterator returns itself from iter(); containers do not.
    values = tuple(data) if iter(data) is data else data
    count = len(values)
    if not count:
        raise StatisticsError("Mean requires at least one data point")
    return sum(values) / count


# === Measures of spread ===
def sum_of_squares(data, c=None):
    """
    Return the sum of squared deviations of ``data`` about ``c``.

    If ``c`` is None, the mean of ``data`` is calculated in a first pass and
    the deviations from it are summed in a second pass. Otherwise the
    deviations are taken from ``c`` as given, in a single pass.

    ``data`` may be a sequence or a one-shot iterator.

    Raises StatisticsError (from ``mean``) if ``c`` is None and ``data`` is
    empty.
    """
    if c is None:
        # mean() would consume a one-shot iterator, leaving nothing for the
        # second pass below (which would then silently return 0), so copy
        # it into a tuple before making the two passes.
        if iter(data) is data:
            data = tuple(data)
        c = mean(data)
    return sum((x - c) ** 2 for x in data)


def variance(data, xbar=None):
    """
    Compute the sample variance of ``data``.

    ``data`` should hold at least two Real-valued numbers; it may be a
    sized container or a one-shot iterator. ``xbar``, when supplied, is
    taken to be the mean of ``data``; when omitted or None the mean is
    calculated for you.

    Use this function when your data is a sample from a population.

    Examples:
    >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
    >>> variance(data)
    1.3720238095238095

    Passing a mean you have already calculated avoids recalculating it:
    >>> m = mean(data)
    >>> variance(data, m)
    1.3720238095238095

    ``xbar`` is not checked against the actual mean of ``data``; an
    arbitrary value may lead to invalid or impossible results.

    Raises StatisticsError if ``data`` has fewer than two values.
    """
    # Copy a one-shot iterator so it can be both counted and traversed.
    sample = tuple(data) if iter(data) is data else data
    size = len(sample)
    if size < 2:
        raise StatisticsError("Variance requires at least two data points")
    # Divide by size - 1: this is the sample, not the population, variance.
    return sum_of_squares(sample, xbar) / (size - 1)


def stdev(data, xbar=None):
    """
    Compute the sample standard deviation of ``data``.

    This is the square root of ``variance(data, xbar)``; see ``variance``
    for the meaning of the arguments and the errors it raises.

    >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    1.0810874155219827
    """
    return math.sqrt(variance(data, xbar))
	"""
	A very basic statistics module.

	This is a cut-down subset of the ``statistics`` module from Python 3, for Python 2.6+ development.

	==================  =============================================
	Function            Description
	==================  =============================================
	mean                Arithmetic mean (average) of data.
	variance            Sample variance of data.
	stdev               Sample standard deviation of data.
	==================  =============================================

	Calculate the arithmetic mean ("the average") of data:
	>>> mean([-1.0, 2.5, 3.25, 5.75])
	2.625

	Calculate the standard deviation of sample data:
	>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75])
	4.38961843444...

	If you have previously calculated the mean, you can pass it as the optional
	second argument to the two "spread" functions (``variance`` and ``stdev``) to
	avoid recalculating it:
	>>> data = [1, 2, 2, 4, 4, 4, 5, 6]
	>>> mu = mean(data)
	>>> variance(data, mu)
	2.8571428571428572


	Exceptions
	----------
	A single exception is defined: StatisticsError is a subclass of ValueError.

	"""

	from __future__ import division
	import math


	# Names exported by a star-import of this module; the helper
	# ``sum_of_squares`` is defined below but is not listed here.
	__all__ = ['StatisticsError', 'stdev', 'variance', 'mean']


	# === Exceptions ===
	class StatisticsError(ValueError):
	    """Raised by ``mean`` and ``variance`` when given too few data points.

	    Subclasses ``ValueError``, so ``except ValueError`` handlers catch it too.
	    """


	# === Measures of central tendency (averages) ===
	def mean(data):
	    """
	    Return the sample arithmetic mean of data.

	    ``data`` may be any object supporting ``len()`` and iteration, or a
	    one-shot iterator.

	    >>> mean([1, 2, 3, 4, 4])
	    2.8

	    If ``data`` is empty, StatisticsError will be raised.
	    """
	    if iter(data) is data:
	        # A one-shot iterator must be copied so that it can be both
	        # counted and summed.
	        data = tuple(data)
	    n = len(data)
	    if n < 1:
	        raise StatisticsError("Mean requires at least one data point")
	    total = sum(data)
	    return total / n


	# === Measures of spread ===
	def sum_of_squares(data, c=None):
	    """
	    Return sum of square deviations of ``data`` about ``c``.

	    If ``c`` is None, the mean is calculated in one pass, and the
	    deviations from the mean are calculated in a second pass.
	    Otherwise, deviations are calculated from ``c`` as given.

	    ``data`` may be a sequence or a one-shot iterator.

	    Raises StatisticsError (from ``mean``) if ``c`` is None and ``data``
	    is empty.
	    """
	    if c is None:
	        # mean() would consume a one-shot iterator and leave the second
	        # pass with nothing to sum, so copy it into a tuple first.
	        if iter(data) is data:
	            data = tuple(data)
	        c = mean(data)
	    return sum((x - c) ** 2 for x in data)


	def variance(data, xbar=None):
	    """
	    Return the sample variance of data.

	    data should be an iterable of Real-valued numbers, with at least two
	    values.
	    The optional argument xbar, if given, should be the mean of
	    the data. If it is missing or None, the mean is automatically
	    calculated.

	    Use this function when your data is a sample from a population.

	    Examples:
	    >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
	    >>> variance(data)
	    1.3720238095238095

	    If you have already calculated the mean of your data, you can pass it
	    as the optional second argument ``xbar`` to avoid recalculating it:
	    >>> m = mean(data)
	    >>> variance(data, m)
	    1.3720238095238095

	    This function does not check that ``xbar`` is actually the mean of
	    ``data``. Giving arbitrary values for ``xbar`` may lead to invalid or
	    impossible results.

	    Raises StatisticsError if ``data`` has fewer than two values.
	    """
	    if iter(data) is data:
	        # Copy a one-shot iterator so it can be counted and traversed.
	        data = tuple(data)
	    n = len(data)
	    if n < 2:
	        raise StatisticsError("Variance requires at least two data points")
	    ss = sum_of_squares(data, xbar)
	    # Divide by n - 1: this is the sample, not the population, variance.
	    return ss / (n - 1)


	def stdev(data, xbar=None):
	    """
	    Return the square root of the sample variance.

	    See ``variance`` for arguments and other details, including the
	    StatisticsError it raises for fewer than two data points.

	    >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
	    1.0810874155219827
	    """
	    var = variance(data, xbar)
	    return math.sqrt(var)