# Jasata/RunningStatistics.py

## RunningStatistics.py
#! /usr/bin/env python3
#
#   A class to compute the mean, SAMPLE variance, and SAMPLE standard deviation
#   of a stream of data.
#
#   RunningStatistics.py - 2018, Jani Tammi <jasata@utu.fi>
#   0.1.0   Initial version.
#
#   Makes use of a method to calculate running variance
#   by B. P. Welford, 1962,
#   'Art of Computer Programming', Vol 2, page 232, 3rd edition.
#   see: https://www.johndcook.com/blog/standard_deviation/
#
#
#
#    Maintains following values:
#
#       .nsamples: int              Number of samples calculated.
#       .min: float                 Smallest sample value encountered.
#       .max: float                 Largest sample value encountered.
#       .mean: float                Mean of all samples.
#       .rolling_mean: float        Mean of X last samples.
#       .standard_deviation: float  Sample standard deviation.
#       .variance: float            Sample variance.
#       .range: float               Difference between min and max.
#
#   Constructor/init takes only one (optional) argument, the number
#   of samples to keep in a rolling buffer/list.
#
#   A .reset() method is provided for when reuse of an object is desired.
#
import math

class RunningStatistics:
    """Accumulate basic statistics over a stream of numeric samples.

    The mean and the sample variance are maintained incrementally with
    Welford's method, so the only history kept is the short list of most
    recent samples used by ``rolling_mean``.

    Public attributes:
        nsamples    Number of samples passed to update().
        min         Smallest sample seen, or None before the first sample.
        max         Largest sample seen, or None before the first sample.
        mean        Mean of all samples (0.0 before the first sample).
    """

    def __init__(self, n_rolling_values=5):
        """Create an empty accumulator.

        n_rolling_values is the maximum number of most recent samples
        kept for ``rolling_mean``.
        """
        self._nrolling = n_rolling_values
        self._lrolling = []
        self.min = None
        self.max = None
        # for Welford's method
        self.nsamples = 0
        self.mean = 0.0
        # Welford's running sum S; the variance is S / (nsamples - 1).
        self._svar = 0.0

    @property
    def rolling_mean(self) -> float:
        """Mean of the samples currently in the rolling window (0.0 if empty)."""
        if not self._lrolling:
            return 0.0
        return sum(self._lrolling) / len(self._lrolling)

    @property
    def variance(self) -> float:
        """Sample variance; 0.0 while fewer than two samples have been seen."""
        if self.nsamples > 1:
            return self._svar / (self.nsamples - 1)
        return 0.0

    @property
    def standard_deviation(self) -> float:
        """Sample standard deviation (square root of ``variance``)."""
        return math.sqrt(self.variance)

    @property
    def range(self) -> float:
        """Difference between max and min; 0.0 before the first sample."""
        # Test against None explicitly: a minimum of 0 is a valid value and
        # must not be mistaken for "no samples yet".
        if self.min is None or self.max is None:
            return 0.0
        return self.max - self.min

    def update(self, value: float) -> None:
        """Feed one sample into all maintained statistics."""
        # Explicit None checks: an extreme that happens to be 0 is falsy and
        # would otherwise be overwritten by the next sample.
        if self.min is None or value < self.min:
            self.min = value
        if self.max is None or value > self.max:
            self.max = value

        # Keep at most _nrolling of the latest samples.
        self._lrolling.append(value)
        if len(self._lrolling) > self._nrolling:
            self._lrolling.pop(0)

        # Welford's incremental update of the mean and of the running sum.
        self.nsamples += 1
        if self.nsamples == 1:
            self.mean = value
            self._svar = 0.0
        else:
            old_mean = self.mean
            self.mean = old_mean + (value - old_mean) / self.nsamples
            self._svar += (value - old_mean) * (value - self.mean)

    def reset(self) -> None:
        """Return the object to its freshly constructed state.

        The configured rolling window size is kept.
        """
        self.nsamples = 0
        self.mean = 0.0
        self._svar = 0.0
        self._lrolling = []
        # Extremes must be cleared too, otherwise values from the previous
        # run leak into the next one.
        self.min = None
        self.max = None

    def __str__(self) -> str:
        """Return an eight-line report, one ``label....: value`` row per statistic."""
        rows = (
            ("Number of samples", self.nsamples),
            ("Minimum", self.min),
            ("Maximum", self.max),
            ("Range", self.range),
            ("Mean", self.mean),
            ("Rolling mean of {} last samples".format(self._nrolling),
             self.rolling_mean),
            ("Variance", self.variance),
            ("Standard Deviation", self.standard_deviation),
        )
        # Label is dot-padded and value is space-padded, both to 40 columns.
        row_format = "{label:.<{w}}: {value: <{t}}"
        return "\n".join(
            row_format.format(
                label=label,
                # None (no samples yet) does not accept a format spec.
                value="n/a" if value is None else value,
                w=40,
                t=40,
            )
            for label, value in rows
        )


if __name__ == '__main__':
    # Interactive demo: feed a fixed data set one sample at a time and
    # redraw the statistics report in place after every sample.

    # Only needed by the optional timing snippet kept in comments below.
    import time

    data = [
        -86, -44, -141, -172, -112, -128, 10, 137, 118, 7,
        188, 172, 170, 17, -101, -186, -105, 28, -121, -97,
        4, -121, -52, -114, -134, -197, -118, -61, -139,
    ]
    stats = RunningStatistics(6)

    # Optional: replace the fixed data with a larger random set.
    # import random
    # start = time.time()
    # data = [random.uniform(-10000, 10000) for _ in range(0,20000)]
    # lapsed = time.time() - start
    # print("Data generation took", lapsed, "seconds")

    # ANSI escape: move the cursor down 7 lines before the first redraw.
    print("\u001b[7B")
    # ANSI escape: move the cursor up 10 lines so each round overwrites
    # the previous banner, report and prompt.
    banner = "\u001b[10AUpdating with value {v: <{w}}"
    for sample in data:
        print(banner.format(v=sample, w=30))
        stats.update(sample)
        print(stats)
        input("Press ENTER...")


# EOF
	#! /usr/bin/env python3
	#
	# A class to compute the mean, SAMPLE variance, and SAMPLE standard deviation
	# of a stream of data.
	#
	# RunningStatistics.py - 2018, Jani Tammi <jasata@utu.fi>
	# 0.1.0 Initial version.
	#
	# Makes use of a method to calculate running variance
	# by B. P. Welford, 1962,
	# 'Art of Computer Programming', Vol 2, page 232, 3rd edition.
	# see: https://www.johndcook.com/blog/standard_deviation/
	#
	#
	#
	# Maintains following values:
	#
	# .nsamples: int Number of samples calculated.
	# .min: float Smallest sample value encountered.
	# .max: float Largest sample value encountered.
	# .mean: float Mean of all samples.
	# .rolling_mean: float Mean of X last samples.
	# .standard_deviation: float Sample standard deviation.
	# .variance: float Sample variance.
	# .range: float Difference between min and max.
	#
	# Constructor/init takes only one (optional) argument, the number
	# of samples to keep in a rolling buffer/list.
	#
	# A .reset() method is provided for when reuse of an object is desired.
	#
	import math

	class RunningStatistics:
		"""Accumulate basic statistics over a stream of numeric samples.

		The mean and the sample variance are maintained incrementally with
		Welford's method, so the only history kept is the short list of most
		recent samples used by ``rolling_mean``.

		Public attributes:
			nsamples    Number of samples passed to update().
			min         Smallest sample seen, or None before the first sample.
			max         Largest sample seen, or None before the first sample.
			mean        Mean of all samples (0.0 before the first sample).
		"""

		def __init__(self, n_rolling_values=5):
			"""Create an empty accumulator.

			n_rolling_values is the maximum number of most recent samples
			kept for ``rolling_mean``.
			"""
			self._nrolling = n_rolling_values
			self._lrolling = []
			self.min = None
			self.max = None
			# for Welford's method
			self.nsamples = 0
			self.mean = 0.0
			# Welford's running sum S; the variance is S / (nsamples - 1).
			self._svar = 0.0

		@property
		def rolling_mean(self) -> float:
			"""Mean of the samples currently in the rolling window (0.0 if empty)."""
			if not self._lrolling:
				return 0.0
			return sum(self._lrolling) / len(self._lrolling)

		@property
		def variance(self) -> float:
			"""Sample variance; 0.0 while fewer than two samples have been seen."""
			if self.nsamples > 1:
				return self._svar / (self.nsamples - 1)
			return 0.0

		@property
		def standard_deviation(self) -> float:
			"""Sample standard deviation (square root of ``variance``)."""
			return math.sqrt(self.variance)

		@property
		def range(self) -> float:
			"""Difference between max and min; 0.0 before the first sample."""
			# Test against None explicitly: a minimum of 0 is a valid value and
			# must not be mistaken for "no samples yet".
			if self.min is None or self.max is None:
				return 0.0
			return self.max - self.min

		def update(self, value: float) -> None:
			"""Feed one sample into all maintained statistics."""
			# Explicit None checks: an extreme that happens to be 0 is falsy and
			# would otherwise be overwritten by the next sample.
			if self.min is None or value < self.min:
				self.min = value
			if self.max is None or value > self.max:
				self.max = value

			# Keep at most _nrolling of the latest samples.
			self._lrolling.append(value)
			if len(self._lrolling) > self._nrolling:
				self._lrolling.pop(0)

			# Welford's incremental update of the mean and of the running sum.
			self.nsamples += 1
			if self.nsamples == 1:
				self.mean = value
				self._svar = 0.0
			else:
				old_mean = self.mean
				self.mean = old_mean + (value - old_mean) / self.nsamples
				self._svar += (value - old_mean) * (value - self.mean)

		def reset(self) -> None:
			"""Return the object to its freshly constructed state.

			The configured rolling window size is kept.
			"""
			self.nsamples = 0
			self.mean = 0.0
			self._svar = 0.0
			self._lrolling = []
			# Extremes must be cleared too, otherwise values from the previous
			# run leak into the next one.
			self.min = None
			self.max = None

		def __str__(self) -> str:
			"""Return an eight-line report, one ``label....: value`` row per statistic."""
			rows = (
				("Number of samples", self.nsamples),
				("Minimum", self.min),
				("Maximum", self.max),
				("Range", self.range),
				("Mean", self.mean),
				("Rolling mean of {} last samples".format(self._nrolling),
					self.rolling_mean),
				("Variance", self.variance),
				("Standard Deviation", self.standard_deviation),
			)
			# Label is dot-padded and value is space-padded, both to 40 columns.
			row_format = "{label:.<{w}}: {value: <{t}}"
			return "\n".join(
				row_format.format(
					label=label,
					# None (no samples yet) does not accept a format spec.
					value="n/a" if value is None else value,
					w=40,
					t=40,
				)
				for label, value in rows
			)


	if __name__ == '__main__':
		# Interactive demo: feed a fixed data set one sample at a time and
		# redraw the statistics report in place after every sample.

		# Only needed by the optional timing snippet kept in comments below.
		import time

		data = [
			-86, -44, -141, -172, -112, -128, 10, 137, 118, 7,
			188, 172, 170, 17, -101, -186, -105, 28, -121, -97,
			4, -121, -52, -114, -134, -197, -118, -61, -139,
		]
		stats = RunningStatistics(6)

		# Optional: replace the fixed data with a larger random set.
		# import random
		# start = time.time()
		# data = [random.uniform(-10000, 10000) for _ in range(0,20000)]
		# lapsed = time.time() - start
		# print("Data generation took", lapsed, "seconds")

		# ANSI escape: move the cursor down 7 lines before the first redraw.
		print("\u001b[7B")
		# ANSI escape: move the cursor up 10 lines so each round overwrites
		# the previous banner, report and prompt.
		banner = "\u001b[10AUpdating with value {v: <{w}}"
		for sample in data:
			print(banner.format(v=sample, w=30))
			stats.update(sample)
			print(stats)
			input("Press ENTER...")


	# EOF