Skip to content

Instantly share code, notes, and snippets.

@Jasata
Created December 17, 2019 14:49
Show Gist options
  • Save Jasata/b72727a15749e906b9d4f0ba0596d9a5 to your computer and use it in GitHub Desktop.
Save Jasata/b72727a15749e906b9d4f0ba0596d9a5 to your computer and use it in GitHub Desktop.
Welford's method for running statistics
#! /usr/bin/env python3
#
# A class to compute the mean, SAMPLE variance, and SAMPLE standard deviation
# of a stream of data.
#
# RunningStatistics.py - 2018, Jani Tammi <jasata@utu.fi>
# 0.1.0 Initial version.
#
# Makes use of a method to calculate running variance
# by B. P. Welford (1962), as presented in D. Knuth's
# 'The Art of Computer Programming', Vol 2, page 232, 3rd edition.
# see: https://www.johndcook.com/blog/standard_deviation/
#
#
#
# Maintains following values:
#
# .nsamples: int Number of samples calculated.
# .min: float Smallest sample value encountered.
# .max: float Largest sample value encountered.
# .mean: float Mean of all samples.
# .rolling_mean: float Mean of X last samples.
# .standard_deviation: float Sample standard deviation.
# .variance: float Sample variance.
# .range: float Difference between min and max.
#
# Constructor/init takes only one (optional) argument, the number
# of samples to keep in a rolling buffer/list.
#
# .reset() method has been provided, if reuse of an object is desired.
#
import math
class RunningStatistics:
    """Track summary statistics of a stream of numbers, one sample at a time.

    The overall mean and sample variance are maintained incrementally with
    Welford's method, so the samples themselves are not stored for them.
    Only the most recent ``n_rolling_values`` samples are retained, and
    only to serve ``rolling_mean``.

    Attributes:
        nsamples: Number of samples seen since construction or last reset.
        min: Smallest sample seen, or ``None`` while there are no samples.
        max: Largest sample seen, or ``None`` while there are no samples.
        mean: Mean of all samples (``0.0`` while there are no samples).
    """

    def __init__(self, n_rolling_values=5):
        """Create an empty accumulator.

        Args:
            n_rolling_values: How many of the latest samples the rolling
                mean covers.  With zero or a negative number no samples
                are retained and the rolling mean stays at ``0.0``.
        """
        self._nrolling = n_rolling_values
        self._lrolling = []
        self.min = None
        self.max = None
        # State for Welford's method.
        self.nsamples = 0
        self.mean = 0.0
        self._svar = 0.0  # running sum of squared deviations from the mean

    @property
    def rolling_mean(self) -> float:
        """Mean of the retained latest samples, or ``0.0`` if there are none."""
        window = self._lrolling
        if not window:
            return 0.0
        return sum(window) / len(window)

    @property
    def variance(self) -> float:
        """Sample variance (n - 1 denominator); ``0.0`` below two samples."""
        if self.nsamples > 1:
            return self._svar / (self.nsamples - 1)
        return 0.0

    @property
    def standard_deviation(self) -> float:
        """Sample standard deviation, the square root of ``variance``."""
        return math.sqrt(self.variance)

    @property
    def range(self) -> float:
        """Difference between ``max`` and ``min``; ``0.0`` without samples."""
        # Test for None explicitly: a minimum of 0 is a legitimate value and
        # must not be mistaken for "no samples yet".
        if self.min is None or self.max is None:
            return 0.0
        return self.max - self.min

    def update(self, value: float) -> None:
        """Fold one new sample into every maintained statistic.

        Args:
            value: The sample to add.
        """
        # "is None" rather than truthiness, so that an extreme of exactly 0
        # is kept instead of being overwritten by the next sample.
        self.min = value if self.min is None else min(self.min, value)
        self.max = value if self.max is None else max(self.max, value)

        # Rolling window: append the newest, drop the oldest once over size.
        self._lrolling.append(value)
        if len(self._lrolling) > self._nrolling:
            self._lrolling.pop(0)

        # Welford's update of the mean and the sum of squared deviations.
        self.nsamples += 1
        if self.nsamples == 1:
            self.mean = value
            self._svar = 0.0
        else:
            old_mean = self.mean
            self.mean = old_mean + (value - old_mean) / self.nsamples
            self._svar = self._svar + (value - old_mean) * (value - self.mean)

    def reset(self) -> None:
        """Forget all samples so that the object can be reused."""
        # min/max must be cleared too, otherwise a reused object would keep
        # reporting extremes of the previous data stream.
        self.min = None
        self.max = None
        self.nsamples = 0
        self.mean = 0.0
        self._svar = 0.0
        self._lrolling = []

    def __str__(self) -> str:
        """Return an eight-line, dot-leader aligned report of all statistics.

        Each line is a label padded with dots to 40 characters, ``": "``,
        and the value left-aligned in 40 characters.  ``min``/``max`` are
        shown as ``n/a`` while no sample has been seen.
        """
        label_width = 40
        value_width = 40
        rows = (
            ("Number of samples", self.nsamples),
            ("Minimum", self.min),
            ("Maximum", self.max),
            ("Range", self.range),
            ("Mean", self.mean),
            (
                "Rolling mean of {} last samples".format(self._nrolling),
                self.rolling_mean,
            ),
            ("Variance", self.variance),
            ("Standard Deviation", self.standard_deviation),
        )
        return "\n".join(
            "{label:.<{w}}: {value: <{t}}".format(
                label=label,
                # None does not accept a format spec (TypeError), so show a
                # placeholder instead of crashing on an empty accumulator.
                value="n/a" if value is None else value,
                w=label_width,
                t=value_width,
            )
            for label, value in rows
        )
if __name__ == '__main__':
    # Interactive demo: feed a fixed series into the statistics object one
    # value at a time and redraw the report in place after each value.
    import time  # only needed by the optional timing experiment below

    samples = [
        -86, -44, -141, -172, -112, -128, 10, 137, 118, 7, 188, 172, 170,
        17, -101, -186, -105, 28, -121, -97, 4, -121, -52, -114, -134,
        -197, -118, -61, -139,
    ]
    stats = RunningStatistics(6)

    # Optional experiment with a large random data set:
    # import random
    # start = time.time()
    # data = [random.uniform(-10000, 10000) for _ in range(0,20000)]
    # lapsed = time.time() - start
    # print("Data generation took", lapsed, "seconds")

    # ANSI escape: move the cursor down 7 rows to make room for the report.
    print("\u001b[7B")
    for sample in samples:
        # ANSI escape: move the cursor up 10 rows so that this iteration
        # overwrites the previous one (1 header line, 8 report lines and
        # the prompt line).
        print("\u001b[10AUpdating with value {v: <{w}}".format(v=sample, w=30))
        stats.update(sample)
        print(stats)
        input("Press ENTER...")
# EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment