Skip to content

Instantly share code, notes, and snippets.

@paul-schwendenman
Last active December 24, 2015 02:29
Show Gist options
  • Save paul-schwendenman/6730797 to your computer and use it in GitHub Desktop.
Save paul-schwendenman/6730797 to your computer and use it in GitHub Desktop.
Basic numerical statistics for python lists
'''
Basic statistics on a list.

The idea of this project is to find the best width for a column in a table
of data: wide enough for most entries, without being stretched by a few
unusually long ones.
'''
class Stats(object):
    '''
    Suggest a column width from basic statistics of a list.

    An instance is configured once and then called with a list. Each call
    stores the computed statistics on the instance (min, max, mean, stddev,
    filter_below, excluded, percentage, width) and returns the width.
    '''

    def __init__(self, multiplier=1, percent_flag=False, tolerance=0):
        '''
        multiplier   -- number of standard deviations added to the mean to
                        obtain the filter point
        percent_flag -- stored on the instance; not read anywhere in this
                        class
        tolerance    -- if the filter point plus this value exceeds the
                        maximum, the maximum itself is used as the width
        '''
        self.mult = multiplier
        self.percent_flag = percent_flag
        self.tolerance = tolerance

    def __call__(self, lst):
        '''
        Compute the statistics for lst and return the suggested width.

        If the first element is a str, every element is replaced by its
        length before the statistics are taken. Returns None, without
        computing anything, when lst is not a list or is empty.
        '''
        self.orig = lst
        # An empty list has no mean or max, so it is treated like the
        # "not a list" case instead of failing on lst[0].
        if not isinstance(lst, list) or not lst:
            return None
        if isinstance(lst[0], str):
            lst = [len(item) for item in lst]
        self.lst = lst

        count = len(lst)
        # The mean is computed once and reused; recomputing it per element
        # made the variance quadratic in the list length.
        mean = float(sum(lst)) / count
        variance = sum((value - mean) ** 2 for value in lst) / count

        self.min = min(lst)
        self.max = max(lst)
        self.mean = mean
        self.stddev = variance ** .5
        self.filter_below = self.mean + self.mult * self.stddev

        # A real list (not a lazy filter object) so that len() and repeated
        # iteration work on every Python version.
        self.excluded = [value for value in lst if value > self.filter_below]
        # Fraction of the items that are NOT excluded.
        self.percentage = float(count - len(self.excluded)) / count

        # When the filter point is within `tolerance` of the maximum, use
        # the maximum so that nothing is cut off for a small saving.
        if self.tolerance + self.filter_below > self.max:
            self.width = self.max
        else:
            self.width = self.filter_below
        return self.width

    def prettyPrint(self):
        '''
        Print the statistics stored by the most recent call, one per line.

        Each line is formatted into a single string before printing, so the
        output is the same whether print is a statement or a function.
        '''
        report = (
            ("Width:", self.width),
            ("Filter point:", self.filter_below),
            ("Mean:", self.mean),
            ("Std dev:", self.stddev),
            ("Percentage:", self.percentage),
            ("Min:", self.min),
            ("Max:", self.max),
            ("Diff:", self.max - self.filter_below),
            ("Excluded:", sorted(self.excluded)),
            ("Tolerance", self.tolerance),
        )
        for label, value in report:
            print("%s %s" % (label, value))
# Command-line driver: with no arguments, analyse 30 random strings;
# otherwise analyse the arguments (as ints if all of them parse as ints,
# else as strings, in which case Stats measures their lengths).
if __name__ == '__main__':
    import sys
    if len(sys.argv) == 1:
        import random
        import string
        # randint is inclusive at both ends, so the lengths really are
        # 1..100 as the message below states (randrange(100) gave 0..99).
        a = [''.join(random.choice(string.ascii_uppercase)
                     for _ in range(random.randint(1, 100)))
             for _ in range(30)]
        print("No args: Randomly selecting 30 strings 1 to 100 characters in length")
        print("")
    else:
        try:
            a = [int(i) for i in sys.argv[1:]]
        except ValueError:
            # At least one argument is not an integer: treat all as text.
            a = sys.argv[1:]
    s = Stats(multiplier=1.25, tolerance=5)
    width = s(a)
    s.prettyPrint()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment