Skip to content

Instantly share code, notes, and snippets.

@stantonk
Last active November 4, 2017 14:29
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stantonk/977de66d2d9749a62623 to your computer and use it in GitHub Desktop.
Save stantonk/977de66d2d9749a62623 to your computer and use it in GitHub Desktop.
Get a histogram, mean, median, stddev, and percentiles from a pipe on the command line with numpy
#!/usr/bin/env python
"""
Note: requires numpy. `sudo pip install numpy`
Example:
$ echo -e "1\n2\n5\n10\n20\n" | get-stats
mean=7.6
median=5.0
std=6.94550214167
95th=18.0
99th=19.6
2 1.0 - 3.0
1 3.0 - 8.0
2 8.0 - 21.0
0 21.0 - 55.0
0 55.0 - 144.0
0 144.0 - 377.0
0 377.0 - 987.0
0 987.0 - 2584.0
0 2584.0 - 6765.0
0 6765.0 - 17711.0
0 17711.0 - 1000000.0
"""
import argparse
import re
import sys
from numpy import mean
from numpy import median
from numpy import std
from numpy import percentile
from numpy import histogram
# Names of the summary statistics to report. Each name is looked up in the
# module namespace (where the numpy functions of the same name are imported)
# and called on the collected values.
stats = ('mean', 'median', 'std')
def csv_list(s):
    """Parse a comma-separated string of numbers into a list of floats.

    Intended for use as an argparse ``type=`` callable.

    Args:
        s: Comma-separated numbers, e.g. ``'1,5,10'``. Whitespace around
            each item is tolerated by ``float()``.

    Returns:
        The parsed values as a list of floats, in input order.

    Raises:
        argparse.ArgumentTypeError: If ``s`` cannot be split and converted
            to floats.
    """
    try:
        return [float(item) for item in s.split(',')]
    except (AttributeError, TypeError, ValueError):
        # argparse shows this message to the user verbatim, so it must say
        # what was expected and what was actually received.
        raise argparse.ArgumentTypeError(
            'expected comma-separated numbers, got %r' % (s,))
if __name__ == '__main__':
    # Read newline-separated numbers from stdin, then print the summary
    # statistics named in `stats`, the requested percentiles, and a histogram
    # over the requested bin edges.
    parser = argparse.ArgumentParser(description='compute stats from newline separated stdin')
    parser.add_argument('-b', '--bins', type=csv_list, default='1,5,10,20,40,80',
                        help='comma-separated histogram bin edges')
    parser.add_argument('-p', '--percentiles', type=csv_list, default='50,95,99',
                        help='comma-separated percentiles to report')
    args = parser.parse_args()

    vals = []
    for line in sys.stdin:
        try:
            # float() ignores surrounding whitespace, including the newline.
            vals.append(float(line))
        except ValueError:
            # Best effort: blank or non-numeric lines are skipped.
            continue

    if not vals:
        # The numpy reductions below are not meaningful on an empty sample.
        sys.exit('no numeric values read from stdin')

    # Each name in `stats` resolves to the numpy function imported under
    # that same name at module level.
    for stat in stats:
        print('%s=%s' % (stat, globals()[stat](vals)))

    # %g drops the trailing ".0" so the label reads "95th", not "95.0th".
    for pct in args.percentiles:
        print('%gth=%s' % (pct, percentile(vals, pct)))

    # One row per bin: count, then the bin's lower and upper edge.
    hist, bin_edges = histogram(vals, bins=args.bins)
    for count, lower, upper in zip(hist, bin_edges[:-1], bin_edges[1:]):
        print('%s \t %s - %s' % (count, lower, upper))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment