Skip to content

Instantly share code, notes, and snippets.

@sgrankin
Last active August 29, 2015 14:04
Show Gist options
  • Save sgrankin/9e238d1c753ad568acf3 to your computer and use it in GitHub Desktop.
Save sgrankin/9e238d1c753ad568acf3 to your computer and use it in GitHub Desktop.
Quantize inputs like dtrace does
"""
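Re-bucket an input histogram into power-of-two, linear, or log-linear buckets.

Quantization types (-t):
  q   power-of-two buckets (default; positional parameters are ignored)
  l   linear buckets; parameters: LOWER UPPER STEP
  ll  log-linear buckets; parameters: FACTOR LOWER UPPER STEPS

Input (stdin), one record per line:
value\tcount
Output (stdout), one bucket per line:
quantized_value\tcount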
"""
import argparse
import itertools
import sys
from collections import OrderedDict
try:
    xrange  # Python 2 spelling of the lazy integer range
except NameError:
    xrange = range  # Python 3: range is already lazy


def _build_parser():
    """Return the argparse parser describing the command line."""
    parser = argparse.ArgumentParser(description='Quantize input data')
    parser.add_argument('-t', '--type', choices=('q', 'l', 'll'), default='q',
                        help='type of quantization')
    parser.add_argument('params', type=int, nargs='*',
                        help='bucket parameters: none for q; '
                             'LOWER UPPER STEP for l; '
                             'FACTOR LOWER UPPER STEPS for ll')
    return parser


def _bucket_bounds(kind, params):
    """Return a zero-argument callable yielding ascending bucket boundaries.

    kind   -- 'q' (powers of two, unbounded), 'l' (linear) or 'll'
              (each power of ``factor`` subdivided linearly).
    params -- list of ints; ignored for 'q', (lower, upper, step) for 'l',
              (factor, lower, upper, steps) for 'll'.

    Raises ValueError when the parameters cannot describe a non-empty,
    ascending sequence of boundaries.
    """
    if kind == 'q':
        def gen():
            for exponent in itertools.count():
                yield 2 ** exponent
        return gen

    if kind == 'l':
        if len(params) != 3:
            raise ValueError(
                "type 'l' takes exactly 3 parameters: LOWER UPPER STEP")
        lower, upper, step = params
        if step < 1 or lower >= upper:
            raise ValueError(
                "type 'l' requires LOWER < UPPER and STEP >= 1")

        def gen():
            return iter(xrange(lower, upper, step))
        return gen

    if kind == 'll':
        if len(params) != 4:
            raise ValueError(
                "type 'll' takes exactly 4 parameters: "
                "FACTOR LOWER UPPER STEPS")
        factor, lower, upper, step = params
        # A negative exponent would turn factor ** i into a float, and a
        # factor below 2 or a zero step yields no usable boundaries.
        if factor < 2 or step < 1 or not 0 <= lower < upper:
            raise ValueError(
                "type 'll' requires FACTOR >= 2, 0 <= LOWER < UPPER "
                "and STEPS >= 1")

        def gen():
            for magnitude in xrange(lower, upper):
                bottom = factor ** magnitude
                top = bottom * factor
                # Never step by 0 when a magnitude is narrower than STEPS.
                lstep = max(top // step, 1)
                for bound in xrange(bottom, top, lstep):
                    yield bound
        return gen

    raise ValueError('unknown quantization type: {!r}'.format(kind))


def _quantize(lines, gen):
    """Accumulate ``value\\tcount`` lines into buckets.

    Returns an OrderedDict in ascending bucket order. Keys are the lower
    boundary of each bucket (int), except the first key, which is the
    string '<N' for values below the first boundary N. Values at or above
    the last boundary land in the last bucket. Every bucket below a used
    one is registered (with count 0) so gaps show up in the output.
    Blank lines are skipped.
    """
    buckets = OrderedDict()
    for line in lines:
        if not line.strip():
            continue
        val, count = map(int, line.split('\t', 1))
        label = None
        # Boundaries are walked from the start for every record so that
        # buckets are always inserted in ascending order.
        for bound in gen():
            if label is None:
                label = '<' + str(bound)
            buckets.setdefault(label, 0)
            if val < bound:
                break
            label = bound
        else:
            # Ran past the final boundary: the last bucket is open-ended.
            buckets.setdefault(label, 0)
        buckets[label] += count
    return buckets


def _render(buckets):
    """Yield ``label\\tcount`` rows, dropping leading empty buckets."""
    started = False
    for label, total in buckets.items():
        if not started and not total:
            continue
        started = True
        yield '{}\t{}'.format(label, total)


def main(argv=None, stdin=None, stdout=None):
    """Run the quantizer.

    argv   -- argument list (defaults to sys.argv[1:]); unknown options
              are tolerated, as before.
    stdin  -- iterable of input lines (defaults to sys.stdin).
    stdout -- writable text stream (defaults to sys.stdout).

    Exits through argparse's error path (status 2) on invalid parameters.
    """
    parser = _build_parser()
    args, _unknown = parser.parse_known_args(argv)
    try:
        gen = _bucket_bounds(args.type, args.params)
    except ValueError as err:
        parser.error(str(err))
    source = sys.stdin if stdin is None else stdin
    sink = sys.stdout if stdout is None else stdout
    for row in _render(_quantize(source, gen)):
        sink.write(row + '\n')


if __name__ == '__main__':
    main()
@sgrankin
Copy link
Author

see quantize and lquantize in dtrace literature, and llquantize here

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment