Last active
August 29, 2015 14:04
-
-
Save sgrankin/9e238d1c753ad568acf3 to your computer and use it in GitHub Desktop.
Quantize inputs like dtrace does
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Re-bucket an input histogram into linear or exponential buckets.

Input (one tab-separated record per line):
    value\tcount
Output (one tab-separated record per bucket):
    quantized_value\tcount
"""
import argparse
import itertools
import sys
from collections import OrderedDict
def _power_of_two_bounds():
    """Yield the unbounded sequence 1, 2, 4, 8, ... of bucket boundaries."""
    for exponent in itertools.count():
        yield 2 ** exponent


def _linear_bounds(lower, upper, step):
    """Yield boundaries lower, lower+step, ... strictly below upper."""
    # ``range`` (not ``xrange``) keeps this runnable on Python 2 and 3.
    for bound in range(lower, upper, step):
        yield bound


def _log_linear_bounds(factor, lower, upper, step):
    """Yield linear boundaries inside each power-of-``factor`` magnitude.

    For every exponent in [lower, upper) the span
    [factor**exp, factor**(exp+1)) is walked in strides of
    ``max(factor**(exp+1) // step, 1)``.
    """
    for magnitude in range(lower, upper):
        bottom = factor ** magnitude
        top = bottom * factor
        # Never let the stride collapse to 0 when ``step`` exceeds ``top``.
        stride = max(top // step, 1)
        for bound in range(bottom, top, stride):
            yield bound


def _bounds_factory(parser, kind, params):
    """Return a zero-argument callable producing a fresh boundary iterator.

    Invalid parameter sets are reported through ``parser.error`` (usage
    message on stderr, exit status 2) instead of an unpacking traceback.
    """
    if kind == 'q':
        # Power-of-two quantization takes no parameters; extras are ignored.
        return _power_of_two_bounds

    names = {
        'l': ('LOWER', 'UPPER', 'STEP'),
        'll': ('FACTOR', 'LOWER', 'UPPER', 'STEPS'),
    }[kind]
    if len(params) != len(names):
        parser.error("type '{}' needs {} params ({}), got {}".format(
            kind, len(names), ' '.join(names), len(params)))

    lower, upper, step = params[-3:]
    if step < 1:
        parser.error('{} must be a positive integer'.format(names[-1]))
    if lower >= upper:
        parser.error('LOWER must be less than UPPER')

    if kind == 'l':
        return lambda: _linear_bounds(lower, upper, step)

    factor = params[0]
    if factor < 2:
        parser.error('FACTOR must be at least 2')
    if lower < 0:
        parser.error('LOWER must not be negative for log-linear buckets')
    return lambda: _log_linear_bounds(factor, lower, upper, step)


def _quantize(lines, make_bounds):
    """Accumulate ``value\\tcount`` lines into an ordered bucket -> count map.

    Each value is credited to the largest boundary that is <= the value.
    Values below the first boundary go to the key ``'<' + str(first)`` and
    values at or beyond the last boundary go to the last boundary.  Every
    boundary walked on the way is registered with a zero count so that the
    output keeps empty intermediate buckets in ascending order.
    """
    buckets = OrderedDict()
    for line in lines:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at EOF).
            continue
        val, count = (int(field) for field in line.split('\t', 1))
        last = None
        for bound in make_bounds():
            if last is None:
                last = '<' + str(bound)
            buckets.setdefault(last, 0)
            if val < bound:
                break
            last = bound
        else:
            # Ran off the end of the boundaries: overflow into the last one.
            buckets.setdefault(last, 0)
        buckets[last] += count
    return buckets


def main(argv=None, stdin=None, stdout=None):
    """Command-line entry point: re-bucket a histogram read from stdin.

    Args:
        argv: argument list; ``None`` means ``sys.argv[1:]``.
        stdin: iterable of input lines; ``None`` means ``sys.stdin``.
        stdout: writable text stream; ``None`` means ``sys.stdout``.

    Unknown command-line options are ignored.  Invalid quantization
    parameters terminate via ``parser.error`` (``SystemExit`` with status 2).
    Malformed input lines raise ``ValueError``.
    """
    parser = argparse.ArgumentParser(description='Quantize input data')
    parser.add_argument('-t', '--type', choices=('q', 'l', 'll'), default='q',
                        help='type of quantization: q (power of two), '
                             'l (linear), ll (log-linear)')
    parser.add_argument('params', type=int, nargs='*',
                        help='l: LOWER UPPER STEP; '
                             'll: FACTOR LOWER UPPER STEPS; q: none')
    args, _unknown = parser.parse_known_args(argv)

    make_bounds = _bounds_factory(parser, args.type, args.params)
    buckets = _quantize(sys.stdin if stdin is None else stdin, make_bounds)

    out = sys.stdout if stdout is None else stdout
    started = False
    for key, total in buckets.items():
        # Suppress the run of empty buckets preceding the first populated one.
        if not started and not total:
            continue
        started = True
        out.write('{}\t{}\n'.format(key, total))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
see quantize and lquantize in dtrace literature, and llquantize here