Created
September 19, 2015 04:57
-
-
Save rickhull/aa5a12916ed7267b7d9b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby

# Flip to true to run the built-in interval_key self-checks instead of reading
# numbers from stdin.
test_mode = false

# Histogram bins, keyed by each bin's upper bound and holding a running count.
# Keys are inserted in ascending order so that iteration prints them in order:
#   -1            values below zero
#   10, 20 .. 90, 100, 200 .. 900, 1000, 2000 .. 9000, 10000
#   10001         values above 10000
# The hash is deliberately left unfrozen: the counts are incremented later.
BINS = { -1 => 0 }
[10, 100, 1000].each do |mult|
  (1..9).each { |step| BINS[mult * step] = 0 }
end
BINS[10000] = 0
BINS[10001] = 0
# Determine the correct bin for any value.
#
# val - a Numeric sample.
#
# Returns the Integer key of the bin the value belongs to:
#   * 10001 when val is above 10000 (too high)
#   * -1 when val is negative (too low)
#   * 10 for anything from 0 through 10
#   * otherwise val rounded up to the next multiple of its own power of ten,
#     e.g. 25 -> 30, 101 -> 200, 3645 -> 4000, 9999 -> 10000
def interval_key(val)
  return 10001 if val > 10000 # too high
  return -1 if val < 0        # too low
  # Also protects against value 0, which must never reach the log10 call
  # inside scientize.
  return 10 if val <= 10

  mantissa, exponent = scientize(val)
  # An exact multiple (e.g. 200 -> 2.0) keeps its own bin because ceil leaves
  # whole numbers alone.
  mantissa.ceil * 10**exponent
end
# Return the two numeric components of scientific notation.
#
# val - a positive Numeric. Zero and negative values are not handled here
#       (log10 is not usable for them); interval_key filters those out
#       before calling.
#
# Returns a two-element Array [mantissa, exponent] such that
# mantissa * 10**exponent is approximately val, e.g. 3645 -> [3.645, 3].
def scientize(val)
  exponent = Math.log10(val).floor
  mantissa = val.to_f / 10**exponent
  [mantissa, exponent]
end
# Print a pseudo-histogram, with exact counts displayed.
#
# bins - Hash mapping a bin label to its count. Rows are printed in the
#        hash's iteration order.
#
# Writes to stdout a "Data points" header with the grand total, a separator,
# and then one row per bin: the right-justified label, that bin's rounded
# percentage of the total, the exact count, and a bar of '*' characters.
# An empty hash, or a hash whose counts are all zero, prints 0% rows (or no
# rows at all) instead of raising.
def print_histo(bins)
  counts = bins.values
  # Seed with 0 so an empty hash totals 0 rather than nil.
  total = counts.inject(0, :+)
  puts "Data points: #{total}"
  puts "==========="
  max_count = counts.max || 0
  count_width = max_count.to_s.length
  # 80ch width minus 10ch for ID / percentage minus 2ch for spacing
  bar_room = 68.0 - count_width
  # Bars are only ever scaled down (factor >= 1.0), never stretched, so small
  # data sets show one star per data point.
  normal_factor = [max_count / bar_room, 1.0].max
  bins.each do |bin, count|
    # With no data at all, count / 0.0 would be NaN and NaN.round raises
    # FloatDomainError, so report 0% explicitly.
    pctg = total.zero? ? 0 : (count * 100 / total.to_f).round
    bar_len = (count / normal_factor).ceil
    row = ["#{bin.to_s.rjust(5, ' ')}:#{pctg.to_s.rjust(2, ' ')}%",
           count.to_s.ljust(count_width, ' '),
           '*' * bar_len]
    puts row.join(' ')
  end
end
# Entry point. In test mode, check interval_key against a table of known
# values; otherwise read one number per line from stdin, count each into its
# bin, and print the histogram.
if test_mode
  puts "running tests..."
  # input value => expected bin key
  tests = {
    -5 => -1,
    -1 => -1,
    -0.00001 => -1,
    0 => 10,
    5 => 10,
    10 => 10,
    10.0 => 10,
    10.01 => 20,
    25 => 30,
    99 => 100,
    100 => 100,
    101 => 200,
    199 => 200,
    200 => 200,
    201 => 300,
    999 => 1000,
    1000 => 1000,
    3645 => 4000,
    9999 => 10000,
    10000 => 10000,
    463573745 => 10001,
  }
  tests.each do |val, expected|
    actual = interval_key(val)
    next if actual == expected

    raise "val: #{val}; expected: #{expected}; actual: #{actual}"
  end
  puts "SUCCESS"
else
  $stdin.each_line do |line|
    # Strip the line ending before comparing, so a final "null" without a
    # trailing newline (or one ending in "\r\n") is skipped as well.
    num = line.chomp
    next if num == "null"

    # String#to_f yields 0.0 for text that does not start with a number, so
    # such lines are counted in the 10 bin. A stricter filter would be:
    # next unless num.match /\A[\d.]+\z/
    key = interval_key(num.to_f)
    # BINS[key] ||= 0
    BINS[key] += 1
  end
  print_histo BINS
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment