Skip to content

Instantly share code, notes, and snippets.

@rickhull
Created September 19, 2015 04:57
Show Gist options
  • Save rickhull/aa5a12916ed7267b7d9b to your computer and use it in GitHub Desktop.
Save rickhull/aa5a12916ed7267b7d9b to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# Flip to true to run the interval_key self-checks at the bottom of this
# script instead of reading data from stdin.
test_mode = false

# Histogram counters keyed by bin, every count starting at zero.
# Keys are inserted in ascending order: -1, then 10..90 by tens,
# 100..900 by hundreds, 1000..9000 by thousands, then 10000 and 10001.
BINS = {}
[
  -1,
  *[10, 100, 1000].flat_map { |scale| (1..9).map { |digit| scale * digit } },
  10000,
  10001
].each { |key| BINS[key] = 0 }
# Pick the histogram bin key for a numeric value.
#
# Values in (10, 10000] are rounded up at their leading digit, so the key
# is the bin's upper bound (25 => 30, 3645 => 4000). Values from 0 through
# 10 share bin 10, negatives go to -1 and anything above 10000 goes to 10001.
def interval_key(val)
  if val > 10000
    10001 # too high
  elsif val < 0
    -1 # too low
  elsif val <= 10
    10 # also keeps 0 away from the log10 call in scientize
  else
    mantissa, exponent = scientize(val)
    mantissa.ceil * 10**exponent
  end
end
# Break a number into the two parts of its scientific notation.
#
# Returns [mantissa, exponent] where exponent is the floor of log10(val)
# and mantissa is val divided by 10**exponent, e.g. 250 => [2.5, 2].
# The caller in this file only passes values greater than 10.
def scientize(val)
  exponent = Math.log10(val).floor
  mantissa = val.to_f / 10**exponent
  [mantissa, exponent]
end
# Print a pseudo-histogram to stdout, with exact counts displayed.
#
# bins - Hash of bin key => Integer count. Rows are printed in the hash's
#        iteration order.
#
# Output is a "Data points: N" header, a separator line, then one row per
# bin: the right-aligned key, the bin's share of the total as a rounded
# percentage, the exact count, and a bar of asterisks. Bars use one asterisk
# per data point unless the largest count would not fit in the remaining
# width, in which case every bar is scaled down by the same factor.
#
# An empty hash, or a hash whose counts are all zero, prints 0% for every
# row instead of raising.
def print_histo(bins)
  counts = bins.values
  # Seed with 0 so an empty hash sums to 0 rather than nil.
  total = counts.inject(0, :+)
  puts "Data points: #{total}"
  puts "==========="
  max_count = counts.max || 0
  count_width = max_count.to_s.length
  # 80ch width minus 10ch for ID / percentage minus 2ch for spacing
  bar_space = 68.0 - count_width
  # Clamp at 1.0 so bars are only ever shrunk, never stretched.
  normal_factor = [max_count / bar_space, 1.0].max
  bins.each do |bin, count|
    # With no data points the share would be 0 / 0.0 = NaN, and NaN.round
    # raises FloatDomainError, so report 0% directly.
    pctg = total.zero? ? 0 : (count * 100 / total.to_f).round
    bar_width = (count / normal_factor).ceil
    puts ["#{bin.to_s.rjust(5, ' ')}:#{pctg.to_s.rjust(2, ' ')}%",
          count.to_s.ljust(count_width, ' '),
          '*' * bar_width].join(' ')
  end
end
# Script entry point. In test mode, check interval_key against a table of
# known value => bin pairs; otherwise tally one number per stdin line into
# BINS and print the histogram.
if test_mode
  puts "running tests..."
  # input value => expected bin key
  tests = {
    -5 => -1,
    -1 => -1,
    -0.00001 => -1,
    0 => 10,
    5 => 10,
    10 => 10,
    10.0 => 10,
    10.01 => 20,
    25 => 30,
    99 => 100,
    100 => 100,
    101 => 200,
    199 => 200,
    200 => 200,
    201 => 300,
    999 => 1000,
    1000 => 1000,
    3645 => 4000,
    9999 => 10000,
    10000 => 10000,
    463573745 => 10001,
  }
  tests.each do |val, expected|
    actual = interval_key(val)
    if actual != expected
      raise "val: #{val}; expected: #{expected}; actual: #{actual}"
    end
  end
  puts "SUCCESS"
else
  $stdin.each_line do |line|
    # Strip before comparing so "null" is recognised with or without a
    # trailing newline (last line of input) and with CRLF line endings.
    value = line.strip
    # Blank lines and nulls carry no data; to_f would turn them into 0.0
    # and wrongly inflate the lowest bin.
    next if value.empty? || value == "null"
    # NOTE: any other non-numeric text still parses as 0.0 via to_f.
    BINS[interval_key(value.to_f)] += 1
  end
  print_histo BINS
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment