# Counts the number of files in each subdirectory of the given directory
# and then outputs the counts as a Tufte-esque (stem-and-leaf) histogram,
# where the number before the | is the 10's column, and each digit after
# the | represents one data point with that value. This lets you see the
# spread to get a sense of the distribution. Each 10's value has two
# rows: one for values 0-4, one for 5-9.
#
#   0|
#   0|99
#   1|0001233333333344444444444
#   1|55555555555556666666677777777777777777778888888888888888888888899999
#   2|0000000000001111111111111111112222222222222222333333333334444444444
#   2|5555555555555555555666666667777788888888999
#   3|123
#   3|
#
# Usage: ruby <this script> [directory]
# The directory defaults to the current directory when omitted.

# Returns an array with one entry count per visible subdirectory of +dir+.
#
# Entries of +dir+ whose name starts with "." (including "." and "..") and
# entries that are not directories are skipped. The count for a
# subdirectory includes everything directly inside it (files, nested
# directories, dotfiles) except the "." and ".." entries.
def subdirectory_file_counts(dir)
  subdirs = Dir.entries(dir).select do |entry|
    !entry.start_with?(".") && File.directory?(File.join(dir, entry))
  end

  # Dir.entries also lists "." and "..", so drop those two from the total.
  subdirs.map { |entry| Dir.entries(File.join(dir, entry)).size - 2 }
end

# Builds the histogram rows for +counts+ (an array of non-negative
# integers) and returns them as an array of strings, two per 10's value:
# first the row holding the 0-4 digits, then the row holding the 5-9 digits.
#
# Only 10's values that have at least one data point produce rows; an
# empty +counts+ yields an empty array.
def histogram_lines(counts)
  rows = Hash.new { |hash, stem| hash[stem] = [[], []] }

  # Walking the counts in ascending order means stems are inserted in
  # ascending order and the digits within each row are already sorted,
  # so no further sorting is needed when rendering.
  counts.sort.each do |count|
    stem, leaf = count.divmod(10)
    rows[stem][leaf < 5 ? 0 : 1] << leaf
  end

  rows.flat_map do |stem, halves|
    halves.map { |leaves| "#{stem}|#{leaves.join}" }
  end
end

if __FILE__ == $PROGRAM_NAME
  # Fall back to the current directory instead of crashing on a nil path.
  dir = ARGV[0] || "."
  histogram_lines(subdirectory_file_counts(dir)).each { |line| puts line }
end