Skip to content

Instantly share code, notes, and snippets.

@lnaia
Created August 18, 2015 12:11
Show Gist options
  • Save lnaia/c32ff26d0abffb5df035 to your computer and use it in GitHub Desktop.
Save lnaia/c32ff26d0abffb5df035 to your computer and use it in GitHub Desktop.
Displays the top 10 words with their counts and percentages, followed by a histogram of the word distribution.
#!/usr/bin/ruby
#encoding: UTF-8
require 'terminal-table'
require 'histogram/array'
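# Usage:
# echo "a e i o u" | word_histogram
# Note: only words longer than three characters are counted and ranked;
# shorter words still contribute to the total word count.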
# Number of entries shown in the "Top N Words" table.
top_words = 10

# Returns every word in +text+, in order of appearance. A word is a maximal
# run of \w characters. Scanning for words (rather than splitting on \W+)
# never produces the empty leading token that String#split returns when the
# text starts with a separator, so the word total is not inflated.
def extract_words(text)
  text.scan(/\w+/)
end

# Returns a Hash mapping each word longer than three characters to the
# number of times it occurs in +words+. Shorter words are skipped.
def count_long_words(words)
  words.each_with_object(Hash.new(0)) do |word, counts|
    counts[word] += 1 if word.length > 3
  end
end

# Returns +entries+ (hashes with :word and :count) ordered by descending
# count. Equal counts are ordered alphabetically by word so the output is
# the same on every run.
def rank_by_count(entries)
  entries.sort_by { |entry| [-entry[:count], entry[:word]] }
end

# Read everything from the files named on the command line, or from STDIN
# when no file is given.
contents = ''
ARGF.each_line { |line| contents << line }

# == count occurrence of all words ==
words = extract_words(contents)
words_histogram = count_long_words(words)
words_histogram_arr = words_histogram.map { |word, count| {:word => word, :count => count} }
sort_by_count = rank_by_count(words_histogram_arr)
# == create table with top words ==
# Percentages are relative to every word read (words.length), which includes
# the short words that are left out of the ranking.
total_words = words.length
options = {:title => "Top #{top_words} Words", :headings => ['Word', 'Count', '%']}
table = Terminal::Table.new(options) do |t|
  # first(top_words) yields at most top_words entries; an inclusive
  # 0..top_words range would list top_words + 1 rows.
  sort_by_count.first(top_words).each do |item|
    percentage = (item[:count] * 100.0) / total_words
    t.add_row [item[:word], item[:count], format('%.2f%%', percentage)]
  end
  t.add_separator
  t.add_row ['total words ', total_words, '100%']
end
puts table
# == create histogram ==
# One row per counted word, longest words first. Each row ends with a bar of
# '#' characters whose length is the word's share of all words, scaled so
# that 100% corresponds to max_num_bars characters.
sort_by_length = words_histogram_arr.sort { |left, right| left[:word].length <=> right[:word].length }.reverse
max_num_bars = 100
total_words = words.length
borderless = {:border_x => '', :border_i => '', :border_y => ''}
options = {
  :title => 'Word Histogram',
  :headings => ['count', 'length', 'percentage', 'word'],
  :style => borderless
}
# NOTE(review): every row below has five cells (the last one is the bar) but
# only four headings are declared - confirm the unnamed bar column is intended.
table = Terminal::Table.new(options) do |t|
  sort_by_length.each do |entry|
    share = (entry[:count] * 100.0) / total_words
    # The bar length goes through a two-decimal string before truncation, so
    # a share that rounds up to a whole number at two decimals gains a bar.
    bar_count = format('%.2f', (share * max_num_bars * 1.0 / 100)).to_i
    # Words whose share is too small to draw a single bar are left out.
    next if bar_count.zero?

    row = [
      entry[:count].to_s.rjust(2), # count
      entry[:word].length,         # length
      format('%.2f', share),       # percentage
      entry[:word],
      '#' * bar_count
    ]
    t.add_row row
  end
end
puts table
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment