Skip to content

Instantly share code, notes, and snippets.

@samaaron
Created December 15, 2009 16:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save samaaron/257079 to your computer and use it in GitHub Desktop.
Save samaaron/257079 to your computer and use it in GitHub Desktop.
#!/usr/bin/ruby -wKU
require 'benchmark'
# Counts case-insensitive word occurrences across the files of a directory
# and writes the tallies out as tab-separated "word<TAB>count" lines.
class WordCounter
  # Builds the word tally for every non-directory entry matched in
  # +directory+.
  #
  # @param directory [String] path of the directory whose files are counted
  def initialize(directory)
    # Use the +directory+ argument rather than reaching for ARGV, so the class
    # works for any caller and not only for the command-line script.
    #
    # NOTE: a "**" that is not followed by "/" is treated like a plain "*" by
    # Dir.glob, so only direct children are matched. Switch the pattern to
    # "**/*" if nested files are meant to be counted as well.
    files_to_count = Dir["#{directory}/**"].reject { |name| File.directory?(name) }
    @counts = Hash.new(0)
    count_words_in_files(files_to_count)
  end

  # Writes the tallies ordered by word, ascending (a..z).
  #
  # @param outfile [String] path of the file to (over)write
  def output_alphabetical(outfile)
    output(outfile, sorted_alphabetically)
  end

  # Writes the tallies ordered by count, highest first.
  #
  # @param outfile [String] path of the file to (over)write
  def output_decreasing(outfile)
    output(outfile, sorted_by_count)
  end

  # Writes one "word<TAB>count" line per entry of +counts+ to +outfile+.
  #
  # @param outfile [String] path of the file to (over)write
  # @param counts [#each] word/count pairs; defaults to the raw tally hash
  def output(outfile, counts = @counts)
    File.open(outfile, 'w') do |out|
      counts.each { |word, count| out << "#{word}\t#{count}\n" }
    end
  end

  private

  # @return [Array<Array(String, Integer)>] pairs ordered by word, ascending
  def sorted_alphabetically
    @counts.sort_by { |word, _count| word }
  end

  # @return [Array<Array(String, Integer)>] pairs ordered by descending count;
  #   equal counts are ordered by word so the output is deterministic
  def sorted_by_count
    @counts.sort_by { |word, count| [-count, word] }
  end

  # Adds every word (runs of \w characters, downcased) found in the given
  # files to the tally.
  #
  # @param filenames [Array<String>] paths of the files to read
  def count_words_in_files(filenames)
    filenames.each do |name|
      # File.read opens, reads and closes the file, so no handle is leaked.
      File.read(name).scan(/\w+/) { |word| @counts[word.downcase] += 1 }
    end
  end
end
# Benchmark driver: times building the word tally for the directory given as
# the first command-line argument and writing it out in each ordering.
#
# Usage: <script> DIRECTORY
directory = ARGV[0]

# Fail fast on a missing or invalid argument; without this check a missing
# argument turns the glob into "/**" and the filesystem root gets scanned.
unless directory && File.directory?(directory)
  abort "usage: #{$PROGRAM_NAME} DIRECTORY (expected an existing directory)"
end

# Output file names are kept exactly as they were (including the existing
# spelling of "descreasing") so anything consuming these files keeps working.
Benchmark.bm do |benchmark|
  # Count + write the alphabetical listing.
  benchmark.report("alphabetical:") do
    counter = WordCounter.new(directory)
    counter.output_alphabetical("counts-alphabetical-ruby")
  end

  # Count + write the listing ordered by decreasing count.
  benchmark.report("decreasing: ") do
    counter = WordCounter.new(directory)
    counter.output_decreasing("counts-descreasing-ruby")
  end

  # Count once, then write both listings.
  benchmark.report("total: ") do
    counter = WordCounter.new(directory)
    counter.output_alphabetical("counts-alphabetical-ruby")
    counter.output_decreasing("counts-descreasing-ruby")
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment