public
Created

[05/13/13] Challenge #125 [Easy] Word Analytics

  • Download Gist
gistfile1.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
# Word Analytics: reads the text file named by the first command-line
# argument and prints word, letter and symbol statistics for it.
#
# Usage: ruby <this-script> <input-file>

# Splits +text+ into paragraphs (chunks terminated by a blank line, i.e. the
# literal separator "\n\n") and each paragraph into lower-cased tokens.
#
# A token is either a run of letters/apostrophes (a candidate word) or a
# single other non-whitespace character such as punctuation or a digit.
#
# Returns an Array of token Arrays, one per paragraph. A whitespace-only
# paragraph yields an empty Array.
def tokenize_paragraphs(text)
  text.each_line("\n\n").map do |paragraph|
    # Non-bang downcase/gsub: the bang variants return nil when nothing
    # changed, which made chaining them crash on already lower-case input.
    # Every character that cannot be part of a word is padded with spaces so
    # that it becomes a token of its own after the split.
    paragraph.downcase.gsub(/[^a-z'\s]/) { |char| " #{char} " }.split
  end
end

# Returns up to +limit+ keys of the +counts+ Hash (key => Integer), ordered by
# descending count. Ties are broken by insertion order, so the key that was
# seen first in the text wins.
def most_common(counts, limit)
  ranked = counts.each_with_index.sort_by { |(_key, count), index| [-count, index] }
  ranked.first(limit).map { |(key, _count), _index| key }
end

# Computes the statistics for +text+ (a String).
#
# Returns a Hash with these keys:
#   :word_count       - number of tokens containing at least one letter
#   :letter_count     - number of a-z characters inside those tokens
#   :symbol_count     - number of all other non-whitespace characters
#   :top_words        - up to three most frequent words (Array of String)
#   :top_letters      - up to three most frequent letters (Array of String)
#   :top_first_word   - most frequent first word of a paragraph, or nil
#   :once_word_count  - number of distinct words that occur exactly once
#   :unused_letters   - letters of a-z that never occur (Array of String)
def analyze(text)
  words = Hash.new(0)
  letters = Hash.new(0)
  first_words = Hash.new(0)
  symbol_count = 0

  tokenize_paragraphs(text).each do |tokens|
    tokens.each do |token|
      words[token] += 1 if token =~ /[a-z]/
      # Every character is classified on its own, so an apostrophe counts as
      # one symbol whether it sits inside a word or stands alone.
      token.each_char do |char|
        if char =~ /[a-z]/
          letters[char] += 1
        else
          symbol_count += 1
        end
      end
    end

    # Skip leading punctuation, and skip empty paragraphs entirely (these
    # used to raise NoMethodError on nil).
    first_word = tokens.find { |token| token =~ /[a-z]/ }
    first_words[first_word] += 1 if first_word
  end

  {
    word_count: words.values.inject(0, :+),
    letter_count: letters.values.inject(0, :+),
    symbol_count: symbol_count,
    top_words: most_common(words, 3),
    top_letters: most_common(letters, 3),
    top_first_word: most_common(first_words, 1).first,
    once_word_count: words.values.count(1),
    unused_letters: ('a'..'z').reject { |letter| letters.key?(letter) }
  }
end

# Formats the Hash returned by #analyze as an Array of output lines.
# Lines whose statistic is empty (no words, no letters, no first word, no
# unused letters) are omitted.
def report_lines(stats)
  lines = [
    "#{stats[:word_count]} words",
    "#{stats[:letter_count]} letters",
    "#{stats[:symbol_count]} symbols"
  ]
  unless stats[:top_words].empty?
    lines << "Top three most common words: #{stats[:top_words].join(', ')}"
  end
  unless stats[:top_letters].empty?
    lines << "Top three most common letters: #{stats[:top_letters].join(', ')}"
  end
  if stats[:top_first_word]
    lines << "Most common first word of a paragraph: #{stats[:top_first_word]}"
  end
  lines << "#{stats[:once_word_count]} words used only once"
  unless stats[:unused_letters].empty?
    lines << "Letters not used: #{stats[:unused_letters].join(', ')}"
  end
  lines
end

input_file_name = ARGV[0]
if input_file_name.nil?
  puts "no input given"
else
  # File.read opens, reads and closes the file in one call, so no handle is
  # left open.
  puts report_lines(analyze(File.read(input_file_name)))
end

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.