Created
November 17, 2016 15:48
-
-
Save loisgh/43e95481f017059703995652e3d2718e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The output is as follows:
# each word, followed by "total: " with the total number of occurrences next to it,
# then a comma-separated list of sentence:count pairs, giving the sentence that the
# word appears in and the number of times it appears there.
# For example, 2:1 means that the word appears in the second sentence one time.
# Prints a concordance (alphabetical word list with per-sentence counts and
# an overall total) for a hard-coded sample text. The printing happens as a
# side effect of evaluating the class body.
#
# NOTE(review): words are split on whitespace only, so punctuation stays
# attached to tokens (e.g. "concordance," and "bonus:" are keys), and the
# ". " following "i.e" is treated as a sentence boundary. Both affect the
# reported words and sentence numbers; left as-is to keep the output stable.
class Concordance
  # Summarises where +key+ occurs across the per-sentence word counts.
  #
  # @param shash [Array<Hash>] one word => count hash per sentence, in order
  # @param key [Object] the word to look up (a String in this script)
  # @return [Array(String, Integer)] a two-element array: a string made of a
  #   leading space followed by comma-separated "sentence:count" pairs
  #   (sentences numbered from 1), and the total count over all sentences
  def self.sent_totals(shash, key)
    refs = []
    total = 0
    shash.each_with_index do |counts, idx|
      # Skip sentences without the word so hashes lacking a 0 default
      # cannot feed nil into the running total.
      next unless counts.key?(key)

      refs << "#{idx + 1}:#{counts[key]}"
      total += counts[key]
    end
    # The leading space is part of the output format the caller prints.
    [" #{refs.join(',')}", total]
  end

  in_text = "Given an arbitrary text document written in English, write a program that will generate a concordance, i.e. an alphabetical list of all word occurrences, labeled with word frequencies. Bonus: label each word with the sentence numbers in which each occurrence appeared."

  # One word => count hash per sentence; a sentence ends at ".", "?", "!"
  # or ";" followed by a space.
  sentence_counts = in_text.downcase.split(/\. |\? |\! |\; /).map do |sentence|
    sentence.split(" ").each_with_object(Hash.new(0)) { |word, counts| counts[word] += 1 }
  end

  # Every distinct word, in alphabetical order, with its total and locations.
  sentence_counts.flat_map(&:keys).uniq.sort.each do |word|
    refs, total = sent_totals(sentence_counts, word)
    puts "#{word} total: #{total} #{refs}"
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment