Skip to content

Instantly share code, notes, and snippets.

@sukhchander
Created May 23, 2016 19:38
Show Gist options
  • Save sukhchander/ef62e2f29f9e99791b80a8a77ebb738f to your computer and use it in GitHub Desktop.
Save sukhchander/ef62e2f29f9e99791b80a8a77ebb738f to your computer and use it in GitHub Desktop.
concordance
# Sample document fed to each concordance implementation below.
# Frozen so the shared constant cannot be mutated in place by accident.
TEXT = 'Given an arbitrary text document written in English, write a program
that will generate a concordance, i.e. an alphabetical list of all word
occurrences, labeled with word frequencies. Bonus: label each word with the
sentence numbers in which each occurrence appeared.'.freeze
# Builds a concordance using the simplest tokenisation: the text is cut into
# sentences at every ".", commas are removed, and the remainder is split on
# whitespace and lower-cased.
#
# Because every "." ends a sentence, an abbreviation such as "i.e." is broken
# into separate words in separate sentences. Punctuation other than "." and
# "," (for example ":") stays attached to the word.
#
# @param string [String] the text to index
# @return [Hash{String => Array}] word => [occurrence count, [1-based sentence numbers]]
def concordance1(string)
  entries = {}
  string.split('.').each.with_index(1) do |sentence, number|
    sentence.delete(',').split(' ').each do |token|
      key = token.downcase
      tally = (entries[key] ||= [0, []])
      tally[0] += 1
      tally[1] << number
    end
  end
  entries
end
# Builds a concordance like concordance1, but keeps the abbreviation "i.e."
# intact: its periods are swapped for a "{dot}" placeholder before the text is
# split into sentences at ".", and restored on each word afterwards.
#
# Only the lower-case form "i.e." is protected; commas are removed and words
# are lower-cased. Other punctuation stays attached to the word.
#
# @param string [String] the text to index
# @return [Hash{String => Array}] word => [occurrence count, [1-based sentence numbers]]
def concordance2(string)
  entries = {}
  masked = string.gsub(/(i)\.(e)\./, 'i{dot}e{dot}')
  masked.split('.').each.with_index(1) do |sentence, number|
    sentence.gsub(',', '').split(' ').each do |token|
      # Put the real periods back before using the word as a key.
      key = token.gsub(/{dot}/, '.').downcase
      tally = (entries[key] ||= [0, []])
      tally[0] += 1
      tally[1] << number
    end
  end
  entries
end
# Builds a concordance after stripping the punctuation characters
# . , / # ! $ % ; from the whole text, then lower-casing each
# whitespace-separated word.
#
# NOTE: the periods are removed before the text is split on ".", so the split
# never finds a sentence boundary and every occurrence is recorded as
# sentence 1. Characters outside the stripped set (for example ":") stay
# attached to the word.
#
# @param string [String] the text to index
# @return [Hash{String => Array}] word => [occurrence count, [sentence numbers]]
def concordance3(string)
  stripped = string.gsub(/[.,\/#!$%,;]/, '')
  stripped.split('.').each_with_index.each_with_object({}) do |(sentence, index), entries|
    sentence.split(' ').map(&:downcase).each do |word|
      entries[word] ||= [0, []]
      entries[word][0] += 1
      entries[word][1] << index + 1
    end
  end
end
# Renders a concordance as text, one "word {count:n1,n2,...}" line per entry,
# with entries sorted by word. Falsy entries are skipped.
#
# Note: defined at top level under this name, the method takes precedence over
# Kernel#format for the rest of the program.
#
# @param collection [Hash{String => Array}] word => [count, [sentence numbers]]
# @return [String] the rendered lines, each terminated by a newline
def format(collection)
  ordered = collection.sort_by { |word, _| word }
  ordered.each_with_object(+'') do |pair, output|
    next unless pair

    word = pair[0]
    entry = pair[1]
    output << "#{word} {#{entry[0]}:#{entry[1].join(',')}}\n"
  end
end
# Print a blank separator line, then the concordance built by concordance1.
puts
puts format(concordance1(TEXT))
=begin
a {2:1,1}
all {1:3}
alphabetical {1:3}
an {2:1,3}
appeared {1:4}
arbitrary {1:1}
bonus: {1:4}
concordance {1:1}
document {1:1}
e {1:2}
each {2:4,4}
english {1:1}
frequencies {1:3}
generate {1:1}
given {1:1}
i {1:1}
in {2:1,4}
label {1:4}
labeled {1:3}
list {1:3}
numbers {1:4}
occurrence {1:4}
occurrences {1:3}
of {1:3}
program {1:1}
sentence {1:4}
text {1:1}
that {1:1}
the {1:4}
which {1:4}
will {1:1}
with {2:3,4}
word {3:3,3,4}
write {1:1}
written {1:1}
=end
# Print a blank separator line, then the concordance built by concordance2.
puts
puts format(concordance2(TEXT))
=begin
a {2:1,1}
all {1:1}
alphabetical {1:1}
an {2:1,1}
appeared {1:2}
arbitrary {1:1}
bonus: {1:2}
concordance {1:1}
document {1:1}
each {2:2,2}
english {1:1}
frequencies {1:1}
generate {1:1}
given {1:1}
i.e. {1:1}
in {2:1,2}
label {1:2}
labeled {1:1}
list {1:1}
numbers {1:2}
occurrence {1:2}
occurrences {1:1}
of {1:1}
program {1:1}
sentence {1:2}
text {1:1}
that {1:1}
the {1:2}
which {1:2}
will {1:1}
with {2:1,2}
word {3:1,1,2}
write {1:1}
written {1:1}
=end
# Print a blank separator line, then the concordance built by concordance3.
puts
puts format(concordance3(TEXT))
=begin
a {2:1,1}
all {1:1}
alphabetical {1:1}
an {2:1,1}
appeared {1:1}
arbitrary {1:1}
bonus: {1:1}
concordance {1:1}
document {1:1}
each {2:1,1}
english {1:1}
frequencies {1:1}
generate {1:1}
given {1:1}
ie {1:1}
in {2:1,1}
label {1:1}
labeled {1:1}
list {1:1}
numbers {1:1}
occurrence {1:1}
occurrences {1:1}
of {1:1}
program {1:1}
sentence {1:1}
text {1:1}
that {1:1}
the {1:1}
which {1:1}
will {1:1}
with {2:1,1}
word {3:1,1,1}
write {1:1}
written {1:1}
=end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment