Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
# Slightly modified from the original version:
# the methods are now wrapped in a module so they can be mixed in where needed.
# Splits plain text into sentences using a simple punctuation rule.
#
# A sentence is everything up to and including the first '.', '!' or '?'.
# This is a purely lexical rule: abbreviations ("Mr.") and decimals ("3.14")
# are also treated as sentence boundaries.
#
# Intended to be mixed in with `include SentenceParser`.
module SentenceParser
  # Matches any single sentence-ending punctuation character.
  SENTENCE_ENDINGS = /[.!?]/.freeze

  # Finds the earliest sentence-ending punctuation in the given String.
  #
  # @param line [String] the text to scan
  # @return [Integer, nil] index of the first '.', '!' or '?' in +line+,
  #   or nil when +line+ contains none of them
  def find_partition_pos(line)
    line.index(SENTENCE_ENDINGS)
  end

  # Reads a text file and splits its contents into sentences.
  #
  # Sentences may span several physical lines; consecutive lines are joined
  # with a single space. Blank lines are skipped so they do not introduce
  # extra whitespace inside a sentence. Any trailing text without closing
  # punctuation is returned as a final entry.
  #
  # The resulting sentences are also printed to standard output, one per line.
  # If +filename+ does not exist, a message is printed and an empty Array is
  # returned.
  #
  # @param filename [String] path of the file to read
  # @return [Array<String>] the sentences, in order of appearance
  def read_file(filename)
    unless File.exist?(filename)
      puts "#{filename} is an invalid file."
      return []
    end

    sentences = []
    buffer = ""
    # File.foreach opens and closes the file itself, so no handle is leaked.
    File.foreach(filename) do |raw_line|
      text = raw_line.strip
      next if text.empty?

      # Join onto the unfinished sentence, if any, without a leading space.
      buffer = buffer.empty? ? text : "#{buffer} #{text}"
      buffer = drain_sentences(buffer, sentences)
    end

    # Whatever is left has no closing punctuation; keep it only if non-empty.
    sentences << buffer unless buffer.empty?

    puts sentences # print a copy of the input text, split by sentences
    sentences
  end

  private

  # Moves every complete sentence from the front of +buffer+ into +sentences+.
  #
  # @param buffer [String] accumulated text, possibly holding several sentences
  # @param sentences [Array<String>] collector that complete sentences are appended to
  # @return [String] the remaining text that does not yet end a sentence
  def drain_sentences(buffer, sentences)
    while (pos = find_partition_pos(buffer))
      sentences << buffer[0..pos] # keep the punctuation with its sentence
      buffer = buffer[(pos + 1)..-1].lstrip
    end
    buffer
  end
end # SentenceParser
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.