# Created Sep 27, 2009
include Stemmable
class String
def to_tags
# lower case
# replace numbers, punctuation, and any other character that is not a-z, a space, or a newline with a space
# break words on spaces
# get the word stem
# remove duplicates
# remove stems shorter than 3 letters
# remove common words (after they've been stemmed)
common_words = %w(and are but for from had have her his like not our she some than that the their them then there these they this via was were with you your)
self.downcase.gsub(/[^a-z \n]/, ' ')!{|s|s.stem}!{|s|s if (s.length > 2)}.compact -!{|s|s.stem}
