Created
May 12, 2011 12:58
-
-
Save jkal/968441 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# poor man's text index
#
# a simple ruby implementation of the idea described in:
# http://playnice.ly/blog/2010/05/05/a-fast-fuzzy-full-text-index-using-redis/
#
require 'rubygems' | |
require 'text' | |
require 'set' | |
require 'redis' | |
# Minimal indexable record: an identifier paired with the text to be indexed.
class Item
  attr_accessor :id
  attr_accessor :txt

  # id  - identifier of the record
  # txt - the free text belonging to the record
  def initialize(id, txt)
    @id, @txt = id, txt
  end
end
# A tiny fuzzy full-text index backed by Redis sets.
#
# Every indexed item is linked to the Double Metaphone codes of the words in
# its text: for each code, the item's id is added to the Redis set stored
# under the key "metaphone:<code>".
class FullTextIndex
  # Common words that are skipped when indexing.
  STOPWORDS = ["the", "of", "to", "and", "a", "in", "is", "it", "you", "that"].freeze

  # redis - object responding to sadd(key, member). Defaults to a fresh
  #         Redis client, so a plain FullTextIndex.new behaves as before.
  def initialize(redis = Redis.new)
    @stopwords = STOPWORDS
    @redis = redis
  end

  # Splits text into words (maximal runs of ASCII letters) and drops stopwords.
  #
  # Scanning for letter runs, rather than splitting on each non-letter,
  # avoids empty strings when separators are adjacent (e.g. ", ").
  # Stopwords are matched case-insensitively; the returned words keep their
  # original case. nil is treated like an empty text.
  #
  # Returns an Array of Strings (duplicates preserved, in text order).
  def get_words_from_text(text)
    words = text.to_s.scan(/[a-zA-Z]+/)
    words.reject { |word| @stopwords.include?(word.downcase) }
  end

  # Collects the distinct Double Metaphone codes of the given words.
  #
  # Text::Metaphone.double_metaphone returns a [primary, secondary] pair
  # (per the text gem's docs the secondary is nil when it adds nothing), so
  # each code is added individually instead of storing the pair itself.
  #
  # Returns a Set of Strings.
  def get_metaphones(words)
    metaphones = Set.new
    words.each do |word|
      Array(Text::Metaphone.double_metaphone(word)).each do |code|
        metaphones.add(code) unless code.nil? || code.empty?
      end
    end
    metaphones
  end

  # Indexes item (anything responding to #id and #txt) under every
  # metaphone code derived from its text.
  def index_item(item)
    words = get_words_from_text(item.txt)
    get_metaphones(words).each do |metaphone|
      _link_item_and_metaphone(item, metaphone)
    end
  end

  # Adds the item's id to the Redis set for the given metaphone code.
  def _link_item_and_metaphone(item, metaphone)
    @redis.sadd("metaphone:#{metaphone}", item.id)
  end
end
# Demo: build two sample items and add each of them to a fresh index.
item1 = Item.new(1, "hello my name is john")
item2 = Item.new(2, "hello my name is mary")
myindex = FullTextIndex.new
[item1, item2].each { |item| myindex.index_item(item) }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment