public
Last active

A simple Trie to find words that match a simple pattern

  • Download Gist
trie.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
class TrieNode
 
def initialize
@sub_tries = {}
@end_of_word = false
end
 
def insert(input)
to_insert = input
 
# Convert input to array of chars if it's a string
to_insert = to_insert.split(//) if to_insert.is_a?(String)
 
# If to_insert is empty, we have reached the end of the word and add a
# end-of-word marker. Otherwise we would get partial words back when we're
# looking for a shorter word, for example "coverage", "cover?" => "covera"
if to_insert.empty?
@end_of_word = true
return
end
 
# Otherwise, create a sub_trie for the first character of the input and
# recursively insert the remainder of the word
cur_char = to_insert.shift
@sub_tries[cur_char] ||= TrieNode.new
@sub_tries[cur_char].insert(to_insert)
to_insert.unshift(cur_char)
end
 
def find_words(word)
to_find = word
to_find = to_find.split(//) if to_find.is_a?(String)
 
# If to_find is empty, we reached the end of the template. If the
# end-of-word marker is set, we return the empty string, which will be
# prepended with the characters of the word as we move upwards in the trie.
# Otherwise we return the empty array, which results in no word being added
# to the results.
if to_find.empty?
return [''] if @end_of_word
return []
end
 
cur_char = to_find.shift
 
result = []
if cur_char == '?'
# Search all sub-tries for words matching the remainder of the template
# and add them to the results
@sub_tries.each_pair do |key, value|
result += value.find_words(to_find).map { |w| key + w }
end
elsif @sub_tries[cur_char]
# We have a sub-trie that matches the current character of the template.
# Recursively get all words that match the remainder of the template
# below that sub-trie.
result = @sub_tries[cur_char].find_words(to_find).map { |w| cur_char + w }
else
# We don't have anything that matches the template.
result = []
end
to_find.unshift(cur_char)
return result
end
 
end
 
DICTIONARY = File.read('/usr/share/dict/usa').split("\n")
 
print "Building Trie... "
@root = TrieNode.new
DICTIONARY.each { |word| @root.insert(word) }
puts "done."
 
puts "Searching trie..."
@root.find_words('?r?te').each do |word|
puts word
end
 
# $wc -l /usr/share/dict/usa
# 98569 /usr/share/dict/usa
#
# $time ruby trie.rb
# Building Trie... done.
# Searching trie...
# Crete
# brute
# crate
# grate
# irate
# orate
# prate
# trite
# write
# wrote
#
# real 0m3.561s
# user 0m3.303s
# sys 0m0.117s

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.