-
-
Save mozey/2604f7ac08226f774bbc2fab57f0c67e to your computer and use it in GitHub Desktop.
An object-oriented implementation of a "Trie" in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# An object-oriented implementation of a "Trie" in Python | |
# https://gist.github.com/mozey/2604f7ac08226f774bbc2fab57f0c67e | |
class Node: | |
def __init__(self, label=None, data=None): | |
self.label = label | |
self.data = data | |
self.children = dict() | |
def add_child(self, key, data=None): | |
if not isinstance(key, Node): | |
self.children[key] = Node(key, data) | |
else: | |
self.children[key.label] = key | |
def __getitem__(self, key): | |
return self.children[key] | |
class Trie: | |
def __init__(self): | |
self.head = Node() | |
def __getitem__(self, key): | |
return self.head.children[key] | |
def add(self, word): | |
current_node = self.head | |
word_finished = True | |
i = 0 | |
for i in range(len(word)): | |
if word[i] in current_node.children: | |
current_node = current_node.children[word[i]] | |
else: | |
word_finished = False | |
break | |
# For every new letter, create a new child node | |
if not word_finished: | |
while i < len(word): | |
current_node.add_child(word[i]) | |
current_node = current_node.children[word[i]] | |
i += 1 | |
# Let's store the full word at the end node so we don't need to | |
# travel back up the tree to reconstruct the word | |
current_node.data = word | |
def has_word(self, word): | |
if word == '': | |
return False | |
if word is None: | |
raise ValueError('Trie.has_word requires a not-Null string') | |
# Start at the top | |
current_node = self.head | |
exists = True | |
for letter in word: | |
if letter in current_node.children: | |
current_node = current_node.children[letter] | |
else: | |
exists = False | |
break | |
# Still need to check if we just reached a word like 't' | |
# that isn't actually a full word in our dictionary | |
if exists: | |
if current_node.data is None: | |
exists = False | |
return exists | |
def start_with_prefix(self, prefix): | |
""" Returns a list of all words in tree that start with prefix """ | |
words = list() | |
if prefix is None: | |
raise ValueError('Requires not-Null prefix') | |
# Determine end-of-prefix node | |
top_node = self.head | |
for letter in prefix: | |
if letter in top_node.children: | |
top_node = top_node.children[letter] | |
else: | |
# Prefix not in tree, go no further | |
return words | |
# Get words under prefix | |
if top_node == self.head: | |
queue = [node for key, node in top_node.children.items()] | |
else: | |
queue = [top_node] | |
# Perform a breadth first search under the prefix | |
# A cool effect of using BFS as opposed to DFS is that BFS will return | |
# a list of words ordered by increasing length | |
while queue: | |
current_node = queue.pop() | |
if current_node.data is not None: | |
# Isn't it nice to not have to go back up the tree? | |
words.append(current_node.data) | |
queue = [node for key, node in | |
current_node.children.items()] + queue | |
return words | |
def get_data(self, word): | |
""" This returns the 'data' of the node identified by the given word """ | |
if not self.has_word(word): | |
raise ValueError('{} not found in trie'.format(word)) | |
# Race to the bottom, get data | |
current_node = self.head | |
for letter in word: | |
current_node = current_node[letter] | |
return current_node.data | |
def main(): | |
""" Example use """ | |
trie = Trie() | |
words = "hello goodbye help gerald gold tea " \ | |
+ "ted team to too tom stan standard money" | |
for word in words.split(): | |
trie.add(word) | |
print("'goodbye' in trie: ", trie.has_word('goodbye')) | |
print(trie.start_with_prefix('g')) | |
print(trie.start_with_prefix('to')) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Python 3