Created
March 24, 2014 15:01
-
-
Save howardhamilton/9741836 to your computer and use it in GitHub Desktop.
Create a trie data structure for storing a word dictionary
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# trie.py | |
# | |
# create trie data structure | |
# | |
class Node(object): | |
""" | |
Node element of trie data structure. | |
This data structure is used to model letter frequencies in a | |
word dictionary. The depth of the trie is 27 levels (root + a level | |
for each letter of the alphabet) and words are placed at the end | |
of the leaf node (the node with no children at 'z' level). | |
""" | |
def __init__(self, letter=None, freq=None, children=None, words=None): | |
""" | |
Instantiate a node -- the basic element of a trie. | |
:param letter: letter in English alphabet. | |
:type letter: string | |
:param freq: frequency of letter in word. | |
:type freq: integer | |
:param children: child nodes. | |
:type children: list of type Node | |
:param words: words associated with node. Only used in leaf node. | |
:type words: list of string | |
""" | |
self.letter = letter | |
self.freq = freq | |
self.children = children if children is not None else [] | |
self.words = words if words is not None else [] | |
def count(self): | |
""" | |
Count number of nodes in trie relative to root. | |
Use a pre-order traversal to visit each node. | |
""" | |
count = 0 | |
if self.children: | |
for child in self.children: | |
count += 1 + child.count() | |
return count | |
def traverse(self,depth=0): | |
""" | |
A pre-order traversal of the trie relative to the current node. | |
Prints words associated with the node, if they exist. | |
This method is used primarily for diagnostics. It gets rather | |
unwieldy after 10-12 levels. | |
:param depth: current trie depth. Default 0. | |
:type depth: integer | |
""" | |
if depth == 0: | |
print "ROOT", | |
if self.letter: | |
print self.letter, self.freq, | |
if self.words: | |
for word in self.words: | |
print word, | |
if not self.children: | |
return | |
else: | |
for child in self.children: | |
for i in range(0,depth+1): | |
print " ", | |
child.traverse(depth+1) | |
def search(self, out, **letterbag): | |
""" | |
Search for words associated with unbroken chain of nodes in trie. | |
:param letterbag: collection of letters and frequencies in a word. | |
:type letterbag: dict | |
""" | |
if not letterbag: | |
out.extend(self.words) | |
else: | |
baglist = sorted(list(letterbag.items())) | |
letter = baglist[0][0] | |
freq = letterbag.pop(letter) | |
for child in self.children: | |
if child.letter == letter and child.freq <= freq: | |
child.search(out,**letterbag) | |
def insert(self, word, **letterbag): | |
""" | |
Insert word at end of chain of nodes in trie. | |
:param word: word to be inserted into trie structure. | |
:type word: string | |
:param letterbag: collection of letters and frequencies in a word. | |
:type letterbag: dict | |
""" | |
if not letterbag: | |
self.words.append(word) | |
else: | |
baglist = sorted(list(letterbag.items())) | |
letter = baglist[0][0] | |
freq = letterbag.pop(letter) | |
child = [x for x in self.children if | |
x.letter == letter and x.freq == freq] | |
if child: | |
child[0].insert(word,**letterbag) | |
else: | |
self.children.append(Node(letter,freq)) | |
self.children[-1].insert(word,**letterbag) | |
if __name__ == "__main__": | |
base = Node() | |
base.insert('tomato',a=0,b=0,c=0) | |
base.insert('carrot',a=0,b=0,c=0) | |
base.insert('apple',a=0,b=1,c=0) | |
base.insert('pineapple',a=0,b=2,c=1) | |
base.insert('howard',a=1,b=0,c=1) | |
base.insert('orange',a=2,b=1,c=1,d=0) | |
base.insert('test',a=2,b=1,c=1,d=1) | |
base.insert('daddy',a=2,b=1,c=1,d=2) | |
base.insert('yankee',a=2,b=1,c=1,d=2) | |
base.traverse() | |
out = [] | |
base.search(out,a=1,b=2,c=1) | |
print out |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment