Skip to content

Instantly share code, notes, and snippets.

@howardhamilton
Created March 24, 2014 15:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save howardhamilton/9741836 to your computer and use it in GitHub Desktop.
Save howardhamilton/9741836 to your computer and use it in GitHub Desktop.
Create a trie data structure for storing a word dictionary
#
# trie.py
#
# create trie data structure
#
class Node(object):
"""
Node element of trie data structure.
This data structure is used to model letter frequencies in a
word dictionary. The depth of the trie is 27 levels (root + a level
for each letter of the alphabet) and words are placed at the end
of the leaf node (the node with no children at 'z' level).
"""
def __init__(self, letter=None, freq=None, children=None, words=None):
"""
Instantiate a node -- the basic element of a trie.
:param letter: letter in English alphabet.
:type letter: string
:param freq: frequency of letter in word.
:type freq: integer
:param children: child nodes.
:type children: list of type Node
:param words: words associated with node. Only used in leaf node.
:type words: list of string
"""
self.letter = letter
self.freq = freq
self.children = children if children is not None else []
self.words = words if words is not None else []
def count(self):
"""
Count number of nodes in trie relative to root.
Use a pre-order traversal to visit each node.
"""
count = 0
if self.children:
for child in self.children:
count += 1 + child.count()
return count
def traverse(self,depth=0):
"""
A pre-order traversal of the trie relative to the current node.
Prints words associated with the node, if they exist.
This method is used primarily for diagnostics. It gets rather
unwieldy after 10-12 levels.
:param depth: current trie depth. Default 0.
:type depth: integer
"""
if depth == 0:
print "ROOT",
if self.letter:
print self.letter, self.freq,
if self.words:
for word in self.words:
print word,
print
if not self.children:
return
else:
for child in self.children:
for i in range(0,depth+1):
print " ",
child.traverse(depth+1)
def search(self, out, **letterbag):
"""
Search for words associated with unbroken chain of nodes in trie.
:param letterbag: collection of letters and frequencies in a word.
:type letterbag: dict
"""
if not letterbag:
out.extend(self.words)
else:
baglist = sorted(list(letterbag.items()))
letter = baglist[0][0]
freq = letterbag.pop(letter)
for child in self.children:
if child.letter == letter and child.freq <= freq:
child.search(out,**letterbag)
def insert(self, word, **letterbag):
"""
Insert word at end of chain of nodes in trie.
:param word: word to be inserted into trie structure.
:type word: string
:param letterbag: collection of letters and frequencies in a word.
:type letterbag: dict
"""
if not letterbag:
self.words.append(word)
else:
baglist = sorted(list(letterbag.items()))
letter = baglist[0][0]
freq = letterbag.pop(letter)
child = [x for x in self.children if
x.letter == letter and x.freq == freq]
if child:
child[0].insert(word,**letterbag)
else:
self.children.append(Node(letter,freq))
self.children[-1].insert(word,**letterbag)
if __name__ == "__main__":
base = Node()
base.insert('tomato',a=0,b=0,c=0)
base.insert('carrot',a=0,b=0,c=0)
base.insert('apple',a=0,b=1,c=0)
base.insert('pineapple',a=0,b=2,c=1)
base.insert('howard',a=1,b=0,c=1)
base.insert('orange',a=2,b=1,c=1,d=0)
base.insert('test',a=2,b=1,c=1,d=1)
base.insert('daddy',a=2,b=1,c=1,d=2)
base.insert('yankee',a=2,b=1,c=1,d=2)
base.traverse()
out = []
base.search(out,a=1,b=2,c=1)
print out
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment