Skip to content

Instantly share code, notes, and snippets.

@itissid
Last active August 3, 2018 16:02
Show Gist options
  • Save itissid/d2ba2f996bb388c1795be861efd07813 to your computer and use it in GitHub Desktop.
Save itissid/d2ba2f996bb388c1795be861efd07813 to your computer and use it in GitHub Desktop.
A trie from scratch.
import string
class TrieNode(object):
"""
Context: Implementation of the trie using a dictionary is trivial:
https://gist.github.com/itissid/6a5bec95424b5630908e877c02d957d5
But if one was to implement the Trie node as a DS then one needs to think about implementation a bit more.
So what I did was keep the API code for search, insert, search_prefix the same as the gist; i.e.
to pretend we are still using a dictionary, but I implemented some magic methods for the trie in the TrieNode (
__getitem__ and __setitem__) to support the dictionary(ish) usage. So to the API its as if we are using the dictionary
for tries which keeps things simple, but offoad all the complexity to the internal magic methods.
ace
acid
acne
book
{
a: {
c: {
i: {
d: {"$": True}
},
n: {
e: {$:True}
}
},
e: {
"$": True
}
b:{...}
}
1. The implmentation is done with a Node class.
It has an array the size of alphabet that needs to be indexed.
Assume that the index is coming from atoi() function and that the alphabet is known before hand.
A new node has a children array allocated.
Nodes don't have values themselves
2. When inserting a word, each alphabet is checked in the children array and if there is None in the index
then insert a new Node there.
"""
alpha_map = {s:i for i, s in enumerate(string.ascii_letters + string.digits + ".")}
@staticmethod
def atoi(a):
if a not in TrieNode.alpha_map:
raise ValueError
return TrieNode.alpha_map[a]
def __init__(self):
# TODO: If its the terminal then don't initialize this
self.children = [None for i in range(len(TrieNode.alpha_map))]
self.is_terminal = False
"""
Test by hand
fog
foggy
bond
root
s root children
f root [...5:TrieNode...]
o 5 [.........12:TrieNode...]
g 12 [....6:TrieNode...]
/ 6 $
foggy
f root [.... 5:TrieNOde...] Already present
o 5
g 12
g 6
y 6 [.... 24:TrieNode...]
/ 24 make terminal
"""
def __getitem__(self, item):
if item == '$':
return self.is_terminal
if item not in TrieNode.alpha_map:
raise ValueError("Cannot insert {} valid chars are {}".format(item, TrieNode.alpha_map.keys()))
idx = TrieNode.atoi(item)
if self.children[idx] is None:
self.children[idx] = TrieNode()
#print("Getting item {} at idx {}".format(self.children[idx], idx))
return self.children[idx]
def __setitem__(self, key, value):
if key == '$':
self.is_terminal = value
return
raise ValueError("Cannot set key {} value {} pair".format(key, value))
def __contains__(self, item):
if item == '$':
return self.is_terminal
return self.children[TrieNode.atoi(item)] is not None
def __delitem__(self, key):
if key == '$':
self.is_terminal = False
@staticmethod
def insert(trie, word):
root = trie
for s in word:
root = root[s]
root["$"] = True
@staticmethod
def search(trie, word):
root = trie
for s in word:
if s not in root:
return False
root = root[s]
return '$' in root
@staticmethod
def search_prefix(trie, word):
root = trie
for s in word:
if s not in root:
return False
root = root[s]
return True
@staticmethod
def _delete_helper(root, suffix, i):
# TODO: If Recursion may bottom out for long suffixes, use iteration.
# Optimization: Remove Dead paths. Consider a trie a-b-c-d when abcd is deleted and there is no other words that have prefixes
# common with all or part of abcd we can delete all the TrieNodes and save some space. To do this we can add an
# integer variable to TrieNode that counts the number of words that have the prefix common upto that TrieNode.
if root is None:
return False
if i == len(suffix):
if '$' in root:
del(root["$"])
return True
return False
if suffix[i] in root:
if TrieNode._delete_helper(root[suffix[i]], suffix, i + 1):
# As per the above optimization NOTE I can check here if root.num_common_prefix == 1 then I can
# delete the node here.
if '$' not in root:
return True
return False
@staticmethod
def delete(trie, word):
TrieNode._delete_helper(trie, word, 0)
def test_insert_search():
w = "frog"
root = TrieNode()
TrieNode.insert(root, w)
# TODO: Better testing by monkey patching and verifying calls
assert TrieNode.search(root, w) is True
print(".....")
def test_insert_search_multiple():
words = ["fog", "foggy", "brag"]
root = TrieNode()
for i, w in enumerate(words):
TrieNode.insert(root, w)
# TODO: Better testing by monkey patching and verifying calls
assert TrieNode.search(root, w) is True
print('.'*(i+1))
def test_partial_non_match():
words = ["fog", "foggy"]
root = TrieNode()
for i, w in enumerate(words):
TrieNode.insert(root, w)
assert TrieNode.search(root, "fo") is False
assert TrieNode.search(root, "fogg") is False
assert TrieNode.search(root, "foggys") is False
print('....')
def test_delete():
w = "frog"
root = TrieNode()
TrieNode.insert(root, w)
TrieNode.delete(root, w)
assert TrieNode.search(root, w) is False
print(".*")
def test_delete_prefix():
words = ["fog", "foggy"]
root = TrieNode()
for i, w in enumerate(words):
TrieNode.insert(root, w)
TrieNode.delete(root, words[0])
assert TrieNode.search(root, "fog") is False
assert TrieNode.search(root, "foggy") is True
print("..*")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment