LCP trie (trie with multi-character prefixes so nodes are 'common prefix' branches rather than common ancestor) adapted from dict trie in https://ychai.uk/notes/2019/03/03/Programming/Tricks-of-Python/ and mostly generated with GPT-3.5 by describing the algorithm as pseudo-code comments
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class TrieNode(object): | |
def __init__(self): | |
self.data = {} | |
self.is_word = False | |
class Trie(object): | |
def __init__(self): | |
self.root = TrieNode() | |
def insert_lcp(self, word: str) -> None: | |
""" | |
Inserts a word into the trie and aggregates it with existing nodes (longest | |
common prefix nodes). | |
""" | |
node = self.root | |
# If no common ancestors, insert entire word | |
if not node.data: | |
node.data[word] = TrieNode() | |
node.data[word].is_word = True | |
return | |
# Iterate through existing nodes | |
while True: | |
# Find the longest common prefix between the word and node keys | |
lcp = "" | |
lcp_key = None | |
for key in node.data.keys(): | |
prefix = "" | |
for i, (a, b) in enumerate(zip(word, key)): | |
if a == b: | |
prefix += a | |
else: | |
break | |
if len(prefix) > len(lcp): | |
lcp = prefix | |
lcp_key = key | |
if not lcp: | |
# No common prefix, insert entire word as new node | |
node.data[word] = TrieNode() | |
node.data[word].is_word = True | |
return | |
# If entire common ancestor is found, insert below | |
lcp_size = len(lcp) | |
if lcp == lcp_key: | |
node = node.data[lcp_key] | |
word = word[lcp_size:] | |
else: | |
# Split the node at the branch point and insert the rest of the word below it | |
new_node = TrieNode() | |
node.data[lcp] = new_node | |
new_node.data[lcp_key[lcp_size:]] = node.data.pop(lcp_key) | |
if not word: | |
# The branch point is the word | |
new_node.is_word = True | |
else: | |
# Put the remaining suffix below the LCP branch point node | |
partial_word = word[lcp_size:] | |
new_node.data[partial_word] = TrieNode() | |
new_node.data[partial_word].is_word = True | |
return | |
def insert_char(self, word: str) -> None: | |
""" | |
Inserts a word into the trie (char level nodes). | |
""" | |
node = self.root | |
for letter in word: | |
child = node.data.get(letter) | |
if not child: | |
node.data[letter] = TrieNode() | |
node = node.data[letter] | |
node.is_word = True | |
def search(self, word: str) -> None: | |
""" | |
Returns if the word is in the trie. | |
""" | |
node = self.root | |
for letter in word: | |
node = node.data.get(letter) | |
if not node: | |
return False | |
return node.is_word | |
def starts_with(self, prefix: str) -> bool: | |
""" | |
Returns if there is any word in the trie | |
that starts with the given prefix. | |
""" | |
node = self.root | |
for letter in prefix: | |
node = node.data.get(letter) | |
if not node: | |
return False | |
return True | |
def get_start(self, prefix: str) -> list[str]: | |
""" | |
Returns words started with prefix | |
""" | |
def _get_key(pre, pre_node): | |
words_list = [] | |
if pre_node.is_word: | |
words_list.append(pre) | |
for x in pre_node.data.keys(): | |
words_list.extend(_get_key(pre + str(x), pre_node.data.get(x))) | |
return words_list | |
words = [] | |
if not self.starts_with(prefix): | |
return words | |
if self.search(prefix): | |
words.append(prefix) | |
return words | |
node = self.root | |
for letter in prefix: | |
node = node.data.get(letter) | |
return _get_key(prefix, node) | |
def print_data(self) -> None: | |
def dfs(node, level): | |
if not node: | |
return | |
for key, child in node.data.items(): | |
print(" " * level + key) | |
dfs(child, level + 1) | |
dfs(self.root, 0) | |
def walk(self): | |
def dfs(node): | |
if not node: | |
return {} | |
children = {} | |
for letter, child in node.data.items(): | |
children[letter] = dfs(child) | |
return {"word": node.is_word, "children": children} | |
return dfs(self.root) | |
t = Trie() | |
strings = "all although allow alone altogether".split() | |
for s in strings: | |
t.insert_lcp(s) | |
t.print_data() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment