Created
January 22, 2019 02:20
-
-
Save abethcrane/d8dc5f0664d44a2c9e69854d5c1a7080 to your computer and use it in GitHub Desktop.
An overly-commented implementation of a Palindrome Tree in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
debug_prints = False | |
class PalindromeTreeNode: | |
# Important to note: we don't use start_index and end_index much | |
# Because if this node is 'bb' and our string is 'abbacabbac' we'll see 'bb' twice | |
# So we use the length to calculate where in the full string the start index would be | |
# Each time we run across this node | |
# The start and end indices are useful for printing out this node though | |
start_index = 0 | |
end_index = 0 | |
length = 0 | |
suffix_index = 0 | |
node_index = 0 | |
# Any palindrome can have up to 26 child palindromes | |
# e.g. aPALINDROMEa, bPALINDROMEb, cPALINDROMEc, ..., zPALINDROMEz | |
# So we store the node index, in the palindromic tree, where each one lives | |
# (If it doesn't exist, its letter isnt in the dict) | |
child_palindrome_node_index = {} | |
def __init__(self, **kwargs): | |
if "start_index" in kwargs: | |
self.start_index = kwargs.pop("start_index") | |
if "end_index" in kwargs: | |
self.end_index = kwargs.pop("end_index") | |
if "length" in kwargs: | |
self.length = kwargs.pop("length") | |
if "suffix_index" in kwargs: | |
self.suffix_index = kwargs.pop("suffix_index") | |
self.child_palindrome_node_index = {} | |
class PalindromeTree: | |
nodes = [] | |
prev_node_index = 0 | |
magic_imaginary_node_index = 0 | |
magic_null_string_node_index = 1 | |
# Push on the 2 base nodes | |
def __init__(self): | |
# First node is for single-letter palindromes | |
# It starts with length -1 because after you add on the letter to each side | |
# It becomes length 1 - kind of weird but okay | |
# Its longest suffix (not itself) is set to itself, because what else would it be?? | |
# Note that start_index needs to be -1 for our insert to work | |
self.nodes.append(PalindromeTreeNode(start_index=-1, end_index=-1, length=-1, suffix_index=self.magic_imaginary_node_index)) | |
self.nodes[0].node_index = 0 | |
# Second node is the base for 2-letter palindromes | |
# It's a null string | |
# Its length is 0, and then when you add on the letter to each side | |
# It becomes length 2 - totally normal | |
# Its longest suffix (not itself) is the imaginary node 0 | |
self.nodes.append(PalindromeTreeNode(start_index=-1, end_index=-1, length=0, suffix_index=self.magic_imaginary_node_index)) | |
self.nodes[1].node_index = 1 | |
def find_parent_and_create_child(self, s, index): | |
new_letter = s[index] | |
# Go back through the train of parent node connections until we find the right one | |
# Might end up being our first magic node | |
parent_palindrome_node = self.nodes[self.prev_node_index] | |
while True: | |
parent_palindrome_start_index = index - parent_palindrome_node.length | |
# The parent_palindrome_start_index needs to be at least 1, so that we can check the cell before it | |
# If the cell before the suffix is equal to our new letter then it can be a palindrome | |
# In the event where the parent_palindrome_node is our magic first node: | |
# parent_palindrome_start_index will be index+1 | |
# so it'll be >= 1, and then it'll compare new_letter to itself | |
# In the event where parent_palindrome_node is our magic second node: | |
# parent_palindrome_start_index will be index | |
# so for the first letter it won't be >= 1 | |
# which is okay - the second node is for palindromes like "bb" not "b" | |
# but for all letters afterwards it will be >= 1 | |
# so then it'll check our new_letter with the previous one, to see if they're the same | |
if parent_palindrome_start_index >= 1 and new_letter == s[parent_palindrome_start_index - 1]: | |
break | |
parent_palindrome_node = self.nodes[parent_palindrome_node.suffix_index] | |
# Now that we have our suffix (could be a magic node, could be a real palindrome) | |
# Check to see if the palindrome (new_letter suffix new_letter) is already in the tree | |
# i.e. check to see if the suffix already has an edge index for new_letter | |
# If it is already in the tree, we don't insert anything new | |
# Just update our prev_node_index counter to point to the node that has the new palindrome | |
# So that next insert we can try to build on it | |
# this might be the case in e.g. | |
# bbabba | |
# we would already have 'bb' when we get to it the 2nd time | |
if new_letter in parent_palindrome_node.child_palindrome_node_index: | |
self.prev_node_index = parent_palindrome_node.child_palindrome_node_index[new_letter] | |
if debug_prints: | |
print ("Parent node index is", parent_palindrome_node.node_index, "This node already exists") | |
return None | |
# Create a new node for this new entry | |
new_palindrome_length = parent_palindrome_node.length + 2 | |
new_node_start = index-new_palindrome_length + 1 | |
new_node = PalindromeTreeNode(start_index=new_node_start, end_index=index, length=new_palindrome_length) | |
# Append the node to our nodes array and update its index accordingly | |
self.nodes.append(new_node) | |
new_node.node_index = len(self.nodes) - 1 | |
# Update the suffix node to have its directed edge for the new letter point to our new node | |
parent_palindrome_node.child_palindrome_node_index[new_letter] = new_node.node_index | |
self.prev_node_index = new_node.node_index | |
return parent_palindrome_node | |
def set_child_suffix_node(self, s, index, parent_node): | |
new_letter = s[index] | |
child_node = self.nodes[len(self.nodes) - 1] | |
# Magic base case: if the palindrome is length 1, its suffix is the magic null node | |
if child_node.length == 1: | |
child_node.suffix_index = self.magic_null_string_node_index | |
return | |
# Similarly to how we did in find_parent_and_create_child | |
# We trace back through the suffix edges until we find the right one | |
# But instead of trying to find the palindrome parent | |
# We're trying to find the suffix palindrome | |
# So we go all the way back until we find a match | |
# And then go forward to their child palindrome | |
# Kind of weird but okay | |
suffix_node = self.nodes[parent_node.suffix_index] | |
while True: | |
suffix_start_index = index - suffix_node.length | |
if suffix_start_index >= 1 and new_letter == s[suffix_start_index - 1]: | |
break | |
suffix_node = self.nodes[suffix_node.suffix_index] | |
child_node.suffix_index = suffix_node.child_palindrome_node_index[new_letter] | |
def insert(self, s, index): | |
if debug_prints: | |
print("Inserting letter" , s[index], "at index", index) | |
parent_node = self.find_parent_and_create_child(s, index) | |
if parent_node is not None: | |
self.set_child_suffix_node(s, index, parent_node) | |
tree = PalindromeTree() | |
s = "abbalrcabbac" | |
longest_palindrome = "" | |
longest_palindrome_node = None | |
# Build the tree and cache off the longest palindrome | |
for i in range(0, len(s)): | |
tree.insert(s, i) | |
latest_node = tree.nodes[tree.prev_node_index] | |
if latest_node.length > len(longest_palindrome): | |
longest_palindrome = s[latest_node.start_index:latest_node.end_index + 1] | |
longest_palindrome_node = latest_node | |
# Print all nodes in the tree | |
if debug_prints: | |
for i in range(2, len(tree.nodes)): | |
cur_node = tree.nodes[i] | |
print (i-2, "= ",s[cur_node.start_index:cur_node.end_index + 1]) | |
print (longest_palindrome) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment