Created
May 10, 2016 16:15
-
-
Save beefy/89f8845d7c53bd0d4644e184f6f690d2 to your computer and use it in GitHub Desktop.
A suffix tree class with longest repeated substring implemented
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class suffix_tree(): | |
def __init__(self,input_str): | |
self.root = node(ptr=[]) | |
for i in range(0,len(input_str)): | |
self.add(self.root,input_str[i:]) | |
# add if input_str never starts with vals in node.ptr | |
# otherwise, add to node that starts with it | |
# if a val in node.ptr starts with input_str | |
# -split that up into one node with val input_str | |
# pointing to a node with val orig_val[len(input_str):] | |
def add(self,parent,input_str): | |
if parent.val == '': return | |
if sum([1 for i in parent.ptr if i.val.startswith(input_str)]) > 0: | |
for i in parent.ptr: | |
if i.val.startswith(input_str): | |
parent.ptr.remove(i) | |
break | |
new_node_point = node(ptr=[],val=i.val[len(input_str):]) | |
new_node = node(ptr=[new_node_point],val=input_str) | |
parent.ptr.append(new_node) | |
if sum([1 for i in parent.ptr if input_str.startswith(i.val)]) == 0: | |
new_node = node(val=input_str) | |
parent.ptr.append(new_node) | |
else: | |
for i in parent.ptr: | |
if input_str.startswith(i.val) and len(input_str[len(i.val):]) > 0: | |
self.add(i,input_str[len(i.val):]) | |
break | |
# 1.) preprocessing the tree to count the number of leaf descendants for each internal node | |
# 2.) finding the deepest node with at least k leaf descendants that have no children | |
def LRS(self): | |
result = '' | |
for i in self.root.ptr: | |
if len(i.ptr) == 0: pass | |
elif len(i.val) >= len(result): | |
result = i.val | |
return result | |
class node(): | |
def __init__(self, ptr=[], val=None): | |
self.val = val | |
self.ptr = ptr | |
list_tree = lambda node: [(child.val,list_tree(child)) for child in node.ptr] | |
if __name__ == "__main__": | |
tree = suffix_tree('123123') | |
print tree.LRS() | |
print list_tree(tree.root) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment