Created
March 21, 2017 03:53
-
-
Save samzhang111/5ab82afdfaea874bcb0cabca054264b2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rom collections import defaultdict | |
class SuffixTree(object): | |
def __init__(self, key=None): | |
self.key = key | |
self.dict = { | |
'count': 0, | |
'children': {} | |
} | |
def __getitem__(self, key): | |
if key == 'count': | |
return self.dict['count'] | |
return self.dict['children'][key] | |
def __setitem__(self, key, value): | |
if key == 'count': | |
self.dict['count'] = value | |
else: | |
self.dict['children'].setdefault(key, SuffixTree(key)) | |
def __repr__(self): | |
return str(self) | |
def __str__(self): | |
return str(self.dict) | |
def keys(self): | |
return self.children.keys() | |
@property | |
def count(self): | |
return self.dict['count'] | |
@property | |
def children(self): | |
return self.dict['children'] | |
@classmethod | |
def from_seq(klass, seq): | |
inf = SuffixTree() | |
for i in range(len(seq)): | |
node = inf | |
for token in seq[i:]: | |
node[token]['count'] += 1 | |
node = node[token].children | |
node['$']['count'] += 1 | |
return inf | |
def all_keys(self): | |
paths = [] | |
for key in self.keys(): | |
if key == '$': | |
paths.append(['$']) | |
else: | |
node = self[key] | |
future_paths = node.all_keys() | |
for path in future_paths: | |
paths.append([key, *path]) | |
return paths | |
@property | |
def is_leaf(self): | |
return len(self.dict['children']) <= 1 | |
def flattened_items(self): | |
results = set() | |
for k, v in self.children.items(): | |
results.add(((k, ), v.count)) | |
return results | |
######################### | |
import unittest | |
from suffix_tree import SuffixTree | |
from expects import * | |
class TestSuffixTree(unittest.TestCase): | |
def test_single_suffix_tree(self): | |
tree = SuffixTree.from_seq(['a']) | |
expect(tree['a'].count).to(equal(1)) | |
expect(tree['b'].count).to(equal(0)) | |
expect(tree['a']['$'].count).to(equal(1)) | |
def test_two_elements_in_tree(self): | |
tree = SuffixTree.from_seq(['a', 'a']) | |
expect(tree['a'].count).to(equal(2)) | |
expect(tree['a']['a'].count).to(equal(1)) | |
def test_elements_are_reversed(self): | |
tree = SuffixTree.from_seq(['a', 'a', 'b']) | |
expect(tree['a']['b'].count).to(equal(1)) | |
expect(tree['b']['a'].count).to(equal(0)) | |
def test_get_depth_tuples(self): | |
tree = SuffixTree.from_seq(['a', 'a', 'b']) | |
expect(tree.items()).to(equal(set(( | |
(('a', '$'), 2), | |
(('a', 'a', '$'), 1), | |
(('a', 'a', 'b', '$'), 1), | |
(('a', 'b', '$'), 1), | |
(('b', '$'), 1), | |
)))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment