Skip to content

Instantly share code, notes, and snippets.

@matthewfranglen
Created January 15, 2015 14:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save matthewfranglen/64e417598355f811d900 to your computer and use it in GitHub Desktop.
Save matthewfranglen/64e417598355f811d900 to your computer and use it in GitHub Desktop.
Example use of Trie to find substring matches
#!/usr/bin/env python
# pylint: disable=C0111, R0903
class Trie(object):
def __init__(self):
self.children = {}
def add_child(self, letter):
if letter in self.children:
return self.children[letter]
else:
child = Trie()
self.children[letter] = child
return child
def traverse(self, letter):
return self.children.get(letter, None)
def populate_trie(root, letters):
current_positions = []
for letter in letters:
current_positions = [
position.add_child(letter)
for position in current_positions
]
current_positions.append(root.add_child(letter))
return root
class TrieSearch(object):
def __init__(self, trie, starting_index):
self.trie = trie
self.starting_index = starting_index
self.ending_index = starting_index + 1
def update(self, letter):
""" This returns a boolean indicating
if the search can accept the letter """
self.trie = self.trie.traverse(letter)
if self.trie is not None:
self.ending_index = self.ending_index + 1
return True
return False
def get_match(self, letters):
return letters[self.starting_index:self.ending_index]
def find_matches(root, letters):
completed_matches = []
current_matches = []
for index, letter in enumerate(letters):
new_current = []
for current in current_matches:
if current.update(letter):
new_current.append(current)
else:
completed_matches.append(current)
new_search_trie = root.traverse(letter)
if new_search_trie is not None:
new_current.append(TrieSearch(new_search_trie, index))
current_matches = new_current
all_matches = completed_matches + current_matches
return [match.get_match(letters) for match in all_matches]
def run():
fiveprime = "GATTCGAAGTCCACTATC"
threeprime = "TGAGTAGGACGGCACTATC"
root = Trie()
populate_trie(root, fiveprime)
populate_trie(root, threeprime)
matches = find_matches(root, "CACTATCAAAAAAA")
print matches
if __name__ == "__main__":
run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment