Created
December 10, 2010 16:23
-
-
Save shawnchin/736416 to your computer and use it in GitHub Desktop.
a trie-based prefix matching utility
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Description: a trie-based prefix matching utility | |
# Author: Shawn Chin (http://shawnchin.github.com) | |
# | |
# Usage: | |
# keys = ["BANANA", "APPLE", "DURIAN", "MANGO"] | |
# m = PrefixMatch(keys) | |
# m.add_key("ORANGE") # extend list of keys | |
# m.match("BANANA") # returns True (matches key) | |
# m.match("ORANGEBOY") # returns True (starts with key) | |
# m.match("APP") # returns False (no match) | |
# | |
MATCH = 99 # magic number used to mark end-of-key node | |
class PrefixMatch: | |
""" | |
A trie-based prefix matching utility | |
""" | |
def __init__(self, keys=[]): | |
self.root = {} | |
for key in keys: | |
self.add_key(key) | |
def _get_deepest_match(self, target): | |
""" | |
Returns tuple of (node, depth) where node is the deepest node that | |
matches the target while depth is the trie depth (equals to number | |
of chars in target that matches existing key set. | |
""" | |
depth = 0 | |
node = self.root | |
for ch in target: | |
if not node.has_key(ch): | |
break | |
else: | |
depth += 1 | |
node = node[ch] | |
return (node, depth) | |
def add_key(self, key): | |
""" | |
Adds key to trie | |
""" | |
if not key: return # reject empty string | |
node, depth = self._get_deepest_match(key) | |
for ch in key[depth:]: | |
node[ch] = {} | |
node = node[ch] | |
node[MATCH] = None # mark end node | |
def match(self, target): | |
""" | |
Returns true if target string starts with a registered key. | |
""" | |
node, _ = self._get_deepest_match(target) | |
return node.has_key(MATCH) | |
if __name__ == "__main__": | |
# TODO: use DOCTEST instead | |
keys = ["BANANA", "APPLE", "DURIAN", "MANGO"] | |
m = PrefixMatch(keys) | |
m.add_key("ORANGE") # extend list of keys | |
if not m.match("BANANA"): print "FAIL" | |
else: print "OK" | |
if not m.match("ORANGEBOY"): print "FAIL" | |
else: print "OK" | |
if m.match("APP"): print "FAIL" | |
else: print "OK" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment