Skip to content

Instantly share code, notes, and snippets.

@cjdd3b
Created December 18, 2013 20:11
Show Gist options
  • Save cjdd3b/8029089 to your computer and use it in GitHub Desktop.
Save cjdd3b/8029089 to your computer and use it in GitHub Desktop.
def levenshtein(a,b):
"Calculates the Levenshtein distance between a and b."
n, m = len(a), len(b)
if n > m:
# Make sure n <= m, to use O(min(n,m)) space
a,b = b,a
n,m = m,n
current = range(n+1)
for i in range(1,m+1):
previous, current = current, [i]+[0]*n
for j in range(1,n+1):
add, delete = previous[j]+1, current[j-1]+1
change = previous[j-1]
if a[j-1] != b[i-1]:
change = change + 1
current[j] = min(add, delete, change)
return current[n]
class BkTree(object):
"A Bk-Tree implementation."
def __init__(self, root, distance_function):
self.df = distance_function
self.root = root
self.tree = (root, {})
def build(self, words):
"Build the tree."
for word in words:
self.tree = self.insert(self.tree, word)
def insert(self, node, word):
"Inserts a word in the tree."
d = self.df(word, node[0])
if d not in node[1]:
node[1][d] = (word, {})
else:
self.insert(node[1][d], word)
return node
def query(self, word, n):
"Returns a list of words that have the specified edit distance from the search word."
def search(node):
d = self.df(word, node[0])
results = []
if d == n:
results.append(node[0])
for i in range(d-n, d+n+1):
children = node[1]
if i in children:
results.extend(search(node[1][i]))
return results
root = self.tree
return search(root)
if __name__ == '__main__':
words = ['cat', 'hat', 'tacos', 'monacles']
t = BkTree('', levenshtein)
t.build(words)
print t.query('mat', 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment