Skip to content

Instantly share code, notes, and snippets.

@JakeAustwick
Created December 23, 2011 21:22
Show Gist options
  • Save JakeAustwick/1515386 to your computer and use it in GitHub Desktop.
Save JakeAustwick/1515386 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
#By Steve Hanov, 2011. Released to the public domain
import time
import sys
DICTIONARY = "/usr/share/dict/words";
TARGET = sys.argv[1]
MAX_COST = int(sys.argv[2])
# Keep some interesting statistics
NodeCount = 0
WordCount = 0
# The Trie data structure keeps a set of words, organized with one node for
# each letter. Each node has a branch for each letter that may follow it in the
# set of words.
class TrieNode:
def __init__(self):
self.word = None
self.children = {}
global NodeCount
NodeCount += 1
def insert( self, word ):
node = self
for letter in word:
if letter not in node.children:
node.children[letter] = TrieNode()
node = node.children[letter]
node.word = word
# read dictionary file into a trie
trie = TrieNode()
for word in open(DICTIONARY, "rt").read().split():
WordCount += 1
trie.insert( word )
print "Read %d words into %d nodes" % (WordCount, NodeCount)
# The search function returns a list of all words that are less than the given
# maximum distance from the target word
def search( word, maxCost ):
# build first row
currentRow = range( len(word) + 1 )
results = []
# recursively search each branch of the trie
for letter in trie.children:
searchRecursive( trie.children[letter], letter, word, currentRow,
results, maxCost )
return results
# This recursive helper is used by the search function above. It assumes that
# the previousRow has been filled in already.
def searchRecursive( node, letter, word, previousRow, results, maxCost ):
columns = len( word ) + 1
currentRow = [ previousRow[0] + 1 ]
# Build one row for the letter, with a column for each letter in the target
# word, plus one for the empty string at column 0
for column in xrange( 1, columns ):
insertCost = currentRow[column - 1] + 1
deleteCost = previousRow[column] + 1
if word[column - 1] != letter:
replaceCost = previousRow[ column - 1 ] + 1
else:
replaceCost = previousRow[ column - 1 ]
currentRow.append( min( insertCost, deleteCost, replaceCost ) )
# if the last entry in the row indicates the optimal cost is less than the
# maximum cost, and there is a word in this trie node, then add it.
if currentRow[-1] <= maxCost and node.word != None:
results.append( (node.word, currentRow[-1] ) )
# if any entries in the row are less than the maximum cost, then
# recursively search each branch of the trie
if min( currentRow ) <= maxCost:
for letter in node.children:
searchRecursive( node.children[letter], letter, word, currentRow,
results, maxCost )
start = time.time()
results = search( TARGET, MAX_COST )
end = time.time()
for result in results: print result
print "Search took %g s" % (end - start)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment