Last active
March 23, 2016 17:46
-
-
Save mtimkovich/10183495 to your computer and use it in GitHub Desktop.
Python code to generate word ladders using A* search. Should find the shortest word path between any two words. The word list I'm using can be found at http://norvig.com/ngrams/TWL06.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from heapq import * | |
import string | |
import sys | |
class WordNode: | |
def __init__(self, word, goal, distance, parent): | |
self.word = word | |
self.distance = distance | |
self.goal = goal | |
self.heuristic = self.letter_difference(self.goal) | |
self.parent = parent | |
def letter_difference(self, goal): | |
return len([i for i in range(len(self.word)) if self.word[i] != goal[i]]) | |
def cost(self): | |
return self.distance + self.heuristic | |
# Generate a list of words that are one letter off from this word | |
# Check that they are in the dictionary before adding to list | |
def get_children(self): | |
children = [] | |
for i in range(len(self.word)): | |
for l in string.ascii_uppercase: | |
child = self.word[:i] + l + self.word[i+1:] | |
if child != self.word and child in word_list: | |
children.append(WordNode(child, self.goal, self.distance+1, self)) | |
return children | |
def path(self): | |
path = [] | |
node = self | |
while node is not None: | |
path.append(node) | |
node = node.parent | |
return path[::-1] | |
def __lt__(self, other): | |
return self.cost() < other.cost() | |
def __eq__(self, other): | |
return self.cost() == other.cost() | |
# def __repr__(self): | |
# return str((self.word, self.distance, self.heuristic)) | |
def __repr__(self): | |
return self.word | |
if __name__ == "__main__": | |
if len(sys.argv) < 3: | |
print "Usage {}: start end".format(sys.argv[0]) | |
exit() | |
start = sys.argv[1].upper() | |
goal = sys.argv[2].upper() | |
if len(start) != len(goal): | |
print "Start and goal words must be the same length" | |
exit() | |
with open("TWL06.txt") as file: | |
word_list = file.read() | |
word_list = set(word_list.splitlines()) | |
if start not in word_list or goal not in word_list: | |
print "Start or goal is not a valid word" | |
exit() | |
frontier = [] | |
visited = set() | |
heappush(frontier, WordNode(start, goal, 0, None)) | |
while True: | |
if frontier: | |
word = heappop(frontier) | |
visited.add(word.word) | |
else: | |
print "Path not possible" | |
break | |
if word.word == goal: | |
for node in word.path(): | |
print node | |
break | |
for child in word.get_children(): | |
if child.word not in visited: | |
heappush(frontier, child) | |
# print frontier |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment