Skip to content

Instantly share code, notes, and snippets.

@sumchattering
Created March 2, 2012 06:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sumchattering/1956110 to your computer and use it in GitHub Desktop.
Save sumchattering/1956110 to your computer and use it in GitHub Desktop.
Python Word Chainer
# !/usr/bin/env python
# encoding: utf-8
# Written by Sumeru Chatterjee
import sys
import pdb
import os
import pprint
characters = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
def main():
sys.setrecursionlimit(2000)
usage = '''\nWord Chainer.
\nFinds if a possible chain of operations exists between two words in a list
\nUsage\n-------\n%s <path to words file>'''%sys.argv[0]
if len(sys.argv)<2:
print usage
sys.exit(0)
wordfile = sys.argv[1];
try:
words = open(wordfile).read().splitlines()
chaingraph = WordChainGraph(words)
except IOError as e:
print 'There was a problem reading the file '+wordfile
sys.exit(0)
while(1):
inputwords = raw_input('\nEnter two words seperated by space :').split(' ')
if not len(inputwords)==2 or not inputwords[0] or not inputwords[1]:
print "Invalid Input"
continue
first = inputwords[0].lower()
second = inputwords[1].lower()
if not chaingraph.hasWord(first):
print "Word %s does not exist in list"%first
continue
if not chaingraph.hasWord(second):
print "Word %s does not exist in list"%second
continue
wordchain = chaingraph.findChainBetweenWords(first,second)
if len(wordchain):
print "The chain between the words is %s"%(",".join(wordchain))
else:
print "There is no chain between the given words"
class WordChainGraph(object):
def __init__(self,words):
self.initialize(words)
def __str__(self):
return "\n\nNodes -> \n" + str(self.nodeMap) + "\nEdges ->\n" + pprint.pformat(self.edgeMap)
def __repr__(self):
return self.__str__()
def hasWord(self,word):
return (word in self.nodeMap)
def findChainBetweenWords(self,word1,word2):
#Get the corresponding nodes
startNode = self.nodeMap[word1]
endNode = self.nodeMap[word2]
wordchain = []
#see if there is a path between them
path = self.findPathBetweenNodes(startNode,endNode)
if path:
for node in path:
wordchain.append(node.word)
return wordchain
def findPathBetweenNodes(self,start, end, path=[]):
todo = [[start, [start]]]
while 0 < len(todo):
(node, path) = todo.pop(0)
for next_node in self.edgeMap[node]:
if next_node in path:
continue
elif next_node == end:
return path + [next_node]
else:
todo.append([next_node, path + [next_node]])
def initialize(self,words):
print "Processing words...."
total = len(words);
current = 1
#Create the Nodes and the NodeMap which is a mapping between words and their corresponding node objects
self.nodeMap = {}
self.edgeMap = {}
for word in words:
lowerword = word.lower()
if lowerword in self.nodeMap:
continue;
else:
node = WordNode(lowerword)
self.nodeMap[lowerword] = node
#Now create the edgeMap which is a mapping between a node object and a list of all the nodes it is connected to
self.edgeMap = {}
for word in words:
#Show status
progress = (float(current)/float(total))*100;
sys.stdout.write("\r%f%%"%progress)
sys.stdout.flush()
current = current+1;
lowerword = word.lower()
currentNode = self.nodeMap[lowerword]
connectedNodes = set()
#check all words that can be created by insertions
for character in characters:
for index in range(0,len(lowerword)+1):
#Get a possible word from insertion
wordlist = list(lowerword)
wordlist.insert(index,character)
newword = "".join(wordlist)
if(newword!=lowerword) and (newword in self.nodeMap):
#get the new node
node = self.nodeMap[newword]
#add the node to the list of nodes for the current node
connectedNodes.add(node)
#add the current node to the list of nodes for the node
if(node in self.edgeMap):
self.edgeMap[node].add(currentNode)
else:
self.edgeMap[node] = set([currentNode])
#If the index is after the end then dont try replacing
if(index==len(lowerword)):
continue
#get a possible word from replacement
wordlist = list(lowerword)
wordlist[index] = character
newword = "".join(wordlist)
if (newword!=lowerword) and (newword in self.nodeMap):
#get the new node
node = self.nodeMap[newword]
#add the node to the list of nodes for the current node
connectedNodes.add(node)
#add the current node to the list of nodes for the node
if(node in self.edgeMap):
self.edgeMap[node].add(currentNode)
else:
self.edgeMap[node]=set([currentNode])
if(currentNode in self.edgeMap):
self.edgeMap[currentNode].update(connectedNodes)
else:
self.edgeMap[currentNode]=connectedNodes
class WordNode(object):
def __init__(self,word):
self.word = word
def __str__(self):
return "("+self.word+")"
def __repr__(self):
return self.__str__()
if __name__ == "__main__":
sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment