msmoot/planagrams

## planagrams
# An anagram is a word formed by rearranging the letters of another, like "topside" and
#  "deposit". In some cases, there might be as many (or more) anagrams than there are
#  characters, like "post", "spot", "stop" and "tops".
#
# Write a program to find all of the anagrams in a dictionary in which there are at
#   least 4 letters in the word and at least as many anagrams as there are letters.
#
# The dictionary will be a file on disk with one line per word. Your operating system
#   likely already has such a file in /usr/dict/words or /usr/share/dict/words.
#
# The output produced by your code should be in this format:
#
#	emit, item, mite, time
#	merit, miter, mitre, remit, timer
#	reins, resin, rinse, risen, serin, siren
#	inert, inter, niter, retin, trine
#	inset, neist, snite, stein, stine, tsine

from itertools import permutations
from collections import defaultdict

def loadWords(dictFile = '/usr/share/dict/words', minLetters = 4, maxLetters = 999):
	# Load dictionary found in 'dictFile'
	#   Only save words that are at least 'minLetters' letters long in the dictionary
	#	'maxLetters' used for debug purposes (from debugging findAnagramsSlow())
	return set(wordIn.strip() for wordIn in open(dictFile) if (minLetters <= len(wordIn.strip()) <= maxLetters))

def findAnagramsSlow(realWords):
	# !!! xXx !!! - Do not run this with long words unless you are trying punish your computer
	testedWords = set()

	# Step 2: Find anagrams
	for word in realWords:
		if word not in testedWords:
			# Step 2a: Find all permutations of each string
			#  !!! xXx !!! -- This step alone hangs for a single long word
			toTest = set(''.join(perm) for perm in permutations(word))

			# Step 2b: Search through permutations and test if they are in our word list
			confirmed = []
			for maybeAnagram in toTest:
				# if maybeAnagram in (realWords - testedWords):
				if maybeAnagram in realWords:
					confirmed.append(maybeAnagram)
					# Consider deleting words from word list as we use them to speed it up
					# realWords.remove(maybeAnagram) ...nvm changing size during iteration is no bueno

			# Commenting out the 'testedWords' updates speeds things up but every anagram is repeated
			# testedWords |= set(confirmed)

			# Step 2c: If there are enough anagrams, print them out (alphabetized)
			if (len(confirmed) >= len(word)):
				confirmed.sort()
				print ', '.join(confirmed)

def groupAnagrams(realWords):
	# Group all anagrams together
	anagramsDict = defaultdict(list)
	for word in realWords:
		# Load into dictionary according to key that will be common only to anagrams
		#  Assume case sensitive
		key = ''.join(sorted(word))
		anagramsDict[key].append(word)
	return anagramsDict

def printAnagrams(anagramsDict):
	# Print anagrams when there are "at least as many anagrams as there are letters"
	for key in anagramsDict.keys():
		if len(anagramsDict[key]) >= len(key):
			anagramsDict[key].sort()
			print ', '.join(anagramsDict[key])


## Here's how to run it
realWords = loadWords(dictFile = '/usr/share/dict/words', minLetters = 4, maxLetters = 999)
anaDict = groupAnagrams(realWords)
printAnagrams(anaDict)
	# An anagram is a word formed by rearranging the letters of another, like "topside" and
	# "deposit". In some cases, there might be as many (or more) anagrams than there are
	# characters, like "post", "spot", "stop" and "tops".
	#
	# Write a program to find all of the anagrams in a dictionary in which there are at
	# least 4 letters in the word and at least as many anagrams as there are letters.
	#
	# The dictionary will be a file on disk with one line per word. Your operating system
	# likely already has such a file in /usr/dict/words or /usr/share/dict/words.
	#
	# The output produced by your code should be in this format:
	#
	# emit, item, mite, time
	# merit, miter, mitre, remit, timer
	# reins, resin, rinse, risen, serin, siren
	# inert, inter, niter, retin, trine
	# inset, neist, snite, stein, stine, tsine

	from itertools import permutations
	from collections import defaultdict

	def loadWords(dictFile = '/usr/share/dict/words', minLetters = 4, maxLetters = 999):
	# Load dictionary found in 'dictFile'
	# Only save words that are at least 'minLetters' letters long in the dictionary
	# 'maxLetters' used for debug purposes (from debugging findAnagramsSlow())
	return set(wordIn.strip() for wordIn in open(dictFile) if (minLetters <= len(wordIn.strip()) <= maxLetters))

	def findAnagramsSlow(realWords):
	# !!! xXx !!! - Do not run this with long words unless you are trying punish your computer
	testedWords = set()

	# Step 2: Find anagrams
	for word in realWords:
	if word not in testedWords:
	# Step 2a: Find all permutations of each string
	# !!! xXx !!! -- This step alone hangs for a single long word
	toTest = set(''.join(perm) for perm in permutations(word))

	# Step 2b: Search through permutations and test if they are in our word list
	confirmed = []
	for maybeAnagram in toTest:
	# if maybeAnagram in (realWords - testedWords):
	if maybeAnagram in realWords:
	confirmed.append(maybeAnagram)
	# Consider deleting words from word list as we use them to speed it up
	# realWords.remove(maybeAnagram) ...nvm changing size during iteration is no bueno

	# Commenting out the 'testedWords' updates speeds things up but every anagram is repeated
	# testedWords \|= set(confirmed)

	# Step 2c: If there are enough anagrams, print them out (alphabetized)
	if (len(confirmed) >= len(word)):
	confirmed.sort()
	print ', '.join(confirmed)

	def groupAnagrams(realWords):
	# Group all anagrams together
	anagramsDict = defaultdict(list)
	for word in realWords:
	# Load into dictionary according to key that will be common only to anagrams
	# Assume case sensitive
	key = ''.join(sorted(word))
	anagramsDict[key].append(word)
	return anagramsDict

	def printAnagrams(anagramsDict):
	# Print anagrams when there are "at least as many anagrams as there are letters"
	for key in anagramsDict.keys():
	if len(anagramsDict[key]) >= len(key):
	anagramsDict[key].sort()
	print ', '.join(anagramsDict[key])



	## Here's how to run it
	realWords = loadWords(dictFile = '/usr/share/dict/words', minLetters = 4, maxLetters = 999)
	anaDict = groupAnagrams(realWords)
	printAnagrams(anaDict)