Last active
August 29, 2015 14:13
-
-
Save msmoot/0a379397363cd67cdd1c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# An anagram is a word formed by rearranging the letters of another, like "topside" and | |
# "deposit". In some cases, there might be as many (or more) anagrams than there are | |
# characters, like "post", "spot", "stop" and "tops". | |
# | |
# Write a program to find all of the anagrams in a dictionary in which there are at | |
# least 4 letters in the word and at least as many anagrams as there are letters. | |
# | |
# The dictionary will be a file on disk with one line per word. Your operating system | |
# likely already has such a file in /usr/dict/words or /usr/share/dict/words. | |
# | |
# The output produced by your code should be in this format: | |
# | |
# emit, item, mite, time | |
# merit, miter, mitre, remit, timer | |
# reins, resin, rinse, risen, serin, siren | |
# inert, inter, niter, retin, trine | |
# inset, neist, snite, stein, stine, tsine | |
from itertools import permutations | |
from collections import defaultdict | |
def loadWords(dictFile = '/usr/share/dict/words', minLetters = 4, maxLetters = 999): | |
# Load dictionary found in 'dictFile' | |
# Only save words that are at least 'minLetters' letters long in the dictionary | |
# 'maxLetters' used for debug purposes (from debugging findAnagramsSlow()) | |
return set(wordIn.strip() for wordIn in open(dictFile) if (minLetters <= len(wordIn.strip()) <= maxLetters)) | |
def findAnagramsSlow(realWords): | |
# !!! xXx !!! - Do not run this with long words unless you are trying punish your computer | |
testedWords = set() | |
# Step 2: Find anagrams | |
for word in realWords: | |
if word not in testedWords: | |
# Step 2a: Find all permutations of each string | |
# !!! xXx !!! -- This step alone hangs for a single long word | |
toTest = set(''.join(perm) for perm in permutations(word)) | |
# Step 2b: Search through permutations and test if they are in our word list | |
confirmed = [] | |
for maybeAnagram in toTest: | |
# if maybeAnagram in (realWords - testedWords): | |
if maybeAnagram in realWords: | |
confirmed.append(maybeAnagram) | |
# Consider deleting words from word list as we use them to speed it up | |
# realWords.remove(maybeAnagram) ...nvm changing size during iteration is no bueno | |
# Commenting out the 'testedWords' updates speeds things up but every anagram is repeated | |
# testedWords |= set(confirmed) | |
# Step 2c: If there are enough anagrams, print them out (alphabetized) | |
if (len(confirmed) >= len(word)): | |
confirmed.sort() | |
print ', '.join(confirmed) | |
def groupAnagrams(realWords): | |
# Group all anagrams together | |
anagramsDict = defaultdict(list) | |
for word in realWords: | |
# Load into dictionary according to key that will be common only to anagrams | |
# Assume case sensitive | |
key = ''.join(sorted(word)) | |
anagramsDict[key].append(word) | |
return anagramsDict | |
def printAnagrams(anagramsDict): | |
# Print anagrams when there are "at least as many anagrams as there are letters" | |
for key in anagramsDict.keys(): | |
if len(anagramsDict[key]) >= len(key): | |
anagramsDict[key].sort() | |
print ', '.join(anagramsDict[key]) | |
## Here's how to run it | |
realWords = loadWords(dictFile = '/usr/share/dict/words', minLetters = 4, maxLetters = 999) | |
anaDict = groupAnagrams(realWords) | |
printAnagrams(anaDict) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment