Skip to content

Instantly share code, notes, and snippets.

@msmoot
Last active August 29, 2015 14:13
Show Gist options
  • Save msmoot/0a379397363cd67cdd1c to your computer and use it in GitHub Desktop.
Save msmoot/0a379397363cd67cdd1c to your computer and use it in GitHub Desktop.
# An anagram is a word formed by rearranging the letters of another, like "topside" and
# "deposit". In some cases, there might be as many (or more) anagrams than there are
# characters, like "post", "spot", "stop" and "tops".
#
# Write a program to find all of the anagrams in a dictionary in which there are at
# least 4 letters in the word and at least as many anagrams as there are letters.
#
# The dictionary will be a file on disk with one line per word. Your operating system
# likely already has such a file in /usr/dict/words or /usr/share/dict/words.
#
# The output produced by your code should be in this format:
#
# emit, item, mite, time
# merit, miter, mitre, remit, timer
# reins, resin, rinse, risen, serin, siren
# inert, inter, niter, retin, trine
# inset, neist, snite, stein, stine, tsine
from itertools import permutations
from collections import defaultdict
def loadWords(dictFile = '/usr/share/dict/words', minLetters = 4, maxLetters = 999):
# Load dictionary found in 'dictFile'
# Only save words that are at least 'minLetters' letters long in the dictionary
# 'maxLetters' used for debug purposes (from debugging findAnagramsSlow())
return set(wordIn.strip() for wordIn in open(dictFile) if (minLetters <= len(wordIn.strip()) <= maxLetters))
def findAnagramsSlow(realWords):
# !!! xXx !!! - Do not run this with long words unless you are trying punish your computer
testedWords = set()
# Step 2: Find anagrams
for word in realWords:
if word not in testedWords:
# Step 2a: Find all permutations of each string
# !!! xXx !!! -- This step alone hangs for a single long word
toTest = set(''.join(perm) for perm in permutations(word))
# Step 2b: Search through permutations and test if they are in our word list
confirmed = []
for maybeAnagram in toTest:
# if maybeAnagram in (realWords - testedWords):
if maybeAnagram in realWords:
confirmed.append(maybeAnagram)
# Consider deleting words from word list as we use them to speed it up
# realWords.remove(maybeAnagram) ...nvm changing size during iteration is no bueno
# Commenting out the 'testedWords' updates speeds things up but every anagram is repeated
# testedWords |= set(confirmed)
# Step 2c: If there are enough anagrams, print them out (alphabetized)
if (len(confirmed) >= len(word)):
confirmed.sort()
print ', '.join(confirmed)
def groupAnagrams(realWords):
# Group all anagrams together
anagramsDict = defaultdict(list)
for word in realWords:
# Load into dictionary according to key that will be common only to anagrams
# Assume case sensitive
key = ''.join(sorted(word))
anagramsDict[key].append(word)
return anagramsDict
def printAnagrams(anagramsDict):
# Print anagrams when there are "at least as many anagrams as there are letters"
for key in anagramsDict.keys():
if len(anagramsDict[key]) >= len(key):
anagramsDict[key].sort()
print ', '.join(anagramsDict[key])
## Here's how to run it
realWords = loadWords(dictFile = '/usr/share/dict/words', minLetters = 4, maxLetters = 999)
anaDict = groupAnagrams(realWords)
printAnagrams(anaDict)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment