Skip to content

Instantly share code, notes, and snippets.

@harrytallbelt
Last active June 10, 2020 17:09
Show Gist options
  • Save harrytallbelt/cd092daf0f36cfb1b4218361bdee7793 to your computer and use it in GitHub Desktop.
Save harrytallbelt/cd092daf0f36cfb1b4218361bdee7793 to your computer and use it in GitHub Desktop.
Finds all anagrams for given words (Python 3).
import math
import itertools
# Tested with dictionary from github.com/dwyl/english-words
WORDS_DICT_FILENAME = 'words_dictionary.json'
__dict = None
def get_valid_words():
global __dict
if __dict:
return __dict
with open(WORDS_DICT_FILENAME, 'r', encoding='utf-8') as dict_file:
__dict = eval(dict_file.read())
return __dict
def character_occurances(word):
res = {}
for c in word:
res[c] = res[c] + 1 if c in res else 1
return res
def are_anagrams(word1, word2):
if len(word1) != len(word2):
return False
return character_occurances(word1) == character_occurances(word2)
def gen_anagrams(word, valid_words):
word = word.lower()
permutation_number = math.factorial(len(word))
# If there are more possible character permutations of the input word,
# than there are known valid words, then it is faster to simply check
# every valid word for whether it's the input word's anagram.
if permutation_number > len(valid_words):
for dict_word in valid_words:
if are_anagrams(dict_word, word):
yield dict_word
return
# The word can have several occurances of one character, which means
# some of its character permutations can be equal to others. (Swapping 'o's
# in 'cowboy' doesn't change the word, but it is a valid permutation.)
# We do not filter those non-unique permutations out, as it would require
# storing all of them, which can take a lot of memory (factorial of
# the word's length). Instead, we use a set to filter out repeated anagrams.
anagrams = set()
permutations = map(''.join, itertools.permutations(word))
for possible_anagram in permutations:
if possible_anagram in valid_words and not possible_anagram in anagrams:
anagrams.add(possible_anagram)
yield possible_anagram
if __name__ == '__main__':
import sys
words = sys.argv[1:]
if not words:
print('No words given in command line arguments.', end='\n\n')
exit()
print('Initializing word list...', end='\n\n')
valid_words = get_valid_words()
for word in words:
word = word.lower()
perm_number = math.factorial(len(word))
print(f'Anagrams for "{word}" (out of {perm_number} options):')
for anagram in gen_anagrams(word, valid_words):
print('-', anagram)
print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment