Skip to content

Instantly share code, notes, and snippets.

@andrewtremblay
Created December 25, 2015 01:15
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andrewtremblay/389da9fb27aac1c386cf to your computer and use it in GitHub Desktop.
Save andrewtremblay/389da9fb27aac1c386cf to your computer and use it in GitHub Desktop.
import nltk
import functools
def memoize_anagrams(obj):
    """Cache the results of an anagram-search function.

    The cache key is built from every argument of the call, so the same
    jumbled letters searched against a different word list are computed
    separately instead of being answered with a result that belongs to
    another word list.  List arguments are converted to tuples so they can
    take part in the key; the wrapped function still receives the caller's
    original objects.

    Args:
        obj: The function to wrap.  A ``cache`` dict is attached to it and,
            via ``functools.wraps``, exposed on the returned wrapper too.

    Returns:
        A wrapper with the same call signature as ``obj``.  Cached results
        are returned as the same object on every hit, so callers should not
        mutate them.
    """
    cache = obj.cache = {}

    def freeze(value):
        # Lists are unhashable; a tuple with the same items is an
        # equivalent, hashable stand-in for use inside the key.
        return tuple(value) if isinstance(value, list) else value

    @functools.wraps(obj)
    def memoizer(*args, **kwargs):
        key = (
            tuple(freeze(arg) for arg in args),
            tuple(sorted((name, freeze(value)) for name, value in kwargs.items())),
        )
        try:
            return cache[key]
        except KeyError:
            pass  # first time this argument combination is seen
        except TypeError:
            # Some argument is still unhashable (e.g. a list of lists):
            # compute the answer without caching rather than failing.
            return obj(*args, **kwargs)
        result = cache[key] = obj(*args, **kwargs)
        return result

    return memoizer
@memoize_anagrams
def get_anagrams(jumbled_letters, sorted_wordlist):
    """Find every combination of words that uses up all the jumbled letters.

    Args:
        jumbled_letters: The letters to consume (presumably a ``str``; it is
            measured with ``len`` and rebuilt with ``"".join``).
        sorted_wordlist: Candidate words.  Any iterable is accepted; it is
            copied into a list before use.

    Returns:
        A list of matches, each match being a list of words whose letters
        together are exactly ``jumbled_letters``.  Within a match the words
        follow their order in ``sorted_wordlist`` (a word may repeat),
        because each recursive search only looks at the current word and the
        ones after it.
    """
    anagram_results = []
    words = list(sorted_wordlist)  # private copy that can always be sliced
    letter_distribution = nltk.FreqDist(jumbled_letters)
    for position, w in enumerate(words):
        # Reject None and "" before touching the word: None cannot be
        # counted, and an empty word would recurse forever on the same
        # letters.
        if not w:
            continue
        # The word must be buildable from the available letters.
        if not nltk.FreqDist(w) <= letter_distribution:
            continue
        if len(w) == len(jumbled_letters):
            # The word alone consumes every letter.
            anagram_results.append([w])
        elif len(w) < len(jumbled_letters):
            trim_letters = list(jumbled_letters)
            try:
                for char in w:
                    trim_letters.remove(char)
            except ValueError:
                # Defensive: a letter was missing after all, skip this word.
                continue
            remaining_jumbled = "".join(trim_letters)
            # Recurse on the leftover letters, restricted to this word and
            # the ones after it so the same set of words is not produced
            # again in a different order.
            trimmed_matches = get_anagrams(remaining_jumbled, words[position:])
            for match in trimmed_matches:
                next_match = [w]
                next_match.extend(match)
                anagram_results.append(next_match)
    return anagram_results
def save_results(jumbled_letters, required_words, anagram_results):
    """Write the anagram matches to a CSV file.

    The file is named ``<jumbled_letters>_improved_anagrams.csv``; when
    ``required_words`` is non-empty, those words joined by ``_`` are put in
    front of that name.  The name is used as given, so a plain name lands in
    the current working directory.

    Args:
        jumbled_letters: Letters the anagrams were built from (used in the
            file name).
        required_words: Words to prefix the file name with; may be empty.
        anagram_results: Iterable of matches, each an iterable of strings.
            Every match becomes one comma-separated line, and each line is
            preceded (not followed) by a newline.
    """
    filename = jumbled_letters + '_improved_anagrams.csv'
    if required_words:
        filename = "_".join(required_words) + "_" + filename
    # The context manager guarantees the file is flushed and closed even if
    # a write fails part-way through.
    with open(filename, 'w') as anagram_results_file:
        for anagram in anagram_results:
            anagram_results_file.write("\n" + ','.join(anagram))
def main(jumbled_letters):
    """Find multi-word anagrams of ``jumbled_letters``, print and save them.

    The NLTK ``words`` corpus is narrowed down to lowercase words that are
    longer than ``min_word_length``, no longer than the phrase, and buildable
    from its letters.  The survivors are ordered longest first (ties broken
    alphabetically so runs are repeatable) and handed to ``get_anagrams``.
    The matches are printed and written out through ``save_results``.

    Args:
        jumbled_letters: The phrase to rearrange; it is lowercased first.
    """
    jumbled_letters = jumbled_letters.lower()
    required_words = []
    len_full_phrase = len(jumbled_letters)
    min_word_length = 2
    # Built once rather than once per dictionary entry.
    letter_distribution = nltk.FreqDist(jumbled_letters)
    candidates = set()  # a set drops duplicates created by lowercasing
    for w in nltk.corpus.words.words():
        # Lowercase BEFORE filtering: the phrase is lowercase, so a
        # capitalised dictionary entry could otherwise never match.
        lowered = w.lower()
        # Cheap length test first; note the lower bound is strict.
        if not min_word_length < len(lowered) <= len_full_phrase:
            continue
        if nltk.FreqDist(lowered) <= letter_distribution:
            candidates.add(lowered)
    sorted_wordlist = sorted(candidates, key=lambda s: (-len(s), s))
    results = get_anagrams(jumbled_letters, sorted_wordlist)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(results)
    save_results(jumbled_letters, required_words, results)
    print("saved")
if __name__ == "__main__":
    import sys

    # The letters may be given as the first command-line argument; with no
    # argument the script falls back to its built-in example phrase.
    main(sys.argv[1] if len(sys.argv) > 1 else "andrewtremblay")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment