Created
December 25, 2015 01:15
-
-
Save andrewtremblay/389da9fb27aac1c386cf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import functools
import sys

import nltk
def memoize_anagrams(obj):
    """Memoize ``obj`` on every argument it is called with.

    The cache used to be keyed on the first positional argument only (the
    jumbled letters).  The decorated function's result also depends on the
    word list it is given, so two calls with the same letters but different
    word lists wrongly shared one cached answer.  The key now covers all
    positional and keyword arguments; lists are snapshotted as tuples so
    that they can be hashed.

    The cache dict stays reachable as ``obj.cache`` (and, through
    ``functools.wraps``, as ``.cache`` on the returned wrapper).

    Note: a cache hit returns the very same object that was stored, so
    callers must treat results as read-only.
    """
    cache = obj.cache = {}

    def _freeze(value):
        # Lists are unhashable; turn them (recursively) into tuples.
        if isinstance(value, (list, tuple)):
            return tuple(_freeze(item) for item in value)
        return value

    @functools.wraps(obj)
    def memoizer(*args, **kwargs):
        # Positional and keyword parts are kept in separate sub-tuples so
        # that f(a, ("k", 1)) and f(a, k=1) can never collide.
        key = (
            tuple(_freeze(arg) for arg in args),
            tuple(sorted((name, _freeze(val)) for name, val in kwargs.items())),
        )
        try:
            return cache[key]
        except KeyError:
            pass
        except TypeError:
            # Some argument is unhashable even after freezing: skip caching
            # rather than fail the call.
            return obj(*args, **kwargs)
        result = cache[key] = obj(*args, **kwargs)
        return result

    return memoizer
@memoize_anagrams
def get_anagrams(jumbled_letters, sorted_wordlist):
    """Return every multi-word anagram of ``jumbled_letters``.

    Args:
        jumbled_letters: string of letters that must all be used up.
        sorted_wordlist: candidate words.  Words of a result are drawn in
            list order (a word may be reused), which keeps the same set of
            words from being reported in several different orders.

    Returns:
        A list of results; each result is a list of words whose letters
        together are exactly ``jumbled_letters``.
    """
    anagram_results = []
    letter_distribution = nltk.FreqDist(jumbled_letters)
    for position, w in enumerate(sorted_wordlist):
        # Skip missing/empty entries up front: an empty word would consume
        # no letters and make the recursion below never terminate.
        if not w:
            continue
        # The word must be spellable from the available letters.
        if not nltk.FreqDist(w) <= letter_distribution:
            continue
        if len(w) == len(jumbled_letters):
            # The word uses every remaining letter: a complete match.
            anagram_results.append([w])
            continue

        # Remove the word's letters to find what is still left to spell.
        leftover = list(jumbled_letters)
        try:
            for char in w:
                leftover.remove(char)
        except ValueError:
            # Defensive only: the FreqDist test above should already have
            # rejected any word needing a letter that is not available.
            continue

        # Recurse on the leftover letters, allowing only this word and the
        # ones after it.  Using the loop position avoids an O(n) .index()
        # lookup and stays correct if the list contains duplicates.
        tail_matches = get_anagrams("".join(leftover), sorted_wordlist[position:])
        for match in tail_matches:
            if match is not None:
                # Build a fresh list; cached sub-results must not be mutated.
                anagram_results.append([w] + list(match))
    return anagram_results
def save_results(jumbled_letters, required_words, anagram_results):
    """Write the anagram results to a CSV file in the working directory.

    The file is named ``<jumbled_letters>_improved_anagrams.csv``; when
    ``required_words`` is non-empty, those words are joined with ``_`` and
    prefixed to the name.

    Args:
        jumbled_letters: the phrase the anagrams were built from.
        required_words: sequence of words used only to build the file name.
        anagram_results: iterable of word lists; each becomes one
            comma-separated line.

    Each line is written as a newline followed by the words, so a non-empty
    file starts with a blank line.  That layout is kept as is so existing
    output files stay byte-identical.
    """
    filename = jumbled_letters + '_improved_anagrams.csv'
    if required_words:
        filename = "_".join(required_words) + "_" + filename
    # The context manager guarantees the handle is flushed and closed, even
    # if a write fails; the file was previously never closed.
    with open(filename, 'w') as anagram_results_file:
        for anagram in anagram_results:
            anagram_results_file.write("\n" + ','.join(anagram))
def main(jumbled_letters):
    """Find all anagrams of ``jumbled_letters``, print them and save them.

    Candidate words come from the NLTK ``words`` corpus.  A word is kept
    when it is longer than ``min_word_length``, no longer than the phrase,
    and spellable from the phrase's letters.
    """
    jumbled_letters = jumbled_letters.lower()
    required_words = []  # only used to build the output file name
    len_full_phrase = len(jumbled_letters)
    min_word_length = 2  # exclusive bound: kept words have 3+ letters

    # Hoisted: the phrase's letter counts are the same for every word.
    phrase_distribution = nltk.FreqDist(jumbled_letters)

    # A set drops the duplicates that appear once words are lower-cased.
    candidate_words = set()
    for raw_word in nltk.corpus.words.words():
        if not min_word_length < len(raw_word) <= len_full_phrase:
            continue
        # Lower-case BEFORE the letter test.  The phrase is lower-case, so
        # testing the raw word silently rejected every capitalised entry.
        word = raw_word.lower()
        if nltk.FreqDist(word) <= phrase_distribution:
            candidate_words.add(word)

    # Longest words first; ties broken alphabetically so that the output
    # order is reproducible instead of depending on set iteration order.
    sorted_wordlist = sorted(candidate_words, key=lambda w: (-len(w), w))

    results = get_anagrams(jumbled_letters, sorted_wordlist)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(results)
    save_results(jumbled_letters, required_words, results)
    print("saved")
if __name__ == "__main__":
    # Optional command-line arguments are concatenated into the phrase, so
    # `script.py andrew tremblay` works without quoting.  With no arguments
    # the original hard-coded phrase is used.
    main("".join(sys.argv[1:]) or "andrewtremblay")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment