Created
December 22, 2015 00:33
-
-
Save andrewtremblay/a4cac2140121366c30c2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
all_words = nltk.corpus.words.words() | |
jumbled_letters = "andrewtremblay" | |
anagram_results = [] | |
len_full_phrase = len(jumbled_letters) | |
letter_distribution = nltk.FreqDist(jumbled_letters) | |
trimmed_lowercase_wordlist = [w.lower() for w in all_words if len(w) <= len_full_phrase] | |
matching_wordlist = [w for w in trimmed_lowercase_wordlist if nltk.FreqDist(w) <= letter_distribution] | |
sorted_wordlist = sorted(matching_wordlist, key = lambda s: len(s) * -1 ) | |
len_wordlist = len(sorted_wordlist) | |
#for faster runtime we move these out of the while loops | |
nospace = "".join | |
distribution = nltk.FreqDist | |
# | |
i_first_word = 0 | |
while i_first_word < len_wordlist: | |
first_word = sorted_wordlist[i_first_word] | |
len_first_word = len(first_word) | |
remaining_letters = list(jumbled_letters) | |
if len_first_word == len_full_phrase: | |
sentence = [first_word, "", ""] | |
anagram_results.append(sentence) | |
print "anagram found: ",sentence | |
elif len_first_word < len_full_phrase: | |
#strip the matching word from the remaining letters | |
first_word_chars = list(first_word) | |
for char in first_word_chars: | |
remaining_letters.remove(char) | |
i_second_word = i_first_word | |
while i_second_word < len_wordlist: | |
second_word = sorted_wordlist[i_second_word] | |
len_second_word = len(second_word) | |
if len_second_word + len_first_word == len_full_phrase: | |
sentence = [first_word, second_word, ""] | |
if distribution(nospace(sentence)) == letter_distribution: | |
anagram_results.append(sentence) | |
print "anagram found: ",sentence | |
elif len_second_word + len_first_word < len_full_phrase and nltk.FreqDist(second_word) <= nltk.FreqDist(remaining_letters): | |
#strip the matching word from the remaining letters | |
second_word_chars = list(second_word) | |
for char in second_word_chars: | |
remaining_letters.remove(char) | |
i_third_word = i_second_word | |
while i_third_word < len_wordlist: | |
third_word = sorted_wordlist[i_third_word] | |
len_third_word = len(third_word) | |
if len_first_word + len_second_word + len_third_word == len_full_phrase: | |
sentence = [first_word, second_word, third_word] | |
if distribution(nospace(sentence)) == letter_distribution: | |
anagram_results.append(sentence) | |
print "anagram found: ",sentence | |
#only go up to three words in the anagram | |
i_third_word += 1 | |
i_second_word += 1 | |
i_first_word += 1 | |
# | |
# write anagram results to file | |
# | |
filename = jumbled_letters+'_anagrams.csv' | |
anagram_results_file = open(filename, 'w') | |
for anagram in anagram_results: | |
line = ','.join(anagram) | |
anagram_results_file.write("\n"+line) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment