Last active
August 29, 2015 14:13
-
-
Save mfm24/c05d9285b736b59c6cfa to your computer and use it in GitHub Desktop.
Finds words composed of valid Scrabble subwords
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Sat Jan 10 18:34:00 2015 | |
@author: matt | |
""" | |
from __future__ import print_function | |
from functools import wraps | |
from collections import defaultdict | |
path = "../localanagrampage/wordswithfriends.txt" | |
words = {x.strip() | |
for x in open(path, 'rt').readlines()} | |
def memo(func): | |
@wraps(func) | |
def wrapped(*args): | |
if args not in wrapped.cache: | |
wrapped.cache[args] = func(*args) | |
return wrapped.cache[args] | |
wrapped.cache = {} | |
return wrapped | |
@memo | |
def get_n_letter(n): | |
"""Return all words with n letters""" | |
return {x for x in words if len(x)==n} | |
@memo | |
def made_of_subwords_recursive(word, subwordlength=2): | |
lw = len(word) | |
if lw < subwordlength: | |
return False | |
ok = word[:subwordlength] in get_n_letter(subwordlength) | |
if lw > subwordlength: | |
return ok and made_of_subwords_recursive(word[1:], subwordlength) | |
else: | |
return ok | |
@memo | |
def get_composites(subwordlength=2): | |
return {x for x in words if made_of_subwords_recursive(x, subwordlength)} | |
answers = get_composites(2) | |
print("finished, found %s words composed of two-letter words" % len(answers)) | |
print("\n".join(sorted(answers, key=len))) | |
@memo | |
def get_subwords_for_word(word, subwordlength=2): | |
""" | |
Return a set of all subwords in the word | |
""" | |
return set(word[x:x + subwordlength] | |
for x in range(len(word) - subwordlength +1)) | |
# now lets take out the words that match the most pairs | |
def make_sub_word_dicts(words, subwordlength=2, ignorelist=None): | |
""" | |
Splits words into subwords and returns 2 dictionaries: | |
1.mapping subwords to a list of all words with containing the subword. | |
eg [hat, had] -> {ha: [hat, had], at:[hat], ad:[had]} | |
2. Mapping words to subwords, eg | |
eg [hat, had] -> {hat: [ha, at], had: [ha, ad]} | |
If ignore list is specified any subwords in there get ignored. | |
(with subwordlength=2) | |
""" | |
ignorelist = ignorelist or {} | |
sub_to_words = defaultdict(set) | |
words_to_sub = defaultdict(set) | |
for w in words: | |
for subword in get_subwords_for_word(w, subwordlength): | |
if subword in ignorelist: | |
continue | |
sub_to_words[subword].add(w) | |
words_to_sub[w].add(subword) | |
return sub_to_words, words_to_sub | |
def make_from_longest(subwordlength=2): | |
""" We take the shortest word with the most pairs, recalculate and repeat | |
""" | |
ignored = set() | |
out_words = [] | |
while True: | |
sub, words = make_sub_word_dicts(get_composites(subwordlength), | |
subwordlength, | |
ignorelist=ignored) | |
if len(words) == 0: | |
break | |
new_word, subs = sorted(words.items(), key=lambda(k,v): len(v)*100-len(k))[-1] | |
ignored = ignored | set(subs) | |
#print ignored | |
out_words.append(new_word) | |
# print new_word, subs | |
return out_words | |
x = make_from_longest(2) | |
print("Found %s words encomapssing all two-letter words!" % len(x)) | |
print("\n".join(x)) | |
x = make_from_longest(3) | |
print("Found %s words encomapssing all three-letter words!" % len(x)) | |
print("\n".join(x)) | |
both = get_composites(2).intersection(get_composites(3)) | |
print("Found %s words composed of both 2 and 3-letter words" % len(both)) | |
print("\n".join(sorted(both, key=len))) | |
""" | |
One solution is (31 words) | |
kinetosomes set(['me', 'om', 'ki', 'ne', 'to', 'so', 'in', 'et', 'os', 'es']) | |
dishonored set(['on', 'no', 'ed', 'is', 'di', 're', 'sh', 'ho', 'or']) | |
tamoxifen set(['xi', 'mo', 'am', 'en', 'fe', 'ox', 'ta', 'if']) | |
myelitides set(['el', 'de', 'ye', 'it', 'li', 'ti', 'my', 'id']) | |
kabalas set(['ka', 'ab', 'ba', 'la', 'al', 'as']) | |
shadower set(['do', 'we', 'ad', 'ow', 'ha', 'er']) | |
pagoda set(['go', 'da', 'pa', 'ag', 'od']) | |
manumit set(['um', 'mi', 'ma', 'nu', 'an']) | |
boyar set(['ya', 'bo', 'ar', 'oy']) | |
rehemmed set(['em', 'mm', 'eh', 'he']) | |
unai set(['ai', 'na', 'un']) | |
lope set(['lo', 'pe', 'op']) | |
ofay set(['ay', 'of', 'fa']) | |
befit set(['be', 'ef', 'fi']) | |
nexus set(['ex', 'us', 'xu']) | |
ohm set(['hm', 'oh']) | |
ahi set(['ah', 'hi']) | |
joe set(['oe', 'jo']) | |
mut set(['mu', 'ut']) | |
zax set(['ax', 'za']) | |
pinup set(['pi', 'up']) | |
baaed set(['aa', 'ae']) | |
myoid set(['oi', 'yo']) | |
byelaw set(['by', 'aw']) | |
habitat set(['bi', 'at']) | |
si set(['si']) | |
qi set(['qi']) | |
uh set(['uh']) | |
wo set(['wo']) | |
gi set(['gi']) | |
""" | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment