Skip to content

Instantly share code, notes, and snippets.

@mfm24
Last active August 29, 2015 14:13
Show Gist options
  • Save mfm24/c05d9285b736b59c6cfa to your computer and use it in GitHub Desktop.
Save mfm24/c05d9285b736b59c6cfa to your computer and use it in GitHub Desktop.
Finds words composed of valid Scrabble subwords
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 10 18:34:00 2015
@author: matt
"""
from __future__ import print_function
from functools import wraps
from collections import defaultdict
path = "../localanagrampage/wordswithfriends.txt"
words = {x.strip()
for x in open(path, 'rt').readlines()}
def memo(func):
@wraps(func)
def wrapped(*args):
if args not in wrapped.cache:
wrapped.cache[args] = func(*args)
return wrapped.cache[args]
wrapped.cache = {}
return wrapped
@memo
def get_n_letter(n):
"""Return all words with n letters"""
return {x for x in words if len(x)==n}
@memo
def made_of_subwords_recursive(word, subwordlength=2):
lw = len(word)
if lw < subwordlength:
return False
ok = word[:subwordlength] in get_n_letter(subwordlength)
if lw > subwordlength:
return ok and made_of_subwords_recursive(word[1:], subwordlength)
else:
return ok
@memo
def get_composites(subwordlength=2):
return {x for x in words if made_of_subwords_recursive(x, subwordlength)}
answers = get_composites(2)
print("finished, found %s words composed of two-letter words" % len(answers))
print("\n".join(sorted(answers, key=len)))
@memo
def get_subwords_for_word(word, subwordlength=2):
"""
Return a set of all subwords in the word
"""
return set(word[x:x + subwordlength]
for x in range(len(word) - subwordlength +1))
# now lets take out the words that match the most pairs
def make_sub_word_dicts(words, subwordlength=2, ignorelist=None):
"""
Splits words into subwords and returns 2 dictionaries:
1.mapping subwords to a list of all words with containing the subword.
eg [hat, had] -> {ha: [hat, had], at:[hat], ad:[had]}
2. Mapping words to subwords, eg
eg [hat, had] -> {hat: [ha, at], had: [ha, ad]}
If ignore list is specified any subwords in there get ignored.
(with subwordlength=2)
"""
ignorelist = ignorelist or {}
sub_to_words = defaultdict(set)
words_to_sub = defaultdict(set)
for w in words:
for subword in get_subwords_for_word(w, subwordlength):
if subword in ignorelist:
continue
sub_to_words[subword].add(w)
words_to_sub[w].add(subword)
return sub_to_words, words_to_sub
def make_from_longest(subwordlength=2):
""" We take the shortest word with the most pairs, recalculate and repeat
"""
ignored = set()
out_words = []
while True:
sub, words = make_sub_word_dicts(get_composites(subwordlength),
subwordlength,
ignorelist=ignored)
if len(words) == 0:
break
new_word, subs = sorted(words.items(), key=lambda(k,v): len(v)*100-len(k))[-1]
ignored = ignored | set(subs)
#print ignored
out_words.append(new_word)
# print new_word, subs
return out_words
x = make_from_longest(2)
print("Found %s words encomapssing all two-letter words!" % len(x))
print("\n".join(x))
x = make_from_longest(3)
print("Found %s words encomapssing all three-letter words!" % len(x))
print("\n".join(x))
both = get_composites(2).intersection(get_composites(3))
print("Found %s words composed of both 2 and 3-letter words" % len(both))
print("\n".join(sorted(both, key=len)))
"""
One solution is (31 words)
kinetosomes set(['me', 'om', 'ki', 'ne', 'to', 'so', 'in', 'et', 'os', 'es'])
dishonored set(['on', 'no', 'ed', 'is', 'di', 're', 'sh', 'ho', 'or'])
tamoxifen set(['xi', 'mo', 'am', 'en', 'fe', 'ox', 'ta', 'if'])
myelitides set(['el', 'de', 'ye', 'it', 'li', 'ti', 'my', 'id'])
kabalas set(['ka', 'ab', 'ba', 'la', 'al', 'as'])
shadower set(['do', 'we', 'ad', 'ow', 'ha', 'er'])
pagoda set(['go', 'da', 'pa', 'ag', 'od'])
manumit set(['um', 'mi', 'ma', 'nu', 'an'])
boyar set(['ya', 'bo', 'ar', 'oy'])
rehemmed set(['em', 'mm', 'eh', 'he'])
unai set(['ai', 'na', 'un'])
lope set(['lo', 'pe', 'op'])
ofay set(['ay', 'of', 'fa'])
befit set(['be', 'ef', 'fi'])
nexus set(['ex', 'us', 'xu'])
ohm set(['hm', 'oh'])
ahi set(['ah', 'hi'])
joe set(['oe', 'jo'])
mut set(['mu', 'ut'])
zax set(['ax', 'za'])
pinup set(['pi', 'up'])
baaed set(['aa', 'ae'])
myoid set(['oi', 'yo'])
byelaw set(['by', 'aw'])
habitat set(['bi', 'at'])
si set(['si'])
qi set(['qi'])
uh set(['uh'])
wo set(['wo'])
gi set(['gi'])
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment