mfm24/scrabble_pair_words.py

## scrabble_pair_words.py
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 10 18:34:00 2015

@author: matt
"""
from __future__ import print_function
from functools import wraps
from collections import defaultdict
path = "../localanagrampage/wordswithfriends.txt"
words = {x.strip()
         for x in open(path, 'rt').readlines()}

def memo(func):
    @wraps(func)
    def wrapped(*args):
        if args not in wrapped.cache:
            wrapped.cache[args] = func(*args)
        return wrapped.cache[args]
    wrapped.cache = {}
    return wrapped

@memo
def get_n_letter(n):
    """Return all words with n letters"""
    return {x for x in words if len(x)==n}

@memo
def made_of_subwords_recursive(word, subwordlength=2):
    lw = len(word)
    if lw < subwordlength:
        return False
    ok = word[:subwordlength] in get_n_letter(subwordlength)
    if lw > subwordlength:
        return ok and made_of_subwords_recursive(word[1:], subwordlength)
    else:
        return ok

@memo
def get_composites(subwordlength=2):
    return {x for x in words if made_of_subwords_recursive(x, subwordlength)}
answers = get_composites(2)
print("finished, found %s words composed of two-letter words" % len(answers))
print("\n".join(sorted(answers, key=len)))

@memo
def get_subwords_for_word(word, subwordlength=2):
    """
    Return a set of all subwords in the word
    """
    return set(word[x:x + subwordlength]
               for x in range(len(word) - subwordlength +1))

# now lets take out the words that match the most pairs
def make_sub_word_dicts(words, subwordlength=2, ignorelist=None):
    """
    Splits words into subwords and returns 2 dictionaries:
    1.mapping subwords to a list of all words with containing the subword.
    eg [hat, had] -> {ha: [hat, had], at:[hat], ad:[had]}
    2. Mapping words to subwords, eg
    eg [hat, had] -> {hat: [ha, at], had: [ha, ad]}
    If ignore list is specified any subwords in there get ignored.
    (with subwordlength=2)
    """
    ignorelist = ignorelist or {}
    sub_to_words = defaultdict(set)
    words_to_sub = defaultdict(set)
    for w in words:
        for subword in get_subwords_for_word(w, subwordlength):
            if subword in ignorelist:
                continue
            sub_to_words[subword].add(w)
            words_to_sub[w].add(subword)

    return sub_to_words, words_to_sub

def make_from_longest(subwordlength=2):
    """ We take the shortest word with the most pairs, recalculate and repeat
    """
    ignored = set()
    out_words = []
    while True:
        sub, words = make_sub_word_dicts(get_composites(subwordlength),
                                         subwordlength,
                                         ignorelist=ignored)
        if len(words) == 0:
            break
        new_word, subs = sorted(words.items(), key=lambda(k,v): len(v)*100-len(k))[-1]
        ignored = ignored | set(subs)
        #print ignored
        out_words.append(new_word)
        # print new_word, subs
    return out_words

x = make_from_longest(2)
print("Found %s words encomapssing all two-letter words!" % len(x))
print("\n".join(x))

x = make_from_longest(3)
print("Found %s words encomapssing all three-letter words!" % len(x))
print("\n".join(x))

both = get_composites(2).intersection(get_composites(3))
print("Found %s words composed of both 2 and 3-letter words" % len(both))
print("\n".join(sorted(both, key=len)))

"""
One solution is (31 words)
kinetosomes set(['me', 'om', 'ki', 'ne', 'to', 'so', 'in', 'et', 'os', 'es'])
dishonored set(['on', 'no', 'ed', 'is', 'di', 're', 'sh', 'ho', 'or'])
tamoxifen set(['xi', 'mo', 'am', 'en', 'fe', 'ox', 'ta', 'if'])
myelitides set(['el', 'de', 'ye', 'it', 'li', 'ti', 'my', 'id'])
kabalas set(['ka', 'ab', 'ba', 'la', 'al', 'as'])
shadower set(['do', 'we', 'ad', 'ow', 'ha', 'er'])
pagoda set(['go', 'da', 'pa', 'ag', 'od'])
manumit set(['um', 'mi', 'ma', 'nu', 'an'])
boyar set(['ya', 'bo', 'ar', 'oy'])
rehemmed set(['em', 'mm', 'eh', 'he'])
unai set(['ai', 'na', 'un'])
lope set(['lo', 'pe', 'op'])
ofay set(['ay', 'of', 'fa'])
befit set(['be', 'ef', 'fi'])
nexus set(['ex', 'us', 'xu'])
ohm set(['hm', 'oh'])
ahi set(['ah', 'hi'])
joe set(['oe', 'jo'])
mut set(['mu', 'ut'])
zax set(['ax', 'za'])
pinup set(['pi', 'up'])
baaed set(['aa', 'ae'])
myoid set(['oi', 'yo'])
byelaw set(['by', 'aw'])
habitat set(['bi', 'at'])
si set(['si'])
qi set(['qi'])
uh set(['uh'])
wo set(['wo'])
gi set(['gi'])
"""
	# -- coding: utf-8 --
	"""
	Created on Sat Jan 10 18:34:00 2015

	@author: matt
	"""
	from __future__ import print_function
	from functools import wraps
	from collections import defaultdict
	path = "../localanagrampage/wordswithfriends.txt"
	words = {x.strip()
	for x in open(path, 'rt').readlines()}

	def memo(func):
	@wraps(func)
	def wrapped(*args):
	if args not in wrapped.cache:
	wrapped.cache[args] = func(*args)
	return wrapped.cache[args]
	wrapped.cache = {}
	return wrapped

	@memo
	def get_n_letter(n):
	"""Return all words with n letters"""
	return {x for x in words if len(x)==n}

	@memo
	def made_of_subwords_recursive(word, subwordlength=2):
	lw = len(word)
	if lw < subwordlength:
	return False
	ok = word[:subwordlength] in get_n_letter(subwordlength)
	if lw > subwordlength:
	return ok and made_of_subwords_recursive(word[1:], subwordlength)
	else:
	return ok

	@memo
	def get_composites(subwordlength=2):
	return {x for x in words if made_of_subwords_recursive(x, subwordlength)}
	answers = get_composites(2)
	print("finished, found %s words composed of two-letter words" % len(answers))
	print("\n".join(sorted(answers, key=len)))

	@memo
	def get_subwords_for_word(word, subwordlength=2):
	"""
	Return a set of all subwords in the word
	"""
	return set(word[x:x + subwordlength]
	for x in range(len(word) - subwordlength +1))

	# now lets take out the words that match the most pairs
	def make_sub_word_dicts(words, subwordlength=2, ignorelist=None):
	"""
	Splits words into subwords and returns 2 dictionaries:
	1.mapping subwords to a list of all words with containing the subword.
	eg [hat, had] -> {ha: [hat, had], at:[hat], ad:[had]}
	2. Mapping words to subwords, eg
	eg [hat, had] -> {hat: [ha, at], had: [ha, ad]}
	If ignore list is specified any subwords in there get ignored.
	(with subwordlength=2)
	"""
	ignorelist = ignorelist or {}
	sub_to_words = defaultdict(set)
	words_to_sub = defaultdict(set)
	for w in words:
	for subword in get_subwords_for_word(w, subwordlength):
	if subword in ignorelist:
	continue
	sub_to_words[subword].add(w)
	words_to_sub[w].add(subword)

	return sub_to_words, words_to_sub

	def make_from_longest(subwordlength=2):
	""" We take the shortest word with the most pairs, recalculate and repeat
	"""
	ignored = set()
	out_words = []
	while True:
	sub, words = make_sub_word_dicts(get_composites(subwordlength),
	subwordlength,
	ignorelist=ignored)
	if len(words) == 0:
	break
	new_word, subs = sorted(words.items(), key=lambda(k,v): len(v)*100-len(k))[-1]
	ignored = ignored \| set(subs)
	#print ignored
	out_words.append(new_word)
	# print new_word, subs
	return out_words

	x = make_from_longest(2)
	print("Found %s words encomapssing all two-letter words!" % len(x))
	print("\n".join(x))

	x = make_from_longest(3)
	print("Found %s words encomapssing all three-letter words!" % len(x))
	print("\n".join(x))

	both = get_composites(2).intersection(get_composites(3))
	print("Found %s words composed of both 2 and 3-letter words" % len(both))
	print("\n".join(sorted(both, key=len)))

	"""
	One solution is (31 words)
	kinetosomes set(['me', 'om', 'ki', 'ne', 'to', 'so', 'in', 'et', 'os', 'es'])
	dishonored set(['on', 'no', 'ed', 'is', 'di', 're', 'sh', 'ho', 'or'])
	tamoxifen set(['xi', 'mo', 'am', 'en', 'fe', 'ox', 'ta', 'if'])
	myelitides set(['el', 'de', 'ye', 'it', 'li', 'ti', 'my', 'id'])
	kabalas set(['ka', 'ab', 'ba', 'la', 'al', 'as'])
	shadower set(['do', 'we', 'ad', 'ow', 'ha', 'er'])
	pagoda set(['go', 'da', 'pa', 'ag', 'od'])
	manumit set(['um', 'mi', 'ma', 'nu', 'an'])
	boyar set(['ya', 'bo', 'ar', 'oy'])
	rehemmed set(['em', 'mm', 'eh', 'he'])
	unai set(['ai', 'na', 'un'])
	lope set(['lo', 'pe', 'op'])
	ofay set(['ay', 'of', 'fa'])
	befit set(['be', 'ef', 'fi'])
	nexus set(['ex', 'us', 'xu'])
	ohm set(['hm', 'oh'])
	ahi set(['ah', 'hi'])
	joe set(['oe', 'jo'])
	mut set(['mu', 'ut'])
	zax set(['ax', 'za'])
	pinup set(['pi', 'up'])
	baaed set(['aa', 'ae'])
	myoid set(['oi', 'yo'])
	byelaw set(['by', 'aw'])
	habitat set(['bi', 'at'])
	si set(['si'])
	qi set(['qi'])
	uh set(['uh'])
	wo set(['wo'])
	gi set(['gi'])
	"""