Skip to content

Instantly share code, notes, and snippets.

@baconator
Created February 22, 2015 22:50
Show Gist options
  • Save baconator/40e983017e8c6fa310d3 to your computer and use it in GitHub Desktop.
Save baconator/40e983017e8c6fa310d3 to your computer and use it in GitHub Desktop.
Word Combinations
import sys, argparse
from itertools import product, compress, combinations
from collections import defaultdict
'''
So a (valid) word combination is made up of
1. a series of words that max out at the target length
that for every masked result:
2. do _not_ match a previous masked result
3. _match_ a word in the dictionary
'''
parser = argparse.ArgumentParser(description="Finds word mask subsets.")
parser.add_argument("length", type=int, help="A target word length to fill.")
parser.add_argument("source", type=str, help="The path from where to load the word dictionary.")
args = parser.parse_args()
target_length = args.length
def noRepeats(word):
observed = set()
for letter in word:
if letter in observed:
return False
else:
observed.add(letter)
return True
def indexListToMask(index_list, length):
output = [True] * length
for i in index_list:
output[i] = False
return output
vowels = set("aeiouy")
# Remove non-boundary vowels
def disemvowel(word):
if len(word) <= 2:
yield word
else:
first = word[0]
last = word[-1]
middle = word[1:-1]
middle_vowels = [i for i, v in enumerate(middle) if v in vowels]
# generate every combination of including (or excluding) the vowels
for i in range(len(middle_vowels)+1):
for index_list in combinations(middle_vowels, i):
mask = indexListToMask(index_list, len(middle))
masked_word = first + "".join(list(compress(middle, mask))) + last
yield masked_word
def validCombination(words, masks, lexicon):
masked_results = set()
combination = "".join(words)
for mask in masks:
masked_word = "".join(compress(combination, mask))
if len(masked_word) == 0: # skip the trivial case
continue
if masked_word in masked_results or masked_word not in lexicon or not noRepeats(masked_word):
return False
else:
masked_results.add(masked_word)
return True
# shamelessly stolen from http://stackoverflow.com/questions/10035752/elegant-python-code-for-integer-partitioning
def partitions(number):
answer = set()
answer.add((number, ))
for x in range(1, number):
for y in partitions(number - x):
answer.add((x, ) + y)
return answer
def loadLines(path):
with open(path, "r") as f:
line = f.readline()
while(line):
yield line
line = f.readline()
if __name__ != "__main__":
sys.exit(0)
lines = set(line[0:-1].lower() for line in loadLines(args.source))
disemvoweled_words = (word for line in lines for word in disemvowel(line)) # disemvoweling helps find more possibilities, at the expense of legibility
words = set(word for word in disemvoweled_words if noRepeats(word))
words.update("abcdefghijklmnopqrstuvwxyz") # add in single letters ...
words.add("") # ... and the empty string.
word_lookup = defaultdict(list) # build up an index for finding words of length 'n'
for word in words:
word_lookup[len(word)].append(word)
pleasing_partitions = [(6,)]
# uncomment the below to generate more than just one word combinations -- note that they take a pretty big hit in readability
#pleasing_partitions = (partition for partition in partitions(target_length) if 1 not in partition)
word_lists = ([word_lookup[p] for p in partition] for partition in pleasing_partitions)
# unpack the nested word lists and generate their cartesian products, filtering for valid combinations
valid_words = ("".join(combination) for word_list in word_lists for combination in product(*word_list) if validCombination(combination, product([True, False], repeat=target_length), words))
for valid_word in valid_words:
print(valid_word)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment