Skip to content

Instantly share code, notes, and snippets.

@damzam
Last active August 29, 2015 14:04
Show Gist options
  • Save damzam/499e1eec2a84e558140d to your computer and use it in GitHub Desktop.
Save damzam/499e1eec2a84e558140d to your computer and use it in GitHub Desktop.
solve a word jumble
#!/usr/bin/env python
"""
A script that generates all possible words that can be formed from
the characters in a provided string.
Matches are case-insensitive and the wordlist used is obtained from:
https://raw.githubusercontent.com/mattt/mobius/master/data/english-wordlist
Usage:
./jumble_it_up.py <string>
* Single-letter words will not be included in the output
** If, in the case that the number of occurrences of a single character
in a word exceeds the number of that character in the string, it will
not be included in the output
"""
import argparse
from collections import defaultdict
from urllib import urlopen
WORDLIST_URL = 'https://raw.githubusercontent.com/mattt/mobius/master/data/english-wordlist'
def generate_wordlist(string, wordlist):
"""
Return a list of all valid English words that can be generated
from the characters in parameter string (see the __doc__ above)
"""
def _get_char_dict(chars):
"""
A utility function that creates a dictionary with the
respective counts of each character in a string
"""
char_dict = defaultdict(int)
for char in chars:
char_dict[char] += 1
return char_dict
# We want the characters and respective counts of the string provided
string_dict = _get_char_dict(string)
matches = []
# Iterate through the words and see if can create them from the characters in string
for word in wordlist:
if len(word) > len(string):
continue # It's not a match if the word's longer than string
# We also want the characters and respective counts of the word
word_dict = _get_char_dict(word)
# We're using the variable 'possible match' to break out of a nested loop
# without raising exceptions (which are really slow). The other option
# would be to use enumerate, but that would also be significantly slower.
possible_match = True
while possible_match:
for char in word_dict.iterkeys():
if char not in string_dict or word_dict[char] > string_dict[char]:
possible_match = False
break
if possible_match and len(word) != 1: # We're not returning single characters
matches.append(word)
break
return matches
def main():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('string', help='available characters')
p = parser.parse_args()
wordlist = urlopen(WORDLIST_URL).read().lower().split()
for match in generate_wordlist(p.string.lower(), wordlist):
print match
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment