damzam/jumble_it_up.py

## jumble_it_up.py
#!/usr/bin/env python
"""
A script that generates all possible words that can be formed from
the characters in a provided string.

Matches are case-insensitive and the wordlist used is obtained from:

https://raw.githubusercontent.com/mattt/mobius/master/data/english-wordlist

Usage:

./jumble_it_up.py <string>

* Single-letter words will not be included in the output
** If, in the case that the number of occurrences of a single character
   in a word exceeds the number of that character in the string, it will
   not be included in the output
"""
import argparse
from collections import defaultdict
from urllib import urlopen

WORDLIST_URL = 'https://raw.githubusercontent.com/mattt/mobius/master/data/english-wordlist'


def generate_wordlist(string, wordlist):
    """
    Return a list of all valid English words that can be generated
    from the characters in parameter string (see the __doc__ above)
    """
    def _get_char_dict(chars):
        """
        A utility function that creates a dictionary with the
        respective counts of each character in a string
        """
        char_dict = defaultdict(int)
        for char in chars:
            char_dict[char] += 1
        return char_dict

    # We want the characters and respective counts of the string provided
    string_dict = _get_char_dict(string)

    matches = []

    # Iterate through the words and see if can create them from the characters in string
    for word in wordlist:
        if len(word) > len(string):
            continue  # It's not a match if the word's longer than string

        # We also want the characters and respective counts of the word
        word_dict = _get_char_dict(word)

        # We're using the variable 'possible match' to break out of a nested loop
        # without raising exceptions (which are really slow). The other option
        # would be to use enumerate, but that would also be significantly slower.
        possible_match = True

        while possible_match:
            for char in word_dict.iterkeys():
                if char not in string_dict or word_dict[char] > string_dict[char]:
                    possible_match = False
                    break
            if possible_match and len(word) != 1:  # We're not returning single characters
                matches.append(word)
            break

    return matches


def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('string', help='available characters')
    p = parser.parse_args()
    wordlist = urlopen(WORDLIST_URL).read().lower().split()
    for match in generate_wordlist(p.string.lower(), wordlist):
        print match

if __name__ == '__main__':
    main()
	#!/usr/bin/env python
	"""
	A script that generates all possible words that can be formed from
	the characters in a provided string.

	Matches are case-insensitive and the wordlist used is obtained from:

	https://raw.githubusercontent.com/mattt/mobius/master/data/english-wordlist

	Usage:

	./jumble_it_up.py <string>

	* Single-letter words will not be included in the output
	** If, in the case that the number of occurrences of a single character
	in a word exceeds the number of that character in the string, it will
	not be included in the output
	"""
	import argparse
	from collections import defaultdict
	from urllib import urlopen

	WORDLIST_URL = 'https://raw.githubusercontent.com/mattt/mobius/master/data/english-wordlist'


	def generate_wordlist(string, wordlist):
	"""
	Return a list of all valid English words that can be generated
	from the characters in parameter string (see the __doc__ above)
	"""
	def _get_char_dict(chars):
	"""
	A utility function that creates a dictionary with the
	respective counts of each character in a string
	"""
	char_dict = defaultdict(int)
	for char in chars:
	char_dict[char] += 1
	return char_dict

	# We want the characters and respective counts of the string provided
	string_dict = _get_char_dict(string)

	matches = []

	# Iterate through the words and see if can create them from the characters in string
	for word in wordlist:
	if len(word) > len(string):
	continue # It's not a match if the word's longer than string

	# We also want the characters and respective counts of the word
	word_dict = _get_char_dict(word)

	# We're using the variable 'possible match' to break out of a nested loop
	# without raising exceptions (which are really slow). The other option
	# would be to use enumerate, but that would also be significantly slower.
	possible_match = True

	while possible_match:
	for char in word_dict.iterkeys():
	if char not in string_dict or word_dict[char] > string_dict[char]:
	possible_match = False
	break
	if possible_match and len(word) != 1: # We're not returning single characters
	matches.append(word)
	break

	return matches


	def main():
	parser = argparse.ArgumentParser(description=__doc__)
	parser.add_argument('string', help='available characters')
	p = parser.parse_args()
	wordlist = urlopen(WORDLIST_URL).read().lower().split()
	for match in generate_wordlist(p.string.lower(), wordlist):
	print match

	if __name__ == '__main__':
	main()