mstepniowski/anagrams.py

## anagrams.py
import re
import sys
from collections import defaultdict
from itertools import combinations, chain


def powerset(iterable):
    """powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"""
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))


def words(pairs):
    """Given a list of word pairs, return the set of words in these pairs.
    """
    result = set()
    for pair in pairs:
        result.add(pair[0])
        result.add(pair[1])
    return result


def find_anagrams(text):
    """Given text as string, yield all anagram groups in the text, as word pairs."""
    words = set(w.lower() for w in re.split(r'[^a-zA-Z0-9]+', text) if len(w) > 3)

    # Build index
    d = defaultdict(set)
    for w1 in words:
        for w2 in words:
            if w1 < w2:
                d[''.join(sorted(w1 + w2))].add((w1, w2))

    # Find anagrams
    for pair_set in d.values():
        if len(pair_set) >= 2:
            # Possible anagram pair, check if there is no word shared
            pair_list = list(pair_set)
            common_words = set(pair_list[0])
            for word_pair in pair_list[1:]:
                common_words.intersection_update(set(word_pair))
            if len(common_words) == 0:
                yield pair_list


def normalize_group(pairs):
    """Given a list of word pairs, return the longest list of word
    pairs, such that no word in the list is repeated.
    """
    max_word_pairs = []
    for word_pairs in powerset(pairs):
        unique_words = words(word_pairs)
        if len(unique_words) == 2 * len(word_pairs):
            max_word_pairs = word_pairs
    return max_word_pairs


if __name__ == '__main__':
    anagram_groups = find_anagrams(sys.stdin.read())
    by_length = defaultdict(list)
    for group in anagram_groups:
        normalized_group = normalize_group(group)
        by_length[len(normalized_group)].append(normalized_group)
    max_length = max(by_length.keys())
    for group in by_length[max_length]:
        print ', '.join('%s %s' % (w1, w2) for (w1, w2) in group)
	import re
	import sys
	from collections import defaultdict
	from itertools import combinations, chain


	def powerset(iterable):
	"""powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"""
	s = list(iterable)
	return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))


	def words(pairs):
	"""Given a list of word pairs, return the set of words in these pairs.
	"""
	result = set()
	for pair in pairs:
	result.add(pair[0])
	result.add(pair[1])
	return result


	def find_anagrams(text):
	"""Given text as string, yield all anagram groups in the text, as word pairs."""
	words = set(w.lower() for w in re.split(r'[^a-zA-Z0-9]+', text) if len(w) > 3)

	# Build index
	d = defaultdict(set)
	for w1 in words:
	for w2 in words:
	if w1 < w2:
	d[''.join(sorted(w1 + w2))].add((w1, w2))

	# Find anagrams
	for pair_set in d.values():
	if len(pair_set) >= 2:
	# Possible anagram pair, check if there is no word shared
	pair_list = list(pair_set)
	common_words = set(pair_list[0])
	for word_pair in pair_list[1:]:
	common_words.intersection_update(set(word_pair))
	if len(common_words) == 0:
	yield pair_list


	def normalize_group(pairs):
	"""Given a list of word pairs, return the longest list of word
	pairs, such that no word in the list is repeated.
	"""
	max_word_pairs = []
	for word_pairs in powerset(pairs):
	unique_words = words(word_pairs)
	if len(unique_words) == 2 * len(word_pairs):
	max_word_pairs = word_pairs
	return max_word_pairs


	if __name__ == '__main__':
	anagram_groups = find_anagrams(sys.stdin.read())
	by_length = defaultdict(list)
	for group in anagram_groups:
	normalized_group = normalize_group(group)
	by_length[len(normalized_group)].append(normalized_group)
	max_length = max(by_length.keys())
	for group in by_length[max_length]:
	print ', '.join('%s %s' % (w1, w2) for (w1, w2) in group)