Skip to content

Instantly share code, notes, and snippets.

@mstepniowski
Created March 16, 2013 17:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mstepniowski/45f6dc6cd8d9fc288eea to your computer and use it in GitHub Desktop.
Save mstepniowski/45f6dc6cd8d9fc288eea to your computer and use it in GitHub Desktop.
PyCon 2013 Thumbtack Anagram Challenge
import re
import sys
from collections import defaultdict
from itertools import combinations, chain
def powerset(iterable):
"""powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"""
s = list(iterable)
return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
def words(pairs):
"""Given a list of word pairs, return the set of words in these pairs.
"""
result = set()
for pair in pairs:
result.add(pair[0])
result.add(pair[1])
return result
def find_anagrams(text):
"""Given text as string, yield all anagram groups in the text, as word pairs."""
words = set(w.lower() for w in re.split(r'[^a-zA-Z0-9]+', text) if len(w) > 3)
# Build index
d = defaultdict(set)
for w1 in words:
for w2 in words:
if w1 < w2:
d[''.join(sorted(w1 + w2))].add((w1, w2))
# Find anagrams
for pair_set in d.values():
if len(pair_set) >= 2:
# Possible anagram pair, check if there is no word shared
pair_list = list(pair_set)
common_words = set(pair_list[0])
for word_pair in pair_list[1:]:
common_words.intersection_update(set(word_pair))
if len(common_words) == 0:
yield pair_list
def normalize_group(pairs):
"""Given a list of word pairs, return the longest list of word
pairs, such that no word in the list is repeated.
"""
max_word_pairs = []
for word_pairs in powerset(pairs):
unique_words = words(word_pairs)
if len(unique_words) == 2 * len(word_pairs):
max_word_pairs = word_pairs
return max_word_pairs
if __name__ == '__main__':
anagram_groups = find_anagrams(sys.stdin.read())
by_length = defaultdict(list)
for group in anagram_groups:
normalized_group = normalize_group(group)
by_length[len(normalized_group)].append(normalized_group)
max_length = max(by_length.keys())
for group in by_length[max_length]:
print ', '.join('%s %s' % (w1, w2) for (w1, w2) in group)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment