Skip to content

Instantly share code, notes, and snippets.

@cberzan
Created Mar 14, 2015
Embed
What would you like to do?
coding quiz for Planet Labs
#!/usr/bin/env python
# Find anagram groups in the given dictionary file.
#
# Example usage: ./anagrams.py /usr/share/dict/words
#
# The input file must have one word per line. We do not do any
# preprocessing, like lowercasing or removing punctuation.
#
# The output (stdout) contains one anagram group per line.
# The groups are output in an arbitrary order. Within each group,
# the words are comma-separated and sorted alphabetically.
from collections import defaultdict
import sys
def get_anagram_groups(words, min_word_len):
"""
Return list of anagram groups for the given words.
Considers only words of length min_word_len or more.
Returns only anagram groups that have at least as many
anagrams in the group as there are letters in each word.
"""
# We need a fingerprint for each word, so that words have
# the same fingerprint iff they are anagrams of each other.
# To obtain this fingerprint, we sort the characters in
# each word. For example, "parsed" and "spared" both have
# the fingerprint "adeprs".
fingerprint_to_words = defaultdict(list)
for word in words:
if len(word) >= min_word_len:
fingerprint = ''.join(sorted(word))
fingerprint_to_words[fingerprint].append(word)
groups = []
for fingerprint, words in fingerprint_to_words.iteritems():
if len(words) >= len(fingerprint):
groups.append(words)
return groups
if __name__ == "__main__":
if len(sys.argv) != 2:
print >>sys.stderr, "Usage: {} dict-file".format(sys.argv[0])
sys.exit(1)
words = [line.strip() for line in open(sys.argv[1])]
groups = get_anagram_groups(words, min_word_len=4)
for group in groups:
print ", ".join(sorted(group))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment