coding quiz for Planet Labs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Find anagram groups in the given dictionary file. | |
# | |
# Example usage: ./anagrams.py /usr/share/dict/words | |
# | |
# The input file must have one word per line. We do not do any | |
# preprocessing, like lowercasing or removing punctuation. | |
# | |
# The output (stdout) contains one anagram group per line. | |
# The groups are output in an arbitrary order. Within each group, | |
# the words are comma-separated and sorted alphabetically. | |
from collections import defaultdict | |
import sys | |
def get_anagram_groups(words, min_word_len): | |
""" | |
Return list of anagram groups for the given words. | |
Considers only words of length min_word_len or more. | |
Returns only anagram groups that have at least as many | |
anagrams in the group as there are letters in each word. | |
""" | |
# We need a fingerprint for each word, so that words have | |
# the same fingerprint iff they are anagrams of each other. | |
# To obtain this fingerprint, we sort the characters in | |
# each word. For example, "parsed" and "spared" both have | |
# the fingerprint "adeprs". | |
fingerprint_to_words = defaultdict(list) | |
for word in words: | |
if len(word) >= min_word_len: | |
fingerprint = ''.join(sorted(word)) | |
fingerprint_to_words[fingerprint].append(word) | |
groups = [] | |
for fingerprint, words in fingerprint_to_words.iteritems(): | |
if len(words) >= len(fingerprint): | |
groups.append(words) | |
return groups | |
if __name__ == "__main__": | |
if len(sys.argv) != 2: | |
print >>sys.stderr, "Usage: {} dict-file".format(sys.argv[0]) | |
sys.exit(1) | |
words = [line.strip() for line in open(sys.argv[1])] | |
groups = get_anagram_groups(words, min_word_len=4) | |
for group in groups: | |
print ", ".join(sorted(group)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment