Skip to content

Instantly share code, notes, and snippets.

@jan-g
Created April 26, 2017 14:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jan-g/51d46f470c639399ab3f42789f5dbae6 to your computer and use it in GitHub Desktop.
Save jan-g/51d46f470c639399ab3f42789f5dbae6 to your computer and use it in GitHub Desktop.
Anagrams
#!/usr/bin/env python3
import collections
import argparse
import itertools
def load_ngraphs(fn, n):
    """Load relative n-graph frequencies from a word-list corpus.

    Each non-blank line of the file is treated as one word.  The word is
    padded with a bracketing ``$`` on both sides (``apple`` -> ``$apple$``)
    so that word-initial and word-final letters get n-graphs of their own,
    then split into overlapping windows of length ``n``
    (``$a ap pp pl le e$`` for ``n == 2``).

    Args:
        fn: Path of the corpus file, one word per line.
        n: Length of the n-graphs to count.

    Returns:
        A dict mapping each n-graph seen in the corpus to its share of the
        total n-graph count.  The dict is empty when the corpus yields no
        n-graphs at all.
    """
    counts = collections.Counter()
    with open(fn) as f:
        for line in f:
            word = line.strip()
            if not word:
                # A blank line is not a word; counting its bare '$$' padding
                # would add a spurious n-graph and dilute real frequencies.
                continue
            padded = '$' + word + '$'
            counts.update(padded[i:i + n] for i in range(len(padded) - n + 1))
    total = sum(counts.values())
    return {ngraph: count / total for ngraph, count in counts.items()}
def score(freqs, word, n):
    """Work out the probabilistic score for this particular word.

    The word is padded to ``$word$`` and split into overlapping n-graphs of
    length ``n``; the score is the product of the frequency of each one,
    where an n-graph missing from ``freqs`` counts as 0.

    Args:
        freqs: Mapping of n-graph to frequency.
        word: Candidate word to score (without the ``$`` padding).
        n: Length of the n-graphs.

    Returns:
        The product of the n-graph frequencies.  This is 1 when the padded
        word is shorter than ``n`` (there are no n-graphs to multiply) and
        0 when any n-graph is unknown.
    """
    padded = '$' + word + '$'
    product = 1
    for i in range(len(padded) - n + 1):
        product *= freqs.get(padded[i:i + n], 0)
        if not product:
            # Once the product is zero no later factor can change it, so
            # stop looking up the remaining n-graphs.
            break
    return product
def hunt(freqs, word, n):
    """Score every distinct rearrangement of the letters of ``word``.

    Args:
        freqs: Mapping of n-graph to frequency, passed through to ``score``.
        word: The letters to permute.
        n: Length of the n-graphs, passed through to ``score``.

    Returns:
        A dict mapping each distinct permutation of ``word`` (as a string)
        to the value ``score`` returns for it.
    """
    # Collapse duplicates first: repeated letters yield identical strings.
    candidates = {''.join(letters) for letters in itertools.permutations(word)}
    scores = {}
    for candidate in candidates:
        scores[candidate] = score(freqs, candidate, n)
    return scores
def main():
    """Command-line entry point: rank the anagrams of a word.

    Parses the arguments, builds n-graph frequencies from the word list,
    scores every permutation of the requested word and prints up to 100 of
    the non-zero results, best first.  Each printed line holds the rank
    (starting at 0), the score relative to the best score, and the
    candidate word.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dict', default='source.txt', help='word list source')
    parser.add_argument('-n', default=2, type=int, help='length of n-gram to use')
    parser.add_argument('anag', help='word to look for an anagram for')
    args = parser.parse_args()

    freqs = load_ngraphs(args.dict, args.n)
    results = hunt(freqs, args.anag, args.n)

    # Normalise against the best score so the top candidate prints as 1.0.
    # The division only runs for positive values, so `best` is positive
    # whenever it is actually used.
    best = max(results.values())
    ranked = [(value / best, anag) for anag, value in results.items() if value > 0]
    ranked.sort(reverse=True)

    for rank, (relative, anag) in enumerate(ranked[:100]):
        print(rank, relative, anag)
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment