Skip to content

Instantly share code, notes, and snippets.

@djinn
Created May 2, 2019 04:06
Show Gist options
  • Save djinn/2bb96b200ee31907618d457b6f5fcd01 to your computer and use it in GitHub Desktop.
Save djinn/2bb96b200ee31907618d457b6f5fcd01 to your computer and use it in GitHub Desktop.
Fast algorithm to find anagram of a word using preindex of words
#!/usr/bin/env python3
# Author: Supreet Sethi <supreet.sethi@gmail.com>
# License: Creative Commons BY-SA 2.0
from string import ascii_lowercase
WORDSFILE = '/usr/share/dict/words' # this is specific to mac os
def alphabet_prime_data():
prime = []
for num in range(2,107):
if all(num%i!=0 for i in range(2,num)):
prime.append(num)
return dict(zip(ascii_lowercase, prime[:len(ascii_lowercase)]))
def hash_word(word, alpha):
val = 1
for c in ''.join(filter(str.isalnum, word).lower()): # remove special character and lower case before hashing
val = alpha[c] * val
return val
def preindex_words(fd, alpha):
index = {}
for line in fd.readlines():
line = line.strip()
h = hash_word(line, alpha)
index.setdefault(h, []).append(line)
return index
def find_anagram(word, index, alpha):
hash = hash_word(word, alpha)
return list(filter(lambda x: x != word, index[hash]))
alpha = alphabet_prime_data()
fd = open(WORDSFILE)
index = preindex_words(fd, alpha)
print(find_anagram('salt', index, alpha))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment