Created
May 2, 2019 04:06
-
-
Save djinn/2bb96b200ee31907618d457b6f5fcd01 to your computer and use it in GitHub Desktop.
Fast algorithm to find anagram of a word using preindex of words
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Author: Supreet Sethi <supreet.sethi@gmail.com> | |
# License: Creative Commons BY-SA 2.0 | |
from string import ascii_lowercase | |
WORDSFILE = '/usr/share/dict/words' # this is specific to mac os | |
def alphabet_prime_data(): | |
prime = [] | |
for num in range(2,107): | |
if all(num%i!=0 for i in range(2,num)): | |
prime.append(num) | |
return dict(zip(ascii_lowercase, prime[:len(ascii_lowercase)])) | |
def hash_word(word, alpha): | |
val = 1 | |
for c in ''.join(filter(str.isalnum, word).lower()): # remove special character and lower case before hashing | |
val = alpha[c] * val | |
return val | |
def preindex_words(fd, alpha): | |
index = {} | |
for line in fd.readlines(): | |
line = line.strip() | |
h = hash_word(line, alpha) | |
index.setdefault(h, []).append(line) | |
return index | |
def find_anagram(word, index, alpha): | |
hash = hash_word(word, alpha) | |
return list(filter(lambda x: x != word, index[hash])) | |
alpha = alphabet_prime_data() | |
fd = open(WORDSFILE) | |
index = preindex_words(fd, alpha) | |
print(find_anagram('salt', index, alpha)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment