Skip to content

Instantly share code, notes, and snippets.

@mrestko
Created July 9, 2011 21:30
Show Gist options
  • Select an option

  • Save mrestko/1073976 to your computer and use it in GitHub Desktop.

Select an option

Save mrestko/1073976 to your computer and use it in GitHub Desktop.
Soundex Encoder
from itertools import chain, repeat
# Letters that never receive a Soundex digit.
drops = 'aehiouwy'

# Consonant groups: every letter in a group shares the same digit.
ones = 'bfpv'
twos = 'cgjkqsxz'
threes = 'dt'
fours = 'l'
fives = 'mn'
sixes = 'r'

# (letters, digit) pairs, in digit order.
alpha_list = [
    (ones, 1),
    (twos, 2),
    (threes, 3),
    (fours, 4),
    (fives, 5),
    (sixes, 6),
]

# One sequence of (letter, digit) pairs per consonant group.
code = [[(ch, digit) for ch in group] for group, digit in alpha_list]

# Flat lookup table: lowercase consonant -> digit (stored as an int).
code_dict = dict(pair for pairs in code for pair in pairs)
def soundex(name):
    """Return the four-character American Soundex code for *name*.

    The code is the first letter of the name (upper-cased) followed by up
    to three digits, right-padded with zeros.

    Characters that appear in neither ``code_dict`` nor ``drops`` (spaces,
    hyphens, apostrophes, digits, non-ASCII letters) are ignored, so a name
    such as "O'Brien" no longer raises ``KeyError``. If *name* contains no
    usable letter at all (including the empty string) an empty string is
    returned instead of raising ``IndexError``.

    >>> soundex("Robert")
    'R163'
    >>> soundex("Pfister")
    'P236'
    >>> soundex("Ashcraft")
    'A261'
    """
    letters = [c for c in name.lower() if c in code_dict or c in drops]
    if not letters:
        return ''

    first = letters[0]
    digits = []
    # Digit of the most recent coded letter. Seeding it from the first
    # letter makes a following same-digit letter collapse into it
    # ("Pfister": the f is absorbed by the P).
    previous = code_dict.get(first)
    for c in letters[1:]:
        digit = code_dict.get(c)
        if digit is None:
            # Uncoded letter. A vowel (or y) separates two equal digits so
            # the second one is kept; h and w are transparent and do not.
            if c not in 'hw':
                previous = None
            continue
        # Collapse runs of equal *digits*, not merely equal letters.
        if digit != previous:
            digits.append(str(digit))
        previous = digit

    return (first.upper() + ''.join(digits) + '000')[:4]
def encode(name):
    """Translate every character of *name* into its Soundex digit.

    Each character is looked up in ``code_dict``; a character without an
    entry raises ``KeyError``. The digits are returned concatenated into a
    single string.
    """
    return ''.join(str(code_dict[ch]) for ch in name)
def rm_dupes(name):
    """Collapse every run of identical adjacent characters to one character.

    *name* is a string; the result is a string in which no two neighbouring
    characters are equal. Strings shorter than two characters are returned
    unchanged.

    >>> rm_dupes("bookkeeper")
    'bokeper'
    >>> rm_dupes("aaa")
    'a'
    """
    if len(name) < 2:
        return name
    # A single left-to-right pass: compare each character with the last one
    # kept, so runs of any length collapse and long inputs cannot exhaust
    # the recursion limit.
    kept = [name[0]]
    for ch in name[1:]:
        if ch != kept[-1]:
            kept.append(ch)
    return ''.join(kept)
def drop(name):
    """Return a list of the characters of *name* that are not in ``drops``.

    Order is preserved. The result is a list of single characters, not a
    string.
    """
    kept = []
    for ch in name:
        if ch in drops:
            continue
        kept.append(ch)
    return kept
if __name__ == "__main__":
    # Script entry point: prompt for a name and print its Soundex code.
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3: raw_input was renamed to input
    name = read_line("Enter name:")
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(soundex(name))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment