Skip to content

Instantly share code, notes, and snippets.

@gonz
Created January 27, 2012 03:28
Show Gist options
  • Save gonz/1686815 to your computer and use it in GitHub Desktop.
Save gonz/1686815 to your computer and use it in GitHub Desktop.
T9 python
import re, sys
# Letter groups of a phone keypad; the group at index i sits on digit i + 2.
keys = ('abc', 'def', 'ghi', 'jkl', 'mno', 'pqrs', 'tuv', 'wxyz')

# Maps each letter to the one-character string of its keypad digit
# ('a' -> '2', ..., 'z' -> '9').
m = {
    letter: str(digit)
    for digit, group in enumerate(keys, 2)
    for letter in group
}

# data: digit-string prefix -> set of words stored under that prefix.
data = {}
# ocurrences: word -> number of times the word has been counted.
ocurrences = {}

# Matches every run of characters that are not lowercase ASCII letters;
# used as the separator when splitting text into words.
wmatch = re.compile('[^a-z]+')
def learn(word):
    """Index `word` under every prefix of its keypad digit sequence.

    Each character of `word` is translated through the module-level `m`
    table; `word` is then added to the set kept in `data` for the first
    1, 2, ..., len(word) digits of that translation.  An empty `word`
    adds nothing.  A character missing from `m` raises KeyError before
    `data` is modified, because the full translation is built first.
    """
    digits = ''.join(m[letter] for letter in word)
    for end in range(1, len(word) + 1):
        # setdefault creates the set the first time a prefix is seen.
        data.setdefault(digits[:end], set()).add(word)
def train(f):
    """Count and index every word read from the file-like object `f`.

    The entire content of `f` is read, lower-cased and split on
    `wmatch`.  The first time a word is seen it is indexed with
    `learn()` and its count in `ocurrences` is set to 1; every later
    sighting increments that count.

    f -- object with a read() method returning the text to learn from.
    """
    for word in wmatch.split(f.read().lower()):
        if not word:
            # re.split yields '' when the text starts or ends with a
            # separator (or is empty); that is not a word, so do not
            # record it in `ocurrences`.
            continue
        if word in ocurrences:
            ocurrences[word] += 1
        else:
            learn(word)
            ocurrences[word] = 1
def search(n):
    """Return the words indexed under the digit string `n`.

    The result is a pair ``(exact, prefix)`` of lists of
    ``(word, count)`` tuples, with counts taken from `ocurrences`:
    `exact` holds the words whose length equals len(n), `prefix` holds
    all the others found under `n`.  Each list is ordered by count,
    highest first.

    If `n` is not a key of `data`, the function does not return: it
    calls sys.exit(0).
    """
    if n not in data:
        sys.exit(0)

    exact, longer = [], []
    for word in data[n]:
        entry = (word, ocurrences[word])
        if len(word) == len(n):
            exact.append(entry)
        else:
            longer.append(entry)

    by_count = lambda pair: pair[1]
    exact.sort(key=by_count, reverse=True)
    longer.sort(key=by_count, reverse=True)
    return (exact, longer)
if __name__ == '__main__':
    # Validate the command line before reading and indexing the corpus,
    # so a missing digit string gives a usage message on stderr (exit
    # status 1) instead of an IndexError after the training pass.
    if len(sys.argv) < 2:
        sys.exit('usage: %s DIGITS' % sys.argv[0])
    query = sys.argv[1]

    with open('alice_in_wonderland.txt', 'r') as f:
        train(f)

    # search() terminates the process itself when nothing is indexed
    # under `query`, so the code below only runs when there are matches.
    results = search(query)
    for i, t in ((0, 'Exact'), (1, 'Prefix')):
        # Single-argument print(...) prints the same text on Python 2 and 3.
        print("%s matches for %s:" % (t, query))
        for match, _ in results[i]:
            print(match)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment