public
Last active — forked from j4mie/autocomplete.py

autocomplete.py - redis autocompleter

  • Download Gist
autocomplete.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
"""
A redis autocomplete example for multi-word phrases.
 
Based on:
Ruby original: http://gist.github.com/574044
Python original: https://gist.github.com/577852
 
See options below for usage
 
Requires http://github.com/andymccurdy/redis-py/
"""
 
from redis import Redis
import sys
 
# Shared redis client used by the module-level helpers; built with the
# client's default constructor arguments.
r = Redis()
# Key of the sorted set that holds every word prefix plus the '*'-terminated
# complete entries.
ZKEY_COMPL = 'compl'
# Prefix for the per-completion sets: SKEY_DOCS_PREFIX + <completion> holds the
# original texts that contain that completion.
SKEY_DOCS_PREFIX = 'docs:'
 
def deleteAll():
    """Empty the completions sorted set.

    Only the ``ZKEY_COMPL`` sorted set is cleared; the document sets stored
    under ``SKEY_DOCS_PREFIX`` are not touched by this function.

    Returns the value reported by ``r.zremrangebyrank`` for the removal.
    """
    # Rank 0 through rank -1 addresses every member of the sorted set.
    first_rank, last_rank = 0, -1
    removed = r.zremrangebyrank(ZKEY_COMPL, first_rank, last_rank)
    return removed
 
def addCompletions(text):
    """Index *text* so it can be found from a prefix of any of its words.

    For every whitespace-separated word of *text* (lower-cased), the
    ``ZKEY_COMPL`` sorted set receives, all with score 0:

    * each prefix of the word, and
    * the whole word followed by ``'*'``, which marks a complete entry.

    The stripped *text* is also added to the set ``SKEY_DOCS_PREFIX + word``.
    Finally the whole phrase is registered the same way: the lower-cased
    phrase plus ``'*'`` goes into the sorted set and the stripped *text* goes
    into ``SKEY_DOCS_PREFIX + <lower-cased phrase>``.

    Empty or whitespace-only input is ignored.
    """
    text = text.strip()
    if not text:
        return

    # Lookups are always done with lower-cased prefixes, so every indexed
    # entry (words and the whole phrase alike) must be lower-cased too.
    phrase = text.lower()

    # NOTE(review): zadd is called as (key, member, score); newer redis-py
    # releases may expect a different call shape -- confirm against the
    # installed client version.
    for word in phrase.split():
        for end_index in range(1, len(word) + 1):
            r.zadd(ZKEY_COMPL, word[:end_index], 0)
        r.zadd(ZKEY_COMPL, word + '*', 0)
        r.sadd(SKEY_DOCS_PREFIX + word, text)

    # Previously the phrase entry kept its original case while its document
    # set was keyed by the lower-cased phrase, so the two never matched for
    # text containing capitals.
    r.zadd(ZKEY_COMPL, phrase + '*', 0)
    r.sadd(SKEY_DOCS_PREFIX + phrase, text)
 
def addFromFile(filename):
    """Create completions for every line of *filename*.

    Each line is passed to ``addCompletions``. A one-line progress message
    ("Adding completions for <filename> ... done") is written to stdout; the
    first part is flushed before the file is processed.

    Raises whatever ``open`` raises if the file cannot be opened.
    """
    sys.stdout.write("Adding completions for %s ..." % (filename,))
    sys.stdout.flush()
    # The context manager closes the file even if indexing a line fails.
    with open(filename) as source:
        for line in source:
            addCompletions(line)
    sys.stdout.write(" done\n")
 
def getWordCompletions(r, word, count, rangelen=50):
    """Return a set of at most *count* completions for *word*.

    Args:
        r: redis client providing ``zrank`` and ``zrange``.
        word: the prefix to complete; it is lower-cased and stripped first.
        count: maximum number of completions to return.
        rangelen: how many sorted-set members to fetch per ``zrange`` call.

    Returns:
        A set of completed entries with the trailing ``'*'`` marker removed.
        The set is empty when *word* is blank, when the prefix is not a
        member of the ``ZKEY_COMPL`` sorted set, or when *count* is not
        positive.
    """
    prefix = word.lower().strip()
    results = set()
    if not prefix:
        return results

    start = r.zrank(ZKEY_COMPL, prefix)
    # Rank 0 is a valid position, so only None means "not a member".
    if start is None:
        return results

    # Strictly less-than: the original "<=" let count + 1 entries through.
    while len(results) < count:
        entries = r.zrange(ZKEY_COMPL, start, start + rangelen - 1)
        start += rangelen
        if not entries:
            break

        for entry in entries:
            # Compare only the overlapping part; the first entry that no
            # longer shares the prefix ends the scan.
            minlen = min(len(entry), len(prefix))
            if entry[:minlen] != prefix[:minlen]:
                return results

            # A trailing '*' marks a complete entry rather than a bare prefix.
            if entry.endswith('*'):
                results.add(entry[:-1])
                if len(results) >= count:
                    return results

    return results
 
def getPhraseCompletions(r, text, count):
    """Return up to *count* stored texts matching any word prefix in *text*.

    *text* is lower-cased and split on whitespace. Each resulting prefix is
    completed with ``getWordCompletions``, and the completions of all
    prefixes are pooled. The document sets ``SKEY_DOCS_PREFIX + completion``
    are then unioned and the members returned sorted case-insensitively.

    Redis traffic: per input word, one ZRANK plus one or more ZRANGE pages
    (issued inside ``getWordCompletions``), followed by a single SUNION. The
    SUNION is skipped when no completion was found.

    Returns:
        A list of at most *count* texts; empty if nothing matched.
    """
    completions = set()
    for prefix in text.lower().split():
        completions.update(getWordCompletions(r, prefix, count))

    # Test the set itself: a map() object is always truthy on Python 3, which
    # would send an empty key list to SUNION.
    if not completions:
        return []

    keys = [SKEY_DOCS_PREFIX + completion for completion in completions]
    documents = r.sunion(keys)
    # Calling .lower() on the member itself works for any string-like type
    # the client returns, unlike the unbound str.lower.
    return sorted(documents, key=lambda doc: doc.lower())[:count]
 
if __name__ == '__main__':
    # Command-line entry point: optional maintenance flags first, then every
    # positional argument is treated as a phrase to complete.
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option(
        "-f", "--file",
        dest="filename",
        metavar="FILE",
        help="Create completions for lines in FILE",
    )
    parser.add_option(
        "-i", "--insert",
        dest="text",
        metavar="TEXT",
        help="Create completions for TEXT",
    )
    parser.add_option(
        "-d", "--delete-all",
        dest="delete",
        action="store_true",
        default=False,
        help="Delete everything in the completions db",
    )

    options, args = parser.parse_args()

    # Order matters: wipe first, then load, so "-d -f FILE" rebuilds the index.
    if options.delete:
        deleteAll()
    if options.filename:
        addFromFile(options.filename)
    if options.text:
        addCompletions(options.text)

    # Looping over an empty argument list is a no-op, so no guard is needed.
    for arg in args:
        sys.stdout.write("%s ==>\n" % (arg,))
        matches = getPhraseCompletions(r, arg, 10)
        sys.stdout.write('\n'.join(matches) + '\n')

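Note that in getWordCompletions, on line 57, you'll return early when the prefix is the first element of the ZSET. ZSET ranks are indexed from 0, so ZRANK returns 0 for that element, and a truthiness check on the rank treats 0 the same as a missing member (None). So your test doesn't pass. Change line 57 to read: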

if start is None:

and you should be good to go.

Right you are, streeter. Thank you for that catch. Cheers,
j

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.