Skip to content

Instantly share code, notes, and snippets.

@treystout
Created August 2, 2012 21:28
Show Gist options
  • Save treystout/3240818 to your computer and use it in GitHub Desktop.
Save treystout/3240818 to your computer and use it in GitHub Desktop.
General purpose zset union/intersect code (probably too meta)
#!/usr/bin/env python
import hashlib
import lib.akn.config as cfg
cfg.init(False)
from lib.akn.exceptions import CallError
def __zset_fetch(bind_pairs, start=0, end=29, count=False,
                 reverse=True, ids_only=False, operator="union", ttl=0,
                 **kwargs):
    """Read a slice (or the size) of one zset, or of several zsets combined.

    General purpose tool for doing zset traversal in redis without abusing
    local memory too much: any combining is done server-side via the native
    ZUNIONSTORE / ZINTERSTORE commands into a temporary "ZCACHE:<md5>" key.

    Args:
        bind_pairs: iterable of 1- or 2-element tuples. Element 0 is a zset
            key; if element 1 is present it is %-interpolated into element 0
            (e.g. ``('hashtag:%s:posts', 'science')``).
        start, end: index bounds handed unchanged to ZRANGE / ZREVRANGE.
        count: when true, return ZCARD of the (possibly combined) zset
            instead of a slice.
        reverse: when true use ZREVRANGE, otherwise ZRANGE.
        ids_only: accepted for interface compatibility; not used here.
        operator: "union" or "intersect"; only consulted when more than one
            key is given.
        ttl: seconds to keep a combined store that this call had to build.
            A falsy value (the default) deletes that store once the read is
            done. A store that was already cached is left untouched.
        **kwargs: accepted for interface compatibility; not used here.

    Returns:
        The ZCARD result when ``count`` is true, otherwise whatever the
        connection's zrevrange / zrange call returns.

    Raises:
        CallError: if an element of ``bind_pairs`` is not a 1- or 2-tuple, or
            if several keys are given with an unsupported ``operator``.
    """
    print("zset fetch on pairs: %s" % (bind_pairs,))

    keys = []
    for pair in bind_pairs:
        if len(pair) == 1:
            keys.append(pair[0])
        elif len(pair) == 2:
            keys.append(pair[0] % pair[1])
        else:
            raise CallError('each element of bind_pairs must be a 1 or 2 tuple')

    combining = len(keys) > 1
    if combining and operator not in ("union", "intersect"):
        # Previously an unknown operator silently built nothing and the call
        # returned an empty result; fail loudly instead.
        raise CallError('operator must be "union" or "intersect"')

    if len(keys) == 1:
        # A single zset is read directly; no temporary store is needed.
        key_hash = keys[0]
    else:
        # Separate the parts with NUL so that different key lists cannot
        # concatenate to the same string and share one cache entry.
        digest_input = "\x00".join([operator] + keys)
        if not isinstance(digest_input, bytes):
            digest_input = digest_input.encode("utf-8")
        key_hash = "ZCACHE:%s" % hashlib.md5(digest_input).hexdigest()

    with cfg.main_db.connection() as c:
        built_temp_store = False
        if combining:
            # we need to do some basic set operations before proceeding
            print("checking to see if %s is cached" % key_hash)
            if c.exists(key_hash):
                print("totally in cache, hitting it")
            else:
                print("not in cache")
                if operator == "intersect":
                    c.zinterstore(key_hash, keys)
                else:
                    c.zunionstore(key_hash, keys)
                built_temp_store = True

        try:
            if count:
                return c.zcard(key_hash)
            print("fetching %s to %s from %s of %s (%s) reverse: %s" % (
                start, end, operator, ",".join(keys), key_hash, reverse))
            if reverse:
                return c.zrevrange(key_hash, start, end)
            return c.zrange(key_hash, start, end)
        finally:
            # Runs for the count path and on read errors too, so a store
            # built by this call is always either expired or removed.
            if built_temp_store:
                if not ttl:
                    print("no ttl, removing temp store")
                    c.delete(key_hash)
                else:
                    print("setting ttl on %s to %dseconds" % (key_hash, ttl))
                    c.expire(key_hash, ttl)
# and now some examples...

# simplest case: one bind pair, so the zset is read directly
print(__zset_fetch([('hashtag:%s:posts', 'science')]))

# two bind pairs combined with the "intersect" operator
print(__zset_fetch(
    [('hashtag:%s:posts', 'science'), ('hashtag:%s:posts', 'derp')],
    operator="intersect"))
# a named helper that accepts a list of hashtags and inserts them
# each as bind pairs, to give something complicated an easy name
def mget_by_hashtags_intersect(hashtags, **kwargs):
    """Fetch from the intersection of the posts zsets of every given hashtag.

    Each entry of ``hashtags`` becomes a ("hashtag:%s:posts", tag) bind pair.
    Any extra keyword arguments are forwarded to ``__zset_fetch`` unchanged.
    """
    return __zset_fetch([("hashtag:%s:posts", ht) for ht in hashtags],
                        operator="intersect", **kwargs)


print(mget_by_hashtags_intersect(['science', 'derp']))
# seed the zset 'foo' with 50 entries whose score and member are both the
# loop index, then read indexes 10..15 of it in forward (non-reversed) order
# NOTE(review): indentation was lost in this copy; the final print is assumed
# to run after the connection block, not inside it -- confirm.
with cfg.main_db.connection() as c:
    for i in range(50):
        c.zadd('foo', i, i)

print(__zset_fetch([('foo',)], start=10, end=15, reverse=False))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment