Skip to content

Instantly share code, notes, and snippets.

@denzilc
Last active August 29, 2015 13:55
Show Gist options
  • Save denzilc/8774447 to your computer and use it in GitHub Desktop.
Save denzilc/8774447 to your computer and use it in GitHub Desktop.
How to instantiate 'self._fdists' for storing Conditional Frequency Data in Redis?
# Demo session for RedisHashFreqDist; it talks to a Redis server on
# localhost:6379 (database 0).
import redis
r = redis.StrictRedis(host='localhost', port=6379, db=0)
from redisprob import RedisHashFreqDist
# The sample counts are kept in the Redis hash named 'test'.
rhfd = RedisHashFreqDist(r, 'test')
print rhfd.items()
# [] -> [('foo', 1)]
print rhfd.values()
print len(rhfd)
# A sample without a stored count reads as 0 (__getitem__ maps a missing
# field to 0).
print rhfd['foo']
# inc() issues HINCRBY on the hash, adding 1 to the count of 'foo'.
rhfd.inc('foo')
print rhfd['foo']
# clear() deletes the 'test' hash, so the demo leaves nothing behind.
rhfd.clear()
# Demo session for RedisConditionalHashFreqDist; it reuses the client `r`
# created earlier in this session. The bare expressions only show a value when
# typed into an interactive interpreter.
from redisprob import RedisConditionalHashFreqDist
rchfd = RedisConditionalHashFreqDist(r, 'condhash')
# N() and conditions() are not defined by the subclass; they come from nltk's
# ConditionalFreqDist.
rchfd.N()
rchfd.conditions()
# inc() on the conditional distribution increments a field of the hash named
# 'condhash' itself, not of any per-condition hash.
rchfd.inc('foo')
rchfd.N()
# Indexing by a condition yields that condition's RedisHashFreqDist, creating
# it on first access.
rchfd['cond1'].inc('foo')
rchfd.N()
rchfd['cond1']['foo']
rchfd.conditions()
# clear() clears every per-condition distribution held by this object.
rchfd.clear()
import collections
import re

try:  # Python 3.3+: the container ABCs live in collections.abc
    from collections.abc import MutableMapping
except ImportError:  # Python 2
    from collections import MutableMapping
# Pattern for the runs of whitespace and/or ampersands that encode_key()
# collapses into a single underscore.
white = r'[\s&]+'


def encode_key(key):
    """Return *key* normalized for use as a Redis key or hash-field name.

    Leading and trailing whitespace is stripped first; every remaining run of
    characters matched by ``white`` is then replaced by one underscore.
    """
    trimmed = key.strip()
    pieces = re.split(white, trimmed)
    return '_'.join(pieces)
class RedisHashMap(MutableMapping):
    """Mapping interface over a single Redis hash.

    ``r`` is the Redis client used for every command and ``name`` is the key
    of the hash; the name and every key passed to the item-access methods are
    normalized with ``encode_key`` before they reach Redis.

    Values come back exactly as the client returns them; no type conversion is
    done here.
    """

    def __init__(self, r, name):
        self._r = r
        self._name = encode_key(name)

    def __iter__(self):
        # Unlike a regular mapping this yields (field, value) pairs rather
        # than bare keys; iteritems() depends on that.
        return iter(self.items())

    def __len__(self):
        # Uses the hlen command to get the number of elements in the hash map
        return self._r.hlen(self._name)

    def __contains__(self, key):
        return self._r.hexists(self._name, encode_key(key))

    def __getitem__(self, key):
        # Returns whatever hget returns for the field; no KeyError is raised
        # here for a missing field.
        return self._r.hget(self._name, encode_key(key))

    def __setitem__(self, key, val):
        self._r.hset(self._name, encode_key(key), val)

    def __delitem__(self, key):
        self._r.hdel(self._name, encode_key(key))

    def keys(self):
        return self._r.hkeys(self._name)

    def values(self):
        return self._r.hvals(self._name)

    def items(self):
        return self._r.hgetall(self._name).items()

    def get(self, key, default=0):
        # Any falsy stored value (not only a missing field) yields `default`.
        return self[key] or default

    def iteritems(self):
        return iter(self)

    def clear(self):
        # Drops the whole hash with one DELETE instead of removing fields
        # one at a time.
        self._r.delete(self._name)
class RedisOrderedDict(MutableMapping):
    """Mapping interface over a single Redis sorted set.

    Members of the sorted set act as keys and their scores as values. ``r`` is
    the Redis client and ``name`` the key of the sorted set; the name and every
    key passed to the item-access methods are normalized with ``encode_key``.

    ``keys``, ``values`` and ``items`` read the set with ``zrevrange``;
    ``start``/``end`` are passed straight through as the range bounds.
    """

    def __init__(self, r, name):
        self._r = r
        self._name = encode_key(name)

    def __iter__(self):
        # Yields (member, score) pairs rather than bare keys; iteritems()
        # depends on that.
        return iter(self.items())

    def __len__(self):
        return self._r.zcard(self._name)

    def __contains__(self, key):
        # The inherited mixin decides membership by catching KeyError from
        # __getitem__, which never raises here, so every key looked present.
        # Relies on zscore yielding None for a member that is not in the set.
        return self._r.zscore(self._name, encode_key(key)) is not None

    def __getitem__(self, key):
        return self._r.zscore(self._name, encode_key(key))

    def __setitem__(self, key, score):
        # NOTE(review): (member, score) positional order matches the legacy
        # redis-py ``Redis.zadd``; other client classes/versions expect a
        # different call form - confirm against the installed redis-py.
        self._r.zadd(self._name, encode_key(key), score)

    def __delitem__(self, key):
        self._r.zrem(self._name, encode_key(key))

    def keys(self, start=0, end=-1):
        return self._r.zrevrange(self._name, start, end)

    def values(self, start=0, end=-1):
        return [v for (k, v) in self.items(start=start, end=end)]

    def items(self, start=0, end=-1):
        return self._r.zrevrange(self._name, start, end,
                                 withscores=True)

    def get(self, key, default=0):
        # Any falsy score (including 0) yields `default`.
        return self[key] or default

    def iteritems(self):
        return iter(self)

    def clear(self):
        self._r.delete(self._name)
# When this module is executed directly (rather than imported), run the
# doctests it contains.
if __name__ == '__main__':
    import doctest

    doctest.testmod()
from rediscollections import RedisHashMap
class RedisHashFreqDist(RedisHashMap):
    """Frequency distribution whose sample counts are stored in a Redis hash.

    Each hash field is a sample and its value is that sample's count. Values
    read back from Redis are converted to ``int`` by ``__getitem__``,
    ``values`` and ``items``.
    """

    def inc(self, sample, count=1):
        """Add ``count`` to the stored count of ``sample`` using HINCRBY."""
        # Encode the sample the same way __getitem__/__contains__ encode
        # their key; otherwise a sample containing whitespace or '&' would be
        # written under one field name and looked up under another.
        self._r.hincrby(self._name, encode_key(sample), count)

    def N(self):
        """Return the sum of all stored counts."""
        return int(sum(self.values()))

    def __getitem__(self, key):
        # A falsy result from the base lookup (e.g. no stored value) counts
        # as 0.
        return int(RedisHashMap.__getitem__(self, key) or 0)

    def values(self):
        return [int(v) for v in RedisHashMap.values(self)]

    def items(self):
        return [(k, int(v)) for (k, v) in RedisHashMap.items(self)]
from nltk.probability import ConditionalFreqDist,FreqDist
from rediscollections import encode_key
class RedisConditionalHashFreqDist(ConditionalFreqDist):
    """Conditional frequency distribution backed by Redis hashes.

    Every condition gets its own ``RedisHashFreqDist`` stored under the Redis
    key ``'<name>:<condition>'``. The per-condition objects are cached in
    ``self._fdists`` and created on first access.

    ``r`` is the Redis client, ``name`` the key prefix, and ``cond_samples``
    is handed unchanged to ``ConditionalFreqDist.__init__``.
    """

    def __init__(self, r, name, cond_samples=None):
        self._r = r
        self._name = name
        # Create the cache ourselves, and before the base initializer runs:
        # __getitem__ needs it, and not every NLTK release sets ``_fdists``
        # (its absence is the reported
        # "object has no attribute '_fdists'" failure).
        self._fdists = {}
        ConditionalFreqDist.__init__(self, cond_samples)
        # initialize self._fdists for all matching keys
        pattern = encode_key('%s:*' % name)
        # Everything before the trailing '*' is the literal key prefix.
        prefix_len = len(pattern) - 1
        for key in self._r.keys(pattern):
            # A Python 3 client may hand back bytes; no-op on Python 2,
            # where bytes is str.
            if isinstance(key, bytes) and not isinstance(key, str):
                key = key.decode('utf-8')
            # Slice off the prefix instead of splitting on ':' so that a ':'
            # inside the name or the condition does not truncate the result.
            condition = key[prefix_len:]
            self[condition]  # calls self.__getitem__(condition)

    def inc(self, sample, count=1):
        # Increments a field of the hash stored under the bare name, not of
        # any per-condition hash.
        self._r.hincrby(self._name, sample, count)

    def __contains__(self, condition):
        # NOTE(review): this tests the encoded condition while __getitem__
        # caches under the condition as given; the two differ for conditions
        # containing whitespace or '&' - confirm the intended behavior.
        return encode_key(condition) in self._fdists

    def __getitem__(self, condition):
        if condition not in self._fdists:
            key = '%s:%s' % (self._name, condition)
            self._fdists[condition] = RedisHashFreqDist(self._r, key)
        return self._fdists[condition]

    def clear(self):
        for fdist in self._fdists.values():
            fdist.clear()
# When this module is executed directly (rather than imported), run the
# doctests it contains.
if __name__ == '__main__':
    import doctest

    doctest.testmod()
@marcoippolito
Copy link

Hi Denzil,
thanks a lot for your kind help.

Here: http://www.packtpub.com/python-text-processing-nltk-20-cookbook/book
you can find the book's code and the errata submitted so far. There are only a few, and none of them covers the problem I found:
File "redisprob.py", line 119, in __getitem__
if condition not in self._fdists:
AttributeError: 'RedisConditionalHashFreqDist' object has no attribute '_fdists'

Marco

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment