Last active
August 29, 2015 13:55
-
-
Save denzilc/8774447 to your computer and use it in GitHub Desktop.
How to instantiate 'self._fdists' for storing Conditional Frequency Data in Redis?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demo session exercising the Redis-backed frequency distributions.
# print() is called with a single argument throughout, so the output is the
# same under Python 2 and Python 3.
import redis

from redisprob import RedisConditionalHashFreqDist
from redisprob import RedisHashFreqDist

# Client for database 0 of the Redis server at localhost:6379.
r = redis.StrictRedis(host='localhost', port=6379, db=0)

# --- Plain frequency distribution kept in the Redis hash 'test' ---
rhfd = RedisHashFreqDist(r, 'test')
print(rhfd.items())
# [] -> [('foo', 1)]
print(rhfd.values())
print(len(rhfd))
print(rhfd['foo'])
rhfd.inc('foo')
print(rhfd['foo'])
rhfd.clear()

# --- Conditional frequency distribution rooted at 'condhash' ---
# The bare expressions below only display a value when this is typed into an
# interactive session; run as a script they are evaluated and discarded.
rchfd = RedisConditionalHashFreqDist(r, 'condhash')
rchfd.N()
rchfd.conditions()
rchfd.inc('foo')
rchfd.N()
rchfd['cond1'].inc('foo')
rchfd.N()
rchfd['cond1']['foo']
rchfd.conditions()
rchfd.clear()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections
import re

try:  # Python 3.3+: the abstract base classes live in collections.abc
    from collections.abc import MutableMapping
except ImportError:  # Python 2
    from collections import MutableMapping
# Runs of whitespace and/or '&' characters; each run becomes a single '_'.
white = r'[\s&]+'
# Compiled once at import time because encode_key runs on every mapping access.
_white_re = re.compile(white)


def encode_key(key):
    """Return *key* normalised for use as a Redis key or hash field name.

    Leading and trailing whitespace is stripped first, then every run of
    whitespace and/or ``&`` characters is replaced by one underscore.

    *key* must be a string (it needs a ``strip`` method).
    """
    return _white_re.sub('_', key.strip())
class RedisHashMap(MutableMapping):
    """Mutable mapping backed by a single Redis hash.

    ``r`` is the Redis client used for every operation and ``name`` is the
    key of the hash; the name and all field names passed to item access are
    normalised with ``encode_key``.
    """

    def __init__(self, r, name):
        self._r = r
        self._name = encode_key(name)

    def __iter__(self):
        # A mapping iterates over its keys; the inherited MutableMapping
        # helpers (e.g. popitem) depend on that.
        return iter(self.keys())

    def __len__(self):
        # Uses the hlen command to get the number of elements in the hash map
        return self._r.hlen(self._name)

    def __contains__(self, key):
        return self._r.hexists(self._name, encode_key(key))

    def __getitem__(self, key):
        # Returns whatever hget returns; no KeyError is raised here.
        return self._r.hget(self._name, encode_key(key))

    def __setitem__(self, key, val):
        self._r.hset(self._name, encode_key(key), val)

    def __delitem__(self, key):
        self._r.hdel(self._name, encode_key(key))

    def keys(self):
        """Return the field names as reported by hkeys."""
        return self._r.hkeys(self._name)

    def values(self):
        """Return the stored values as reported by hvals."""
        return self._r.hvals(self._name)

    def items(self):
        """Return the (field, value) pairs of the hash."""
        return self._r.hgetall(self._name).items()

    def get(self, key, default=0):
        """Return ``self[key]``, or *default* when that value is falsy."""
        return self[key] or default

    def iteritems(self):
        """Return an iterator over the (field, value) pairs."""
        return iter(self.items())

    def clear(self):
        """Delete the whole hash from Redis."""
        self._r.delete(self._name)
class RedisOrderedDict(MutableMapping):
    """Mutable mapping backed by a Redis sorted set.

    Members of the sorted set are the keys and their scores are the values.
    ``keys``, ``values`` and ``items`` read the set with zrevrange, and
    accept an optional ``start``/``end`` range that is passed straight
    through to it.
    """

    def __init__(self, r, name):
        self._r = r
        self._name = encode_key(name)

    def __iter__(self):
        # A mapping iterates over its keys; the inherited MutableMapping
        # helpers (e.g. popitem) depend on that.
        return iter(self.keys())

    def __len__(self):
        return self._r.zcard(self._name)

    def __contains__(self, key):
        # __getitem__ never raises KeyError, so the inherited membership
        # test would report every key as present; check the score instead.
        return self._r.zscore(self._name, encode_key(key)) is not None

    def __getitem__(self, key):
        # Returns whatever zscore returns; no KeyError is raised here.
        return self._r.zscore(self._name, encode_key(key))

    def __setitem__(self, key, score):
        # NOTE(review): the argument order/shape zadd expects differs between
        # redis-py client classes and versions - confirm against the client
        # actually in use.
        self._r.zadd(self._name, encode_key(key), score)

    def __delitem__(self, key):
        self._r.zrem(self._name, encode_key(key))

    def keys(self, start=0, end=-1):
        """Return the members in zrevrange order for the given range."""
        return self._r.zrevrange(self._name, start, end)

    def values(self, start=0, end=-1):
        """Return the scores, in the same order as ``keys``."""
        return [score for (_, score) in self.items(start=start, end=end)]

    def items(self, start=0, end=-1):
        """Return (member, score) pairs in zrevrange order."""
        return self._r.zrevrange(self._name, start, end, withscores=True)

    def get(self, key, default=0):
        """Return ``self[key]``, or *default* when that value is falsy."""
        return self[key] or default

    def iteritems(self):
        """Return an iterator over the (member, score) pairs."""
        return iter(self.items())

    def clear(self):
        """Delete the whole sorted set from Redis."""
        self._r.delete(self._name)
if __name__ == '__main__':
    # Executed as a script: run whatever doctests this module contains.
    from doctest import testmod
    testmod()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from rediscollections import RedisHashMap | |
class RedisHashFreqDist(RedisHashMap):
    """Frequency distribution whose counts are stored in a Redis hash.

    Each hash field is a sample and its value is that sample's count.
    Values read back from Redis are converted to ``int``.
    """

    def inc(self, sample, count=1):
        """Add *count* (default 1) to the stored count of *sample*."""
        # Encode the field name the same way RedisHashMap.__getitem__ does,
        # otherwise samples containing whitespace or '&' are written under
        # one field and read back from another.
        self._r.hincrby(self._name, encode_key(sample), count)

    def N(self):
        """Return the sum of all stored counts."""
        return int(sum(self.values()))

    def __getitem__(self, key):
        # A sample with no stored value counts as 0.
        return int(RedisHashMap.__getitem__(self, key) or 0)

    def values(self):
        """Return every stored count as an int."""
        return [int(v) for v in RedisHashMap.values(self)]

    def items(self):
        """Return (sample, count) pairs with the counts as ints."""
        return [(k, int(v)) for (k, v) in RedisHashMap.items(self)]
from nltk.probability import ConditionalFreqDist,FreqDist | |
from rediscollections import encode_key | |
class RedisConditionalHashFreqDist(ConditionalFreqDist):
    """Conditional frequency distribution stored in Redis hashes.

    Every condition gets its own ``RedisHashFreqDist`` stored under the
    Redis key ``'<name>:<condition>'``.  The per-condition objects are
    cached in ``self._fdists``, keyed by the ``encode_key``-normalised
    condition.
    """

    def __init__(self, r, name, cond_samples=None):
        self._r = r
        self._name = name
        # Create the cache ourselves, before the base initialiser runs: the
        # base class does not always provide ``_fdists`` (accessing it then
        # fails with AttributeError in __getitem__), and loading
        # cond_samples may already go through self[...].
        self._fdists = {}
        ConditionalFreqDist.__init__(self, cond_samples)
        # initialize self._fdists for all matching keys
        pattern = encode_key('%s:*' % name)
        prefix = pattern[:-1]  # the pattern without its trailing '*'
        for key in self._r.keys(pattern):
            if isinstance(key, bytes) and not isinstance(key, str):
                # Python 3 client returning raw bytes.
                key = key.decode('utf-8')
            # Everything after the prefix is the condition, even when the
            # name or the condition itself contains ':'.
            self[key[len(prefix):]]  # calls self.__getitem__(condition)

    @staticmethod
    def _condition_key(condition):
        """Return the ``_fdists`` key used for *condition*."""
        try:
            return encode_key(condition)
        except (AttributeError, TypeError):
            # Non-string conditions cannot be encoded; use them unchanged.
            return condition

    def inc(self, sample, count=1):
        """Add *count* to *sample* in the hash stored under the bare name."""
        self._r.hincrby(self._name, sample, count)

    def __contains__(self, condition):
        return self._condition_key(condition) in self._fdists

    def __getitem__(self, condition):
        # Look up and store under the same normalised key that
        # __contains__ checks.
        condition = self._condition_key(condition)
        if condition not in self._fdists:
            key = '%s:%s' % (self._name, condition)
            self._fdists[condition] = RedisHashFreqDist(self._r, key)
        return self._fdists[condition]

    # NOTE(review): conditions() and N() are overridden so that they read
    # ``_fdists`` no matter how the installed NLTK base class stores its
    # data - confirm against the NLTK version in use.
    def conditions(self):
        """Return the conditions that currently have a cached distribution."""
        return list(self._fdists)

    def N(self):
        """Return the total count over all cached per-condition distributions."""
        return sum(fdist.N() for fdist in self._fdists.values())

    def clear(self):
        """Delete the Redis hash of every cached per-condition distribution."""
        for fdist in self._fdists.values():
            fdist.clear()
if __name__ == '__main__':
    # Executed as a script: run whatever doctests this module contains.
    from doctest import testmod
    testmod()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi Denzil,
thanks a lot for your kind help.
Here: http://www.packtpub.com/python-text-processing-nltk-20-cookbook/book
you can find the code and the submitted errata for the book. There are only a few errata, and they do not include the problem I found:
File "redisprob.py", line 119, in __getitem__
if condition not in self._fdists:
AttributeError: 'RedisConditionalHashFreqDist' object has no attribute '_fdists'
Marco