# ShardedCache mixin for AppEngine

import logging

# the tests below use Django's cache framework, so `cache` is assumed to resolve
# to it here (the original gist may have bound an AppEngine memcache client instead)
from django.core.cache import cache
def bytes_to(bytes, to, bsize=1024):
    """Convert a raw byte count to kilo/mega/giga/tera/peta/exabytes."""
    a = {'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5, 'e': 6}
    r = float(bytes)
    for i in range(a[to]):
        r /= bsize
    return r
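
# Illustrative usage sketch (not part of the original gist): handy for logging
# how large a cached value is relative to the memcache limit.
#   bytes_to(1048576, 'm')   # -> 1.0 (1MB)
#   bytes_to(1048576, 'k')   # -> 1024.0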
class ShardedCache(object):
    """
    Works around the AppEngine Memcache 1MB limit for key/values.
    """

    def get(self, key):
        keys = self.get_keys_from_stem(key)
        return ''.join([cache.get(key) for key in keys])

    def set(self, key, value, limit=None, overhead=None):
        """
        Allow for the storing of key/value pairs that exceed the AppEngine Memcache limit:
        https://developers.google.com/appengine/docs/python/memcache/#Python_Limits

        If the combined size of the key, value and overhead is greater than the limit,
        the value is chunked into smaller parts. Each chunk is stored with an enumerated
        key, e.g. foo_0, foo_1, foo_2
        """
        keys = []
        if limit is None:
            limit = 1048570  # AppEngine limit, minus 6 bytes to allow for longer keys
        if overhead is None:
            overhead = 96  # per-entry overhead, see the limits doc linked above

        if len(key) + len(value) + overhead < limit:
            # total size is less than the limit, so write to the cache directly
            cache.set(key, value)
            keys.append(key)
        else:
            # total size exceeds the cache limit, so chunk the value
            space_remaining = limit - (len(key) + 2)  # add 2 here for the key counter _0, _1, ...
            chunks = [value[i:i + space_remaining] for i in range(0, len(value), space_remaining)]
            logging.debug('Chunks: %s' % len(chunks))
            for i, chunk in enumerate(chunks):
                key_ref = '%s_%s' % (key, i)
                cache.set(key_ref, chunk)
                keys.append(key_ref)
        return keys
    def get_keys_from_stem(self, stem):
        """
        Returns a list of keys from a stem. Assumes the naming convention stem_0, stem_1, ...
        e.g. get_keys_from_stem('foo') returns ['foo_0', 'foo_1', ...]
        """
        counter = 0
        keys = []
        if cache.has_key(stem):
            # key exists without an appended counter, so just return it
            return [stem]
        while True:
            key = '%s_%s' % (stem, counter)
            if cache.has_key(key):
                keys.append(key)
                counter += 1
            else:
                break
        return keys

sharded_cache = ShardedCache()
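
# Illustrative usage sketch (assumes a configured cache backend behind `cache`):
# values larger than the limit are transparently split across several keys on
# write and reassembled on read.
#
#   big_value = 'x' * (2 * 1024 * 1024)             # ~2MB payload, over the 1MB limit
#   keys = sharded_cache.set('report', big_value)   # -> ['report_0', 'report_1', ...]
#   assert sharded_cache.get('report') == big_value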
# tests
from django.test import TestCase
from django.core.cache import cache
class ShardedCacheTests(TestCase):
    """
    The key is 4 chars and the value is 26 chars.

    The limit for the combined size of key, value and overhead is 10. Keys take the
    format key_counter e.g. Yale_0, so 10 - (4 + 2) leaves 4 chars of value per chunk,
    and for the alphabet we should expect 7 chunks where each key is 6 chars and each
    value is 4 chars (2 for the final chunk):

        Yale_0: abcd
        Yale_1: efgh
        Yale_2: ijkl
        Yale_3: mnop
        Yale_4: qrst
        Yale_5: uvwx
        Yale_6: yz
    """
    key = 'Yale'
    value = 'abcdefghijklmnopqrstuvwxyz'
    limit = 10
    def test_set_cache(self):
        """simplest case: set a key/value < max"""
        keys = sharded_cache.set(self.key, self.value)
        self.assertEqual(keys, [self.key])
        self.assertEqual(cache.get(self.key), self.value)

    def test_set_cache_split(self):
        """exceeding the limit returns multiple keys"""
        keys = sharded_cache.set(self.key, self.value, limit=self.limit, overhead=0)
        self.assertEqual(keys, ['Yale_0', 'Yale_1', 'Yale_2', 'Yale_3', 'Yale_4', 'Yale_5', 'Yale_6'])

    def test_get_keys_from_stem(self):
        keys = sharded_cache.set(self.key, self.value, limit=self.limit, overhead=0)
        self.assertEqual(sharded_cache.get_keys_from_stem(self.key), keys)

    def test_get(self):
        """given a stem, returns the concatenated value"""
        sharded_cache.set(self.key, self.value, limit=self.limit, overhead=0)
        self.assertEqual(sharded_cache.get(self.key), self.value)