@MercuryRising · Created October 13, 2012
Caching Files
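A quick benchmark of three ways to serve the same file contents repeatedly: re-reading from disk every time, caching the bytes in Redis with an expiry, and caching them in a plain Python dict.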
import redis
import time
import random
def load_file(fp, fpKey, r, expiry):
    # Read the file and store it in Redis, pipelining the set and expire.
    with open(fp, "rb") as f:
        data = f.read()
    p = r.pipeline()
    p.set(fpKey, data)
    p.expire(fpKey, expiry)
    p.execute()
    return data
# For use with a file list
def cache_or_get_gen_files(fps, expiry=300, r=None):
    # Pre-cache every file, then serve random cached copies, refreshing one
    # file each time the expiry window elapses.
    r = r or redis.Redis(db=5)
    fpKeys = []
    for fp in fps:
        fpKey = "cached:" + fp
        fpKeys.append(fpKey)
        load_file(fp, fpKey, r, expiry)
    while True:
        i = random.randrange(len(fps))
        yield load_file(fps[i], fpKeys[i], r, expiry)
        t = time.time()
        while time.time() - t < expiry:
            yield r.get(random.choice(fpKeys))
def cache_or_get_gen(fp, expiry=300, r=None):
    # Re-read and re-cache the file, then serve it from Redis until it expires.
    r = r or redis.Redis(db=5)
    fpKey = "cached:" + fp
    while True:
        yield load_file(fp, fpKey, r, expiry)
        t = time.time()
        while time.time() - t < expiry:
            yield r.get(fpKey)
def cache_or_get(fp, expiry=300, r=None):
    # Return the cached bytes if present; otherwise read the file and cache it.
    r = r or redis.Redis(db=5)
    fpKey = "cached:" + fp
    data = r.get(fpKey)
    if data:
        return data
    with open(fp, "rb") as f:
        data = f.read()
    p = r.pipeline()
    p.set(fpKey, data)
    p.expire(fpKey, expiry)
    p.execute()
    return data
def mem_cache(fp):
    # Cache the file's lines in a local dict; every yield after the first
    # skips the disk entirely.
    files = {}
    while True:
        if fp in files:
            yield files[fp]
        else:
            with open(fp, "rb") as f:
                data = f.readlines()
            files[fp] = data
            yield data
def stressTest(fps, trials=40000):
    r = redis.Redis(db=5)

    # Baseline: read a random file from disk each time
    a = time.time()
    for x in range(trials):
        with open(random.choice(fps), "rb") as f:
            data = f.read()
    b = time.time()
    readAvg = trials / (b - a)

    # Generator version
    # Cache one file, then pull it from Redis on each next()
    a = time.time()
    gen = cache_or_get_gen(random.choice(fps), r=r)
    for x in range(trials):
        data = next(gen)
    b = time.time()
    cachedAvgGen = trials / (b - a)

    # Non-generator version
    # Read the file, cache it, fetch it with a fresh call each time
    a = time.time()
    for x in range(trials):
        data = cache_or_get(random.choice(fps), r=r)
    b = time.time()
    cachedAvg = trials / (b - a)

    # Read one file, cache it in a Python object, pull it from the dict
    a = time.time()
    gen = mem_cache(random.choice(fps))
    for x in range(trials):
        data = next(gen)
    b = time.time()
    memCachedAvg = trials / (b - a)

    print("Total number of files: %s" % len(fps))
    print("%s file reads: %.2f reads/second\n" % (trials, readAvg))
    print("Yielding from generators for data:")
    print("single redis instance: %.2f reads/second (%.2f percent)" % (cachedAvgGen, 100 * (cachedAvgGen - readAvg) / readAvg))
    print("\nFunction calls to get data:")
    print("single redis instance: %.2f reads/second (%.2f percent)" % (cachedAvg, 100 * (cachedAvg - readAvg) / readAvg))
    print("\npython cached object: %.2f reads/second (%.2f percent)" % (memCachedAvg, 100 * (memCachedAvg - readAvg) / readAvg))
if __name__ == "__main__":
    numFiles = 700
    # Input: a file that contains a list of file names (you could do this any other way too)
    pathToFileList = ''
    with open(pathToFileList, "r") as f:
        files = f.readlines()
    # The file list I have has way too many files in it
    fil = [random.choice(files) for x in range(numFiles)]
    fps = []
    for fp in fil:
        fp = "FilePathBase/%s" % fp.strip()
        fps.append(fp)
    stressTest(fps)
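For a single file, the Redis-backed helpers can be exercised on their own. A minimal sketch, assuming a local Redis server is running and that `example.txt` exists (both names are illustrative, not part of the gist):

import redis

r = redis.Redis(db=5)

# First call misses the cache, reads the file from disk, and stores it with
# a 60-second expiry; the second call is served straight from Redis.
data = cache_or_get("example.txt", expiry=60, r=r)
data = cache_or_get("example.txt", expiry=60, r=r)

# Generator flavor: each next() yields fresh-or-cached bytes on demand.
gen = cache_or_get_gen("example.txt", expiry=60, r=r)
data = next(gen)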