"""
Copyright Michael Parker 2011.
Program to quantify the memory usage of:
a. The compact hash and set implementations in Redis 2.2 described at
http://redis.io/topics/memory-optimization and
http://redis.io/presentation/Pnoordhuis_whats_new_in_2_2.pdf versus their
counterparts that do not contain integers. For the comparisons to be
meaningful, be sure to have hash-max-zipmap-entries and
set-max-intset-entries present in your redis.conf file.
b. zsets compared to sets
If your redis.conf file is in the same directory as your redis server
executable, simply run:
python memory_benchmark.py /path/to/redis/server
Note that the default number of elements to add per set, 500, is just below the
set-max-intset-entries directive default value of 512. To observe the gains of
this encoding, double the set_size argument and halve num_sets, so you are still
adding the same number of elements across all sets but now exceed the
set-max-intset-entries threshold. (If you are not using the default of 512,
adjust these two arguments accordingly.)
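
For example, with the defaults of num_sets=1000 and set_size=500, you might
compare the following two runs (the server path is a placeholder):

python memory_benchmark.py --num_sets 1000 --set_size 500 /path/to/redis/server
python memory_benchmark.py --num_sets 500 --set_size 1000 /path/to/redis/server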
"""
import argparse
import functools
import os
import redis
import subprocess
import time
import uuid

def set_up_server(parser_args):
    if parser_args.conf_path:
        args = [parser_args.server_path, parser_args.conf_path]
    else:
        server_path = parser_args.server_path
        dirname = os.path.dirname(server_path)
        conf_path = os.path.join(dirname, 'redis.conf')
        args = [server_path, conf_path]
    # Suppress output from the redis server.
    server = subprocess.Popen(args, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    return server

def get_client(port):
    return redis.Redis(port=port)

def get_memory_used(client):
    info = client.info()
    return int(info['used_memory'])

def tear_down_server(server):
    server.terminate()
    server.wait()

def approximate_size(size):
    # Adapted from http://diveintopython3.org/strings.html.
    multiple = 1024.0
    for suffix in ['KB', 'MB', 'GB', 'TB', 'PB']:
        size /= multiple
        if size < multiple:
            return '%.2f %s' % (size, suffix)
    raise ValueError('number too large')

def profile(runner, args):
    server = set_up_server(args)
    client = get_client(args.port)
    # The client library raises ConnectionError until the server is ready to
    # accept connections.
    while True:
        try:
            start_memory = get_memory_used(client)
        except redis.exceptions.ConnectionError:
            # Not ready to accept connections yet, so retry shortly.
            time.sleep(0.05)
        else:
            break
    start_time = time.time()
    runner(client, args)
    end_time = time.time()
    end_memory = get_memory_used(client)
    diff_time = end_time - start_time
    diff_memory = end_memory - start_memory
    diff_memory_readable = approximate_size(diff_memory)
    print '%s: diff_time=%.2f s, diff_memory=%d bytes (%s)' % (
        runner.func_name, diff_time, diff_memory, diff_memory_readable)
    tear_down_server(server)

def set_runner(fill, client, args):
    for i in xrange(args.num_sets):
        key = 's:%d' % i
        # Pipeline all additions to one set to avoid a round trip per element.
        pipeline = client.pipeline()
        fill(pipeline, key)
        pipeline.execute()

def set_ints(client, args):
    def fill(pipeline, key):
        for i in xrange(args.set_size):
            pipeline.sadd(key, i * 5)
    set_runner(fill, client, args)

def set_uuids(client, args):
    def fill(pipeline, key):
        for i in xrange(args.set_size):
            u = uuid.uuid4()
            pipeline.sadd(key, u.hex)
    set_runner(fill, client, args)

def zset_ints(client, args):
    def fill(pipeline, key):
        for i in xrange(args.set_size):
            pipeline.zadd(key, i * 5, i / 10)
    set_runner(fill, client, args)

def zset_uuids(client, args):
    def fill(pipeline, key):
        for i in xrange(args.set_size):
            u = uuid.uuid4()
            pipeline.zadd(key, u.hex, i / 10)
    set_runner(fill, client, args)

def standard_map_add(client, args):
    # For a relevant timing comparison with compact_map_add, don't pipeline.
    for i in xrange(args.hash_size):
        uuid_bytes = uuid.uuid4().bytes
        key = 'id:%s' % uuid_bytes
        client.set(key, 'foo')
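
# In standard_map_add above, every value occupies its own top-level key, so
# each one pays Redis's per-key overhead. compact_map_add below instead
# buckets the values into many small hashes named after a prefix of the
# original key; as long as a hash stays below hash-max-zipmap-entries, Redis
# 2.2 stores it in the compact zipmap encoding described at
# http://redis.io/topics/memory-optimization.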
def compact_map_add(client, args):
    threshold = 500
    for i in xrange(args.hash_size):
        uuid_bytes = uuid.uuid4().bytes
        key = 'id:%s' % uuid_bytes[0]
        # Add the value to the hash, optimistically assuming that it will not
        # need to be split.
        pipeline = client.pipeline()
        pipeline.hset(key, uuid_bytes[1:], 'foo')
        pipeline.hlen(key)
        hash_length = pipeline.execute()[1]
        if hash_length == threshold:
            # The hash must be split to remain compact; get its values to move.
            h = client.hgetall(key)
            mappings = {}
            # Group the values into hashes named after a two-character prefix.
            for remainder in h:
                next_key = key + remainder[0]
                next_remainder = remainder[1:]
                pairs = mappings.get(next_key, None)
                if pairs is None:
                    pairs = []
                    mappings[next_key] = pairs
                pairs.append((next_remainder, h[remainder]))
            pipeline = client.pipeline()
            # Add each group of values to the hash named after its common
            # prefix.
            for next_key in mappings:
                pairs = mappings[next_key]
                new_values = dict(pairs)
                pipeline.hmset(next_key, new_values)
            # Delete the hash; any values added since retrieving it will also
            # be added to a hash named after a two-character prefix.
            pipeline.delete(key)
            pipeline.execute()
        elif hash_length > threshold:
            # In a concurrent setting, another client found hash_length equal
            # to threshold and is now splitting the hash, and will then delete
            # it along with the value just added. Therefore also add the value
            # to the hash named after a two-character prefix so it is not lost.
            next_key = key + uuid_bytes[1]
            client.hset(next_key, uuid_bytes[2:], 'foo')
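
# Illustrative sketch, not part of the original benchmark: under the splitting
# scheme above, a value lives either in the hash named after a one-byte prefix
# or, once that hash has been split, in the hash named after a two-byte
# prefix. A hypothetical lookup tries the longer (post-split) key first and
# falls back to the shorter one.
def compact_map_get(client, uuid_bytes):
    value = client.hget('id:%s' % uuid_bytes[:2], uuid_bytes[2:])
    if value is None:
        value = client.hget('id:%s' % uuid_bytes[0], uuid_bytes[1:])
    return value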

def run():
    parser = argparse.ArgumentParser(description='Perform memory benchmarks')
    parser.add_argument('--num_sets', type=int, default=1000,
                        help='number of sets to create')
    parser.add_argument('--set_size', type=int, default=500,
                        help='number of elements per set')
    parser.add_argument('--hash_size', type=int, default=2000000,
                        help='number of keys to add to the hash')
    parser.add_argument('--port', type=int, default=6379,
                        help='port of the Redis server')
    parser.add_argument('--conf_path', help='path to the redis.conf file')
    parser.add_argument('server_path', help='path to the redis server')
    args = parser.parse_args()
    # Profile sets versus zsets, and the impact of set-max-intset-entries.
    profile(set_ints, args)
    profile(set_uuids, args)
    profile(zset_ints, args)
    profile(zset_uuids, args)
    # Test the impact of hash-max-zipmap-entries.
    profile(standard_map_add, args)
    profile(compact_map_add, args)

if __name__ == '__main__':
    # Defining any variables here would create global variables.
    run()