peterbe/redis-buckets.py

## redis-buckets.py
#! /usr/bin/env python

import time
import redis
import random
import sys

# Client for the Redis server reachable at host 'redis-store'.
r = redis.Redis(host='redis-store')

# Total number of hash fields written, and the inclusive upper bound of the
# random integer stored in each field.
NUM_ENTRIES = 1_000_000
MAX_VAL = 12_000_000

# Command-line argument: the divisor that maps an entry index to a hash key
# (entry i is stored in hash `i // buckets`), i.e. the fields per hash.
buckets = int(sys.argv[1])
assert buckets > 0 and buckets < 10_000

# Start from an empty server so the memory figure reflects only this run.
# WARNING: FLUSHALL removes every key in every database on that server.
r.flushall()
time.sleep(1)  # give it a chance to settle
assert r.dbsize() == 0, r.dbsize()


def _flush(pending):
    """Write every buffered hash in *pending* to Redis as one pipeline batch.

    *pending* maps hash name -> {field: value}. The commands are queued on
    the pipeline object itself (not on the client), so they are only sent
    when ``execute()`` is called instead of one round trip per hash.
    """
    pipe = r.pipeline()
    for name, mapping in pending.items():
        # NOTE(review): newer redis-py releases deprecate hmset in favour of
        # hset(name, mapping=...); kept because the installed client version
        # is not visible from here.
        pipe.hmset(name, mapping)
    pipe.execute()


t0 = time.time()

# Buffer entries client-side and ship them at every tenth of the run.
flush_every = NUM_ENTRIES // 10
hmsets = {}
for i in range(NUM_ENTRIES):
    value = random.randint(0, MAX_VAL)
    bucket = i // buckets
    hmsets.setdefault(bucket, {})[i] = value

    if i and not i % flush_every:
        _flush(hmsets)
        hmsets = {}

# one final clear out
_flush(hmsets)
t1 = time.time()

# print("THE WHOLE THING TOOK", t1 - t0, "SECONDS")

# Memory the server reports using (bytes), converted to mebibytes.
size = int(r.info()['used_memory'])
mb = size / 1024 / 1024
print('MEMORY SIZE      {:.2f} MB'.format(mb))

# Every hash is a single key, so DBSIZE is the number of hashes created.
num_keys = r.dbsize()
print("DBSIZE after    ", num_keys)
print('KEYS PER BUCKET ', NUM_ENTRIES // num_keys)

import csv

# Append, so repeated runs with different arguments accumulate rows.
# newline='' is what the csv module asks for on files it writes to.
with open('redisbuckets.csv', 'a', newline='') as f:
    writer = csv.writer(f)
    writer.writerow([buckets, mb])
	#! /usr/bin/env python

	import time
	import redis
	import random
	import sys

	# Client for the Redis server reachable at host 'redis-store'.
	r = redis.Redis(host='redis-store')

	# Total number of hash fields written, and the inclusive upper bound of
	# the random integer stored in each field.
	NUM_ENTRIES = 1_000_000
	MAX_VAL = 12_000_000

	# Command-line argument: the divisor that maps an entry index to a hash
	# key (entry i is stored in hash `i // buckets`), i.e. fields per hash.
	buckets = int(sys.argv[1])
	assert buckets > 0 and buckets < 10_000

	# Start from an empty server so the memory figure reflects only this run.
	# WARNING: FLUSHALL removes every key in every database on that server.
	r.flushall()
	time.sleep(1) # give it a chance to settle
	assert r.dbsize() == 0, r.dbsize()


	def _flush(pending):
		"""Write every buffered hash in *pending* to Redis as one pipeline batch.

		*pending* maps hash name -> {field: value}. The commands are queued
		on the pipeline object itself (not on the client), so they are only
		sent when ``execute()`` is called.
		"""
		pipe = r.pipeline()
		for name, mapping in pending.items():
			# NOTE(review): newer redis-py releases deprecate hmset in
			# favour of hset(name, mapping=...); kept because the installed
			# client version is not visible from here.
			pipe.hmset(name, mapping)
		pipe.execute()


	t0 = time.time()

	# Buffer entries client-side and ship them at every tenth of the run.
	flush_every = NUM_ENTRIES // 10
	hmsets = {}
	for i in range(NUM_ENTRIES):
		value = random.randint(0, MAX_VAL)
		bucket = i // buckets
		hmsets.setdefault(bucket, {})[i] = value

		if i and not i % flush_every:
			_flush(hmsets)
			hmsets = {}

	# one final clear out
	_flush(hmsets)
	t1 = time.time()

	# print("THE WHOLE THING TOOK", t1 - t0, "SECONDS")

	# Memory the server reports using (bytes), converted to mebibytes.
	size = int(r.info()['used_memory'])
	mb = size / 1024 / 1024
	print('MEMORY SIZE {:.2f} MB'.format(mb))

	# Every hash is a single key, so DBSIZE is the number of hashes created.
	num_keys = r.dbsize()
	print("DBSIZE after ", num_keys)
	print('KEYS PER BUCKET ', NUM_ENTRIES // num_keys)

	import csv

	# Append, so repeated runs with different arguments accumulate rows.
	# newline='' is what the csv module asks for on files it writes to.
	with open('redisbuckets.csv', 'a', newline='') as f:
		writer = csv.writer(f)
		writer.writerow([buckets, mb])