Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from uuid import uuid4
from time import time
from datetime import datetime, timedelta
import lmdb
def get_random(size):
    """Build a value factory for payloads of ``size`` bytes.

    Despite the name, the payload is not random: every call to the returned
    function allocates a new zero-filled ``bytearray`` of length ``size``.

    Args:
        size: Length in bytes of each generated payload.

    Returns:
        A zero-argument callable producing a fresh ``bytearray(size)``.
    """
    return lambda: bytearray(size)
def get_seq():
    """Return the next sequential key as 16 big-endian bytes.

    The counter is stored on the function object itself (``get_seq.index``),
    so it starts at 1 on the first call and keeps increasing across every
    later call in the same process.
    """
    # A missing attribute means this is the first call; start counting from 0.
    current = getattr(get_seq, 'index', 0) + 1
    get_seq.index = current
    return current.to_bytes(16, byteorder='big')
def get_uuid():
    """Return the 16 raw bytes of a newly generated version-4 UUID."""
    fresh = uuid4()
    return fresh.bytes
def bench(
        name,
        size,  # items
        key_fn=get_uuid,
        value_fn=get_random(150*1024),  # 150 KiB
        key_len=16,  # 16 Bytes
        value_len=150 * 1024,  # 150 KiB
        chunk_mem=300 * 1024 * 1024  # 300 MiB RAM
):
    """Write ``size`` records to a new LMDB environment, then read them back.

    Progress and throughput figures are printed to stdout. The environment
    is created at the path ``test_<unix-timestamp>_<name>``.

    Args:
        name: Label used in the output and in the environment path.
        size: Number of records to write.
        key_fn: Zero-argument callable returning the key for the next record.
        value_fn: Zero-argument callable returning the value for the next
            record.
        key_len: Key length in bytes; used only for size/throughput figures.
        value_len: Value length in bytes; used only for size/throughput
            figures, so it should match what ``value_fn`` produces.
        chunk_mem: Approximate number of payload bytes written per write
            transaction.
    """
    filename = 'test_{}_{}'.format(int(time()), name)
    record_len = key_len + value_len
    db_length = size * record_len
    db_mb = round(db_length / 1024 / 1024)
    # Always put at least one record in a transaction, even when a single
    # record is larger than chunk_mem.
    chunk_size = max(1, int(chunk_mem // record_len))
    print('\n{}: begin benchmark'.format(name))
    print('{}: chunk size set to {} items, {} MB'.format(
        name, chunk_size, round(chunk_size * record_len / 1024 / 1024)
    ))

    def throughput(duration):
        """Return the nominal MB/s for moving ``db_length`` bytes."""
        seconds = duration.total_seconds()
        if seconds <= 0:
            # Too fast for the clock resolution; avoid ZeroDivisionError.
            return float('inf')
        return round(db_length / seconds / 1024 / 1024)

    def create():
        """Write exactly ``size`` records, one transaction per chunk."""
        start = datetime.now()
        print('{}: write lmdb with {} records ({}B + {}B = ~{} MB)'.format(
            name, size, key_len, value_len, db_mb))
        db = lmdb.open(filename, map_size=1024**4)
        try:
            items = 0
            while items < size:
                # The last chunk may be shorter so that the total never
                # exceeds the requested number of records.
                batch = min(chunk_size, size - items)
                with db.begin(write=True) as txn:
                    for _ in range(batch):
                        txn.put(key_fn(), value_fn())
                items += batch
            duration = datetime.now() - start
            print('{}: file written in {} (~{} MB/s)'.format(
                name, str(duration), throughput(duration)))
        finally:
            # Release the environment even if a put/commit fails.
            db.close()

    def read():
        """Iterate over every record once in a read-only transaction."""
        start = datetime.now()
        print('{}: read lmdb with {} records ({}B + {}B = ~{} MB)'.format(
            name, size, key_len, value_len, db_mb))
        db = lmdb.open(filename, map_size=1024**4)
        try:
            print('{}: LMDB stats: {}'.format(name, db.stat()))
            with db.begin(write=False) as txn:
                cursor = txn.cursor()
                for _ in cursor:
                    pass
            duration = datetime.now() - start
            print('{}: file read in {} (~{} MB/s)'.format(
                name, str(duration), throughput(duration)))
        finally:
            db.close()

    create()
    read()
# Run each database size twice: first with random (UUID) keys, then with
# sequential keys, so the two key orderings can be compared at equal size.
for label, count in (('10k', 10000), ('50k', 50000), ('100k', 100000)):
    bench('{}_uuid'.format(label), count, key_fn=get_uuid)
    bench('{}_seq'.format(label), count, key_fn=get_seq)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment