Instantly share code, notes, and snippets.

Embed
What would you like to do?
from uuid import uuid4
from time import time
from datetime import datetime, timedelta
import lmdb
def get_random(size):
    """Return a zero-argument factory that produces ``size``-byte values.

    Note that, despite the name, each value is a fresh zero-filled
    ``bytearray`` (``bytearray(size)``), not random data.
    """
    return lambda: bytearray(size)
def get_seq():
    """Return the next sequential key as 16 big-endian bytes.

    The counter is stored on the function object itself (``get_seq.index``),
    so it keeps increasing across every caller in the process; the first
    call yields 1.
    """
    current = getattr(get_seq, 'index', 0) + 1
    get_seq.index = current
    return current.to_bytes(16, byteorder='big')
def get_uuid():
    """Return a freshly generated version-4 UUID as its 16 raw bytes."""
    key = uuid4()
    return key.bytes
def bench(
    name,
    size,  # items
    key_fn=get_uuid,
    value_fn=get_random(150*1024),  # 150 KiB
    key_len=16,  # 16 Bytes
    value_len=150 * 1024,  # 150 KiB
    chunk_mem=300 * 1024 * 1024  # 300 MiB RAM
):
    """Write ``size`` records to a new LMDB environment, then read them back.

    Progress and throughput figures are printed to stdout. The environment is
    created at ``test_<unix timestamp>_<name>`` and is not removed afterwards.

    Args:
        name: Label used in the output path and as a prefix of every message.
        size: Number of records to write.
        key_fn: Zero-argument callable returning the key for the next record.
        value_fn: Zero-argument callable returning the value for the next
            record.
        key_len: Nominal key size in bytes. Used only for the size and
            throughput estimates; the real size is whatever ``key_fn`` returns.
        value_len: Nominal value size in bytes. Used only for the estimates,
            like ``key_len``.
        chunk_mem: Approximate number of payload bytes to put into a single
            write transaction before committing.
    """
    filename = 'test_{}_{}'.format(int(time()), name)
    item_len = key_len + value_len
    db_length = size * item_len
    db_mb = round(db_length / 1024 / 1024)
    # Always allow at least one record per transaction, even when chunk_mem
    # is smaller than a single record; otherwise the writer could not advance.
    chunk_size = max(1, int(chunk_mem / item_len))

    print('\n{}: begin benchmark'.format(name))
    print('{}: chunk size set to {} items, {} MB'.format(
        name, chunk_size, round(chunk_size * item_len / 1024 / 1024)
    ))

    def mb_per_second(duration):
        # Nominal throughput; 0 is reported if no measurable time elapsed so
        # that an empty or extremely fast run cannot divide by zero.
        seconds = duration.total_seconds()
        if seconds <= 0:
            return 0
        return round(db_length / seconds / 1024 / 1024)

    def create():
        # Write exactly `size` records, committing one transaction per chunk.
        start = datetime.now()
        print('{}: write lmdb with {} records ({}B + {}B = ~{} MB)'.format(
            name, size, key_len, value_len, db_mb))
        db = lmdb.open(filename, map_size=1024**4)
        try:
            items = 0
            while items < size:
                # The last chunk is shortened so the total never exceeds
                # `size`; the reported MB figures then match what is written.
                batch = min(chunk_size, size - items)
                with db.begin(write=True) as txn:
                    for _ in range(batch):
                        txn.put(key_fn(), value_fn())
                items += batch
            duration = datetime.now() - start
            print('{}: file written in {} (~{} MB/s)'.format(
                name, str(duration), mb_per_second(duration)))
        finally:
            # Release the environment even if a put/commit raised.
            db.close()

    def read():
        # Iterate over every record once with a cursor in a read transaction.
        start = datetime.now()
        print('{}: read lmdb with {} records ({}B + {}B = ~{} MB)'.format(
            name, size, key_len, value_len, db_mb))
        db = lmdb.open(filename, map_size=1024**4)
        try:
            print('{}: LMDB stats: {}'.format(name, db.stat()))
            with db.begin(write=False) as txn:
                cursor = txn.cursor()
                for key, value in cursor:
                    pass
            duration = datetime.now() - start
            print('{}: file read in {} (~{} MB/s)'.format(
                name, str(duration), mb_per_second(duration)))
        finally:
            db.close()

    create()
    read()
# Run every benchmark size twice: first with random UUID keys, then with
# monotonically increasing sequential keys.
for label, count in (('10k', 10000), ('50k', 50000), ('100k', 100000)):
    bench('{}_uuid'.format(label), count, key_fn=get_uuid)
    bench('{}_seq'.format(label), count, key_fn=get_seq)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment