4poc/lmdb_bench.py

## lmdb_bench.py
from uuid import uuid4
from time import time
from datetime import datetime, timedelta
import lmdb

def get_random(size):
    """Build a zero-argument factory for fixed-size benchmark values.

    Every call of the returned callable allocates a new ``bytearray`` of
    ``size`` bytes. Despite the name the payload is not random: a freshly
    created ``bytearray(size)`` is filled with zero bytes.
    """
    return lambda: bytearray(size)

def get_seq():
    """Return the next key of a 1-based sequence as 16 big-endian bytes.

    The counter is stored on the function object itself (``get_seq.index``),
    so it keeps increasing across all callers for the lifetime of the process.
    """
    counter = getattr(get_seq, 'index', 0) + 1
    get_seq.index = counter
    return counter.to_bytes(16, byteorder='big')

def get_uuid():
    """Return a newly generated version-4 UUID as its 16 raw bytes."""
    fresh = uuid4()
    return fresh.bytes

def bench(
        name,
        size, # items
        key_fn=get_uuid,
        value_fn=get_random(150*1024), # 150 KiB
        key_len=16, # 16 Bytes
        value_len=150 * 1024, # 150 KiB
        chunk_mem=300 * 1024 * 1024 # 300 MiB RAM
        ):
    """Benchmark writing and then reading ``size`` records in a new LMDB file.

    Records are written in transactions holding at most ``chunk_mem`` bytes
    worth of items; afterwards the whole database is scanned once with a
    cursor. Progress and throughput are printed to stdout and nothing is
    returned.

    Args:
        name: Label used in the log lines and in the database path.
        size: Number of records to write.
        key_fn: Zero-argument callable returning the key of the next record.
        value_fn: Zero-argument callable returning the value of the next record.
        key_len: Assumed key size in bytes. Only used for the size and
            throughput arithmetic; it is not enforced on ``key_fn``.
        value_len: Assumed value size in bytes. Only used for the size and
            throughput arithmetic; it is not enforced on ``value_fn``.
        chunk_mem: Approximate payload bytes placed into one write transaction.
    """
    mib = 1024 * 1024
    item_len = key_len + value_len
    filename = 'test_{}_{}'.format(int(time()), name)
    db_length = size * item_len
    # Never let a chunk shrink to zero items, or the write loop could not advance.
    chunk_size = max(1, int(chunk_mem / item_len))

    print('\n{}: begin benchmark'.format(name))
    print('{}: chunk size set to {} items, {} MB'.format(
        name, chunk_size, round(chunk_size * item_len / mib)))

    def rate(duration):
        # timedelta has microsecond resolution; clamp so that an immeasurably
        # short run cannot cause a division by zero.
        seconds = max(duration.total_seconds(), 1e-6)
        return round(db_length / seconds / mib)

    def create():
        start = datetime.now()
        print('{}: write lmdb with {} records ({}B + {}B = ~{} MB)'.format(
            name, size, key_len, value_len, round(db_length / mib)))

        db = lmdb.open(filename, map_size=1024**4)
        try:
            items = 0
            while items < size:
                # One transaction per chunk; it is committed when the
                # ``with`` block exits without an exception.
                with db.begin(write=True) as txn:
                    chunk_items = 0
                    # Stop at the chunk boundary and at the requested total,
                    # so exactly ``size`` records are written.
                    while items < size and chunk_items < chunk_size:
                        txn.put(key_fn(), value_fn())
                        items += 1
                        chunk_items += 1

            duration = datetime.now() - start
            print('{}: file written in {} (~{} MB/s)'.format(
                name, str(duration), rate(duration)))
        finally:
            # Release the environment even when a put or commit fails.
            db.close()

    def read():
        start = datetime.now()
        print('{}: read lmdb with {} records ({}B + {}B = ~{} MB)'.format(
            name, size, key_len, value_len, round(db_length / mib)))

        db = lmdb.open(filename, map_size=1024**4)
        try:
            print('{}: LMDB stats: {}'.format(name, db.stat()))
            with db.begin(write=False) as txn:
                cursor = txn.cursor()
                # Visit every record; the data itself is intentionally discarded.
                for _key, _value in cursor:
                    pass

            duration = datetime.now() - start
            print('{}: file read in {} (~{} MB/s)'.format(
                name, str(duration), rate(duration)))
        finally:
            db.close()

    create()
    read()

# Benchmark matrix: every database size is measured with random UUID keys
# first and with sequential keys second.
_RUNS = (
    ('10k_uuid', 10000, get_uuid),
    ('10k_seq', 10000, get_seq),
    ('50k_uuid', 50000, get_uuid),
    ('50k_seq', 50000, get_seq),
    ('100k_uuid', 100000, get_uuid),
    ('100k_seq', 100000, get_seq),
)

for _run_name, _run_size, _run_key_fn in _RUNS:
    bench(_run_name, _run_size, key_fn=_run_key_fn)
	from uuid import uuid4
	from time import time
	from datetime import datetime, timedelta
	import lmdb

	def get_random(size):
	    """Build a zero-argument factory for fixed-size benchmark values.

	    Every call of the returned callable allocates a new ``bytearray`` of
	    ``size`` bytes. Despite the name the payload is not random: a freshly
	    created ``bytearray(size)`` is filled with zero bytes.
	    """
	    def get_bytes():
	        return bytearray(size)
	    return get_bytes

	def get_seq():
	    """Return the next key of a 1-based sequence as 16 big-endian bytes.

	    The counter is stored on the function object itself (``get_seq.index``),
	    so it keeps increasing across all callers for the lifetime of the process.
	    """
	    if not hasattr(get_seq, 'index'):
	        get_seq.index = 0
	    get_seq.index += 1
	    return get_seq.index.to_bytes(16, byteorder='big')

	def get_uuid():
	    """Return a newly generated version-4 UUID as its 16 raw bytes."""
	    return uuid4().bytes

	def bench(
	        name,
	        size, # items
	        key_fn=get_uuid,
	        value_fn=get_random(150*1024), # 150 KiB
	        key_len=16, # 16 Bytes
	        value_len=150 * 1024, # 150 KiB
	        chunk_mem=300 * 1024 * 1024 # 300 MiB RAM
	        ):
	    """Benchmark writing and then reading ``size`` records in a new LMDB file.

	    Records are written in transactions holding at most ``chunk_mem`` bytes
	    worth of items; afterwards the whole database is scanned once with a
	    cursor. Progress and throughput are printed to stdout and nothing is
	    returned.

	    Args:
	        name: Label used in the log lines and in the database path.
	        size: Number of records to write.
	        key_fn: Zero-argument callable returning the key of the next record.
	        value_fn: Zero-argument callable returning the value of the next record.
	        key_len: Assumed key size in bytes. Only used for the size and
	            throughput arithmetic; it is not enforced on ``key_fn``.
	        value_len: Assumed value size in bytes. Only used for the size and
	            throughput arithmetic; it is not enforced on ``value_fn``.
	        chunk_mem: Approximate payload bytes placed into one write transaction.
	    """
	    mib = 1024 * 1024
	    item_len = key_len + value_len
	    filename = 'test_{}_{}'.format(int(time()), name)
	    db_length = size * item_len
	    # Never let a chunk shrink to zero items, or the write loop could not advance.
	    chunk_size = max(1, int(chunk_mem / item_len))

	    print('\n{}: begin benchmark'.format(name))
	    print('{}: chunk size set to {} items, {} MB'.format(
	        name, chunk_size, round(chunk_size * item_len / mib)))

	    def rate(duration):
	        # timedelta has microsecond resolution; clamp so that an immeasurably
	        # short run cannot cause a division by zero.
	        seconds = max(duration.total_seconds(), 1e-6)
	        return round(db_length / seconds / mib)

	    def create():
	        start = datetime.now()
	        print('{}: write lmdb with {} records ({}B + {}B = ~{} MB)'.format(
	            name, size, key_len, value_len, round(db_length / mib)))

	        db = lmdb.open(filename, map_size=1024**4)
	        try:
	            items = 0
	            while items < size:
	                # One transaction per chunk; it is committed when the
	                # ``with`` block exits without an exception.
	                with db.begin(write=True) as txn:
	                    chunk_items = 0
	                    # Stop at the chunk boundary and at the requested total,
	                    # so exactly ``size`` records are written.
	                    while items < size and chunk_items < chunk_size:
	                        txn.put(key_fn(), value_fn())
	                        items += 1
	                        chunk_items += 1

	            duration = datetime.now() - start
	            print('{}: file written in {} (~{} MB/s)'.format(
	                name, str(duration), rate(duration)))
	        finally:
	            # Release the environment even when a put or commit fails.
	            db.close()

	    def read():
	        start = datetime.now()
	        print('{}: read lmdb with {} records ({}B + {}B = ~{} MB)'.format(
	            name, size, key_len, value_len, round(db_length / mib)))

	        db = lmdb.open(filename, map_size=1024**4)
	        try:
	            print('{}: LMDB stats: {}'.format(name, db.stat()))
	            with db.begin(write=False) as txn:
	                cursor = txn.cursor()
	                # Visit every record; the data itself is intentionally discarded.
	                for _key, _value in cursor:
	                    pass

	            duration = datetime.now() - start
	            print('{}: file read in {} (~{} MB/s)'.format(
	                name, str(duration), rate(duration)))
	        finally:
	            db.close()

	    create()
	    read()

	# Benchmark matrix: every database size is measured with random UUID keys
	# first and with sequential keys second.
	_RUNS = (
	    ('10k_uuid', 10000, get_uuid),
	    ('10k_seq', 10000, get_seq),
	    ('50k_uuid', 50000, get_uuid),
	    ('50k_seq', 50000, get_seq),
	    ('100k_uuid', 100000, get_uuid),
	    ('100k_seq', 100000, get_seq),
	)

	for _run_name, _run_size, _run_key_fn in _RUNS:
	    bench(_run_name, _run_size, key_fn=_run_key_fn)