Skip to content

Instantly share code, notes, and snippets.

@achalddave
Last active April 24, 2016 19:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save achalddave/8e9490efd348cdcabf21232358ec55eb to your computer and use it in GitHub Desktop.
Save achalddave/8e9490efd348cdcabf21232358ec55eb to your computer and use it in GitHub Desktop.
import argparse
import errno
import logging
import os
import random

import lmdb
from tqdm import tqdm
# Emit INFO-level messages with a wall-clock timestamp (millisecond precision)
# so the duration of each benchmark phase can be read straight from the log.
logging.getLogger().setLevel(logging.INFO)
logging.basicConfig(format='%(asctime)s.%(msecs).03d: %(message)s',
                    datefmt='%H:%M:%S')


# Allow adding @profile decorator even when kernprof isn't being used.
try:
    profile
except NameError:
    def profile(func):
        """Fallback no-op decorator: return ``func`` unchanged."""
        return func
def mkdir_p(path):
    """Create ``path`` and any missing parents, like ``mkdir -p``.

    Taken from <http://stackoverflow.com/a/600612/1291812>

    Args:
        path: Directory to create.

    Raises:
        OSError: If creation fails for any reason other than ``path``
            already existing as a directory (for example, ``path`` exists
            but is a regular file, or permission is denied).
    """
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        # An already-existing directory is the one failure we tolerate;
        # anything else is a real error and is re-raised unchanged.
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise
@profile
def main():
    """Benchmark random-order reads from LMDB against one-file-per-key reads.

    Command-line arguments:
        lmdb_path: Path of the LMDB environment to read.
        tmpdir: Directory that receives one file per key (``tmpdir/key``);
            created if it does not exist.

    The run has four phases, each bracketed by log messages:
        1. Collect every key in the database.
        2. Write each value to ``tmpdir/key``.
        3. Shuffle the keys and look each one up in LMDB.
        4. Read the corresponding files in the same shuffled order.

    The phases are intentionally kept inline rather than split into helper
    functions so that a line profiler attached via ``@profile`` reports
    per-line timings for every phase in a single table.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('lmdb_path')
    parser.add_argument('tmpdir',
                        help='LMDB values will be stored in tmpdir/key')

    args = parser.parse_args()
    lmdb_path = args.lmdb_path

    # LMDB keys are byte strings. Building the file paths as bytes as well
    # means keys never have to be decoded (they may not be valid text) and
    # the very same key objects can be handed back to LMDB for lookups.
    # On Python 2 `str` already is `bytes`, so this branch is skipped there.
    tmpdir = args.tmpdir
    if not isinstance(tmpdir, bytes):
        tmpdir = os.fsencode(tmpdir)

    logging.info('Reading keys from LMDB.')
    with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
        with lmdb_environment.begin() as transaction:
            cursor = transaction.cursor()
            # Only keys are needed here; skipping values avoids copying the
            # whole database into Python objects just to discard them.
            keys = list(cursor.iternext(keys=True, values=False))
            # Copy keys; we do this in a separate line so we can profile the
            # above line separately.
            keys = [bytes(key) for key in keys]
    logging.info('Read keys from LMDB.')

    logging.info('Writing key-values to disk as files.')
    mkdir_p(args.tmpdir)
    with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
        with lmdb_environment.begin() as transaction:
            cursor = transaction.cursor()
            for key in tqdm(keys):
                with open(tmpdir + b'/' + key, 'wb') as f:
                    f.write(cursor.get(key))
    logging.info('Wrote key-values to disk as files.')

    # Randomly shuffle the keys, then search for them in the database.
    random.shuffle(keys)

    logging.info('Reading keys in random order from LMDB.')
    with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
        with lmdb_environment.begin() as transaction:
            cursor = transaction.cursor()
            for key in tqdm(keys):
                cursor.get(key)

    logging.info('Reading keys in random order from file system.')
    for key in tqdm(keys):
        filename = tmpdir + b'/' + key
        # Binary mode mirrors how the files were written and avoids any
        # text decoding or newline translation of the raw values.
        with open(filename, 'rb') as f:
            f.read()

    logging.info('Finished reading keys.')


if __name__ == "__main__":
    main()
Timer unit: 1e-06 s
Total time: 3852.08 s
File: time_lmdb.py
Function: main at line 30
Line # Hits Time Per Hit % Time Line Contents
==============================================================
30 @profile
31 def main():
32 1 2 2.0 0.0 parser = argparse.ArgumentParser(
33 1 1 1.0 0.0 description=__doc__,
34 1 1482 1482.0 0.0 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
35 1 59 59.0 0.0 parser.add_argument('lmdb_path')
36 1 1 1.0 0.0 parser.add_argument('tmpdir',
37 1 48 48.0 0.0 help='LMDB values will be stored in tmpdir/key')
38
39 1 550 550.0 0.0 args = parser.parse_args()
40 1 1 1.0 0.0 lmdb_path = args.lmdb_path
41
42 1 144 144.0 0.0 logging.info('Reading keys from LMDB.')
43 1 63 63.0 0.0 with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
44 1 4 4.0 0.0 with lmdb_environment.begin() as transaction:
45 1 4 4.0 0.0 cursor = transaction.cursor()
46 138135 354683674 2567.7 9.2 keys = [key for (key, _) in cursor]
47 # Copy keys; we do this in a separate line so we can profile the
48 # above line separately.
49 138135 610476 4.4 0.0 keys = [str(key) for key in keys]
50 1 301 301.0 0.0 logging.info('Read keys from LMDB.')
51
52 1 141 141.0 0.0 logging.info('Writing key-values to disk as files.')
53 1 10310 10310.0 0.0 mkdir_p(args.tmpdir)
54 1 48 48.0 0.0 with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
55 1 3 3.0 0.0 with lmdb_environment.begin() as transaction:
56 1 2 2.0 0.0 cursor = transaction.cursor()
57 138135 1400936 10.1 0.0 for key in tqdm(keys):
58 138134 7496652 54.3 0.2 with open('{}/{}'.format(args.tmpdir, key), 'wb') as f:
59 138134 645682628 4674.3 16.8 f.write(cursor.get(key))
60 1 252 252.0 0.0 logging.info('Wrote key-values to disk as files.')
61
62 # Randomly shuffle the keys, then search for them in the database.
63 1 117269 117269.0 0.0 random.shuffle(keys)
64
65 1 178 178.0 0.0 logging.info('Reading keys in random order from LMDB.')
66 1 67 67.0 0.0 with lmdb.open(lmdb_path, readonly=True) as lmdb_environment:
67 1 5 5.0 0.0 with lmdb_environment.begin() as transaction:
68 1 2 2.0 0.0 cursor = transaction.cursor()
69 138135 2275492 16.5 0.1 for key in tqdm(keys):
70 138134 1316020659 9527.1 34.2 cursor.get(key)
71
72 1 265 265.0 0.0 logging.info('Reading keys in random order from file system.')
73 138135 2024011 14.7 0.1 for key in tqdm(keys):
74 138134 349713 2.5 0.0 filename = '{}/{}'.format(args.tmpdir, key)
75 138134 3150209 22.8 0.1 with open(filename) as f:
76 138134 1518250676 10991.1 39.4 f.read()
77
78 1 229 229.0 0.0 logging.info('Finished reading keys.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment