Skip to content

Instantly share code, notes, and snippets.

@FrancescAlted
Created January 14, 2016 12:54
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FrancescAlted/8e87c8762a49cf5fc897 to your computer and use it in GitHub Desktop.
Save FrancescAlted/8e87c8762a49cf5fc897 to your computer and use it in GitHub Desktop.
A demonstration of a simple key-value store using numpy (.npz) and bcolz (ctable)
# Benchmark to compare the times for storing numpy arrays in a key-value store.
# The main point is to compare numpy serialization vs a bcolz approach.
from __future__ import print_function
import sys
import os
import os.path
import subprocess
import getopt
import shutil
from time import time
import numpy as np
import bcolz
from bcolz.py2help import xrange
# Default values for the command-line options (overridable in __main__).
# There is deliberately no default directory: the keystore directory is
# wiped when the store is created, so the user has to name it explicitly.
keystore_dir = None
flavor = "numpy"
nkeys = 100
# Kept as an integer: it is the upper bound handed to np.random.randint,
# it is formatted with '%d' in the usage text, and '-m' parses it via int().
max_entries = 100000
cname = "blosclz"
clevel = 9
# Reference timestamp used by the timing helpers; enter() and show_time()
# overwrite it.
tref = 0
class KeyStore(object):
    """On-disk key-value store holding a pair of numpy arrays per key.

    Every key maps to a tuple ``(arr1, arr2)``.  With the "numpy" flavor an
    entry is the file ``<keystore_dir>/<key>.npz``; with the "bcolz" flavor
    it is a ctable rooted at the directory ``<keystore_dir>/<key>``.

    Parameters
    ----------
    flavor : str
        Either "numpy" or "bcolz".
    keystore_dir : str
        Directory holding the entries.  BEWARE: if it already exists it is
        removed with all its contents and created again.
    clevel : int, optional
        Compression level.  When left as None, the module-level ``clevel``
        setting is read each time an entry is stored.
    cname : str, optional
        Compressor name (used by the "bcolz" flavor only).  When left as
        None, the module-level ``cname`` setting is read at store time.

    Raises
    ------
    ValueError
        If `flavor` is not one of the supported flavors.  The check is done
        before the directory is touched.
    """

    _FLAVORS = ("numpy", "bcolz")

    def __init__(self, flavor, keystore_dir, clevel=None, cname=None):
        # Validate first so that a typo in the flavor never wipes a directory.
        if flavor not in self._FLAVORS:
            raise ValueError(
                "Unknown flavor: %r (expected one of %r)"
                % (flavor, self._FLAVORS))
        if os.path.exists(keystore_dir):
            shutil.rmtree(keystore_dir)
        os.mkdir(keystore_dir)
        self.keystore_dir = keystore_dir
        self.flavor = flavor
        self._clevel = clevel
        self._cname = cname

    def _keypath(self, key):
        """Return the on-disk path of the entry stored under `key`."""
        keypath = os.path.join(self.keystore_dir, key)
        if self.flavor == "numpy":
            # np.savez* would append the extension anyway; adding it here
            # keeps the existence checks and the writes on the same path.
            keypath += ".npz"
        return keypath

    def __getitem__(self, key):
        """Return the ``(arr1, arr2)`` tuple stored under `key`.

        Raises KeyError (carrying the key) if there is no such entry.
        """
        keypath = self._keypath(key)
        if not os.path.exists(keypath):
            raise KeyError(key)
        if self.flavor == "numpy":
            # The context manager closes the underlying .npz file once the
            # arrays have been read into memory.
            with np.load(keypath) as diskobj:
                return (diskobj['arr1'][:], diskobj['arr2'][:])
        diskobj = bcolz.ctable(rootdir=keypath)
        return (diskobj['arr1'][:], diskobj['arr2'][:])

    def __setitem__(self, key, arrs):
        """Store the pair ``arrs = (arr1, arr2)`` under `key`.

        An existing entry for the same key is replaced; other entries are
        left alone.
        """
        # Unpack before deleting anything, so a malformed value cannot
        # destroy the entry it was meant to replace.
        arr1, arr2 = arrs
        keypath = self._keypath(key)
        # Remove only this entry: a directory for bcolz, a file for numpy.
        if os.path.isdir(keypath):
            shutil.rmtree(keypath)
        elif os.path.exists(keypath):
            os.remove(keypath)
        # Fall back to the module-level settings when none were given.
        level = clevel if self._clevel is None else self._clevel
        if self.flavor == "numpy":
            if level > 0:
                np.savez_compressed(keypath, arr1=arr1, arr2=arr2)
            else:
                np.savez(keypath, arr1=arr1, arr2=arr2)
        else:
            name = cname if self._cname is None else self._cname
            bcolz.ctable(columns=(arr1, arr2),
                         names=("arr1", "arr2"),
                         rootdir=keypath,
                         cparams=bcolz.cparams(clevel=level, cname=name))
def show_time(explain):
    """Print the time elapsed since the reference point, then reset it.

    `explain` is the label shown next to the elapsed time.  The global
    `tref` is updated so the next call measures from this moment.
    """
    global tref
    now = time()
    line = "Time (%20s) --> %.3f" % (explain, now - tref)
    print(line)
    tref = now
def enter():
    """Start timing: store the current time in the global `tref`."""
    global tref
    tref = time()
def after_create(mess=""):
    """Report the time used by the creation phase.

    A non-empty `mess` is appended to the "creation" label after a comma.
    """
    suffix = ", " + mess if mess else mess
    show_time("creation" + suffix)
def after_query(mess=""):
    """Report the time used by the query phase.

    A non-empty `mess` is appended to the "query" label after a comma.
    """
    suffix = ", " + mess if mess else mess
    show_time("query" + suffix)
def test_flavor():
    """Build a keystore with random arrays, read 100 random keys back.

    Uses the module-level settings `flavor`, `keystore_dir`, `nkeys` and
    `max_entries`, and prints the time taken by each phase.

    Returns the total number of elements retrieved, counting both arrays
    of every key that was read.
    """
    enter()
    print("Building database. Wait please...")
    store = KeyStore(flavor, keystore_dir)
    for idx in range(nkeys):
        # Each key gets two arrays sharing one random length.
        size = np.random.randint(0, max_entries)
        ints = np.random.randint(0, 1000, size)
        floats = 1e9 + np.random.rand(size)
        store[str(idx)] = (ints, floats)
    after_create()

    # Query 100 arbitrary keys
    picks = np.random.randint(0, nkeys, 100)
    print("Retrieving 100 keys in arbitrary order...")
    total = 0
    for pick in picks:
        first, second = store[str(pick)]
        total += len(first) + len(second)
    after_query()
    return total
if __name__ == "__main__":
    # Command-line driver: parse the options into the module-level settings,
    # check them, then run the benchmark for the selected flavor.
    usage = """\
usage: %s [-f flavor] [-d dir] [-k nkeys] [-m max_entries] [-c cname] [-l clevel]
-f select the flavor: '%s' (def.), 'bcolz'
-d The directory for doing the bench (def: '%s')
-k the number of keys in store (def. '%d')
-m the maximum number of elements in arrays (def. '%d')
-c the compressor name (def. '%s')
-l the compression level (def. %d)
""" % (sys.argv[0], flavor, keystore_dir, nkeys, max_entries, cname, clevel)

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'sf:d:k:m:c:l:')
    except getopt.GetoptError:
        # Only option-parsing errors end up here; anything else propagates.
        sys.stderr.write(usage)
        sys.exit(1)

    # Get the options
    for name, value in opts:
        if name == '-s':
            # Accepted for compatibility; nothing in this script reads it.
            show = True
        elif name == '-f':
            flavor = value
        elif name == '-d':
            keystore_dir = value
        elif name == '-k':
            nkeys = int(value)
        elif name == '-m':
            max_entries = int(value)
        elif name == '-c':
            cname = value
        elif name == '-l':
            clevel = int(value)

    if not keystore_dir:
        raise ValueError(
            "Please specify into which directory the keystore will go. "
            "BEWARE: all its contents will be nuked!")

    np.random.seed(12)  # so as to get reproducible results

    if flavor == "numpy":
        mess = "numpy (via .npz files)"
    elif flavor == "bcolz":
        mess = "bcolz (via ctable(clevel=%d, cname='%s')" % (clevel, cname)
    else:
        raise ValueError(
            "Unknown flavor: %r (expected 'numpy' or 'bcolz')" % (flavor,))

    print("########## Checking method: %s ############" % mess)
    out = test_flavor()
    print("Number of elements out of getitem:", out)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment