Skip to content

Instantly share code, notes, and snippets.

@FrancescAlted
Last active April 22, 2018 19:03
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FrancescAlted/4595225 to your computer and use it in GitHub Desktop.
Save FrancescAlted/4595225 to your computer and use it in GitHub Desktop.
A comparison of variable-length object storage speed in Blaze vs. PyTables
"""
Benchmark comparing how fast variable-length objects can be written and read back in Blaze and in PyTables.
"""
import os.path
import shutil
from time import time
import blaze
import tables
# Number of objects to store; object i is a unicode string of N*i characters.
N = 500

# Remove the 'c' directory left behind by a previous run so the Blaze write
# below starts from scratch.
if os.path.exists('c'):
    shutil.rmtree('c')

# Variable-length payload: strings of length 0, N, 2*N, ..., (N-1)*N.
a = [u"s" * N * i for i in range(N)]

# --- Blaze: write -----------------------------------------------------------
# NOTE(review): storage='c' presumably points Blaze at the on-disk directory
# 'c' removed above, and clevel=5 is presumably the compression level --
# confirm against the Blaze version in use.
t0 = time()
c = blaze.Array(a, 'x, object', params=blaze.params(storage='c', clevel=5))
print("time taken for writing in Blaze: %.3f" % (time() - t0))

# --- Blaze: read ------------------------------------------------------------
t0 = time()
# NOTE: the read pass goes through the handle created above rather than
# reopening the store with blaze.open('c'); the author left the reopen
# disabled, so this may not measure a cold read from disk.
c2 = c
tlen = 0
# Explicit indexed loop kept on purpose: per-element access is what is timed.
for i in range(N):
    # c2[i] yields a wrapper; indexing it with () extracts the stored object.
    tlen += len(c2[i][()])
print("time taken for reading in Blaze: %.3f" % (time() - t0))
# Total number of characters read back; should match the HDF5 run.
print("tlen %s" % tlen)
# NOTE: openFile/createVLArray are the PyTables 2.x spellings; PyTables 3.x
# renamed them to open_file/create_vlarray. They are kept as-is to stay
# compatible with the library version this script was written for.

# Create a VLArray:
t0 = time()
f = tables.openFile('vlarray.h5', mode='w')
try:
    # Filters(5) sets the compression level to 5, mirroring the clevel=5
    # passed to Blaze.
    vlarray = f.createVLArray(f.root, 'vlarray',
                              tables.ObjectAtom(),
                              "array of objects",
                              filters=tables.Filters(5))
    # One append per object: each string becomes one variable-length row.
    for obj in a:
        vlarray.append(obj)
finally:
    # Always release the HDF5 handle, even if an append fails. The close is
    # deliberately inside the timed region so the flush to disk is counted.
    f.close()
print("time taken for writing in HDF5: %.3f" % (time() - t0))

# Read the VLArray:
t0 = time()
f = tables.openFile('vlarray.h5', mode='r')
try:
    vlarray = f.root.vlarray
    tlen = 0
    # Iterating the VLArray yields the stored objects one row at a time.
    for obj in vlarray:
        tlen += len(obj)
finally:
    f.close()
print("time taken for reading in HDF5: %.3f" % (time() - t0))
# Total number of characters read back; should match the Blaze run.
print("tlen %s" % tlen)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment