Skip to content

Instantly share code, notes, and snippets.

@Dapid
Created February 4, 2020 20:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Dapid/7a5cdb04c2ababbd86d6513002a37d69 to your computer and use it in GitHub Desktop.
# Run this to get the example file.
import tables
import numpy as np

# Write a 1,000,000 x 20 array of random floats into a compressed CArray so
# the reader scripts below have a file to open.  The handle is used as a
# context manager so the file is closed even if a write raises — the
# original `h5.close()` was skipped on error, leaking the handle.
with tables.open_file('onefile.h5', 'w',
                      filters=tables.Filters(8, 'lzo')) as h5:
    g = h5.create_group(h5.root, 'data')
    h5.create_carray(g, 'data_array', obj=np.random.random((int(1e6), 20)))
# This crashes: 1000 forked processes all read through ONE shared open
# HDF5 file handle.
import random
import multiprocessing
import tables


def read():
    """Fetch 100 random rows through the globally shared `h5` handle."""
    for _ in range(100):
        # Index within the 1e6 rows written by the generator script.
        h5.root.data.data_array[random.randint(0, int(1e6) - 1)]


h5 = tables.open_file('onefile.h5')
read()  # It works!

# Stress-test: each child inherits the SAME open handle, and concurrent
# reads through it are what make this snippet crash.
# NOTE(review): relies on the 'fork' start method (Linux default) so that
# children inherit the module-level `h5`; under 'spawn' the children would
# fail differently — confirm platform before reproducing.
processes = [multiprocessing.Process(target=read) for _ in range(1000)]
for p in processes:
    p.start()
print('Joining')
for p in processes:
    p.join()
h5.close()
# Try re-opening the file inside each process instead of sharing a handle.
import random
import multiprocessing
import tables


def read():
    """Open a private handle, fetch 100 random rows, close it."""
    _h5 = tables.open_file('onefile.h5')
    for _ in range(100):
        _h5.root.data.data_array[random.randint(0, int(1e6) - 1)]
    _h5.close()


h5 = tables.open_file('onefile.h5')
read()  # It works!

# Stress-test: only 2 processes here, each with its own handle — compare
# with the shared-handle snippet above that uses 1000.
# NOTE(review): the parent still holds `h5` open while the children run;
# whether that matters depends on the HDF5 file-locking build — verify.
processes = [multiprocessing.Process(target=read) for _ in range(2)]
for p in processes:
    p.start()
print('Joining')
for p in processes:
    p.join()
h5.close()
# In memory seems to work: each process loads the whole file into RAM via
# the H5FD_CORE driver, so no two processes touch the same OS-level handle.
import random
import multiprocessing
import tables


def read():
    """Open the file with the in-memory driver, fetch 100 random rows."""
    _h5 = tables.open_file('onefile.h5', driver='H5FD_CORE')
    for _ in range(100):
        _h5.root.data.data_array[random.randint(0, int(1e6) - 1)]
    _h5.close()


h5 = tables.open_file('onefile.h5')
read()

processes = [multiprocessing.Process(target=read) for _ in range(20)]
for p in processes:
    p.start()
print('Joining')
for p in processes:
    p.join()
h5.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment