@pantaray
Last active October 4, 2022 09:26
Proposed feature addition for ACME
import h5py
import numpy as np
from acme import ParallelMap

# Consider the following function
def arr_func(x, y, z=3):
    return x * (y + z)
# We want to evaluate `arr_func` for three different values of `x` and the same `y`:
xList = [np.array([1, 2, 3]), np.array([-1, -2, -3]), np.array([-1, 2, -3])]
y = np.array([4, 5, 6])
# Firing off `ParallelMap` and collecting results in memory yields
with ParallelMap(arr_func, xList, y, z=0.5, write_worker_results=False) as pmap:
    res = pmap.compute()
>>> res
[array([ 7, 14, 21]), array([ -8, -16, -24]), array([ -9, 18, -27])]
# Now, with `write_worker_results = True` (the default) ACME currently
# creates `n_inputs` HDF5 containers:
with ParallelMap(arr_func, xList, y, z=0.5) as pmap:
    res = pmap.compute()
>>> res
['/cs/home/username/ACME_20220906-1135-448825/arr_func_0.h5',
'/cs/home/username/ACME_20220906-1135-448825/arr_func_1.h5',
'/cs/home/username/ACME_20220906-1135-448825/arr_func_2.h5']
# How about ACME, as a new default, (additionally) generating a single HDF5
# container that collects all worker results:
>>> res
'/cs/home/username/ACME_20220906-1135-448825/arr_func.h5'
# Each worker's result can then be accessed via its own HDF5 group:
>>> h5f = h5py.File('/cs/home/username/ACME_20220906-1135-448825/arr_func.h5', 'r')
>>> h5f.keys()
<KeysViewHDF5 ['worker_0', 'worker_1', 'worker_2']>
>>> h5f['worker_0'].keys()
<KeysViewHDF5 ['result_0']>
>>> h5f['worker_1'].keys()
<KeysViewHDF5 ['result_0']>
>>> h5f['worker_2'].keys()
<KeysViewHDF5 ['result_0']>
# The actual results can then be accessed directly from the single container
>>> h5f['worker_0']['result_0'][()]
array([ 7, 14, 21])
>>> h5f['worker_1']['result_0'][()]
array([ -8, -16, -24])
>>> h5f['worker_2']['result_0'][()]
array([ -9, 18, -27])
# In addition, ACME could (optionally) support an `outShape` keyword to not
# store results in separate HDF5 groups/datasets but instead collect everything
# in a single array-like structure. Specifying `outShape = (None, 3)` tells
# ACME to stack all resulting 3-element arrays along the first dimension:
with ParallelMap(arr_func, xList, y, z=0.5, outShape=(None, 3)) as pmap:
    res = pmap.compute()
>>> h5f = h5py.File('/cs/home/username/ACME_20220906-1135-448825/arr_func.h5', 'r')
>>> h5f['result_0'][()]
array([[  7,  14,  21],
       [ -8, -16, -24],
       [ -9,  18, -27]])
@timnaher

timnaher commented Sep 6, 2022

Great ideas!

I am a bit confused by the per-worker results. Does it make sense to split the output among workers? At the moment I do not see the point in accessing what a single worker did, particularly if you recycle the workers. I assume that you use njobs = nworkers here, so it makes sense in the example. If you use 5 workers for 10 jobs, I think it should be possible to index the HDF5 container per job and not per worker.

Also, would this file (e.g. in Mathias's case) not blow up? I'm not so familiar with HDF5, so I'm sorry if this is obvious. I assume that in line 32 you load the whole file into memory, making the outcome similar to write_worker_results=False, but with previously saving a version to disk?

I love the idea of being more flexible with the output of ACME. Particularly the last idea is great. Nevertheless, I think the basic functionality is already nice. Mathias's computation would lead to something like 800_000_000 results which are written to disk. It could also be an option to create fewer files than that. The user might specify the level of concatenation, e.g., merge 1000 results into 1 HDF5 file. This might be easier on local memory when loading these files... I usually do some operations while I load the files as well, which could be more manageable with this approach. E.g., I save a cross-spectral density for all frequencies to disk, but when I load the data I only take out a certain frequency band. Nevertheless, I want to save the whole spectrum to disk in the first place to avoid doing the parallel computation twice.
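For illustration, a rough sketch of what I mean (the file name csd.h5 and the dataset layout (n_freqs, n_channels, n_channels) are just made up for this example):

import h5py

# Hypothetical container holding a cross-spectral density for all frequencies,
# stored as a single dataset of shape (n_freqs, n_channels, n_channels)
with h5py.File("csd.h5", "r") as h5f:
    # Only the requested frequency band is read from disk; the rest of the
    # spectrum stays in the file
    band = h5f["csd"][8:13, :, :]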

@pantaray
Author

pantaray commented Sep 6, 2022

Thanks for the quick feedback!

Does it make sense to split the output among workers? At the moment I do not see the point in accessing what a single worker did, particularly if you recycle the workers. I assume that you use njobs = nworkers here, so it makes sense in the example. If you use 5 workers for 10 jobs, I think it should be possible to index the HDF5 container per job and not per worker.

Ahem, yes, the worker/job dichotomy again. You're absolutely right, I meant jobs, i.e., it should be

h5f['job_0']['result_0'][()]

etc. However, I'm not sure that's a great name either (since the 5 "workers" are actually 5 SLURM jobs). Maybe h5f['run_0'][...]? What do you think?

Also, would this file (e.g. in Mathias's case) not blow up? I'm not so familiar with HDF5, so I'm sorry if this is obvious. I assume that in line 32 you load the whole file into memory, making the outcome similar to write_worker_results=False, but with previously saving a version to disk?

Line 32 does not load anything into memory - it just opens a file handle to the container. In the example provided above, however, the entire dataset is loaded as soon as the [()] operator is used (Line 57, for instance). Still, an arbitrarily large HDF5 dataset can be indexed without overflowing local memory using regular array slicing (e.g., h5f['result_0'][:, 0] only reads the first column from disk).
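To make the difference explicit, a minimal sketch (using the container from the outShape example above):

import h5py

h5f = h5py.File('/cs/home/username/ACME_20220906-1135-448825/arr_func.h5', 'r')

# Grabbing the dataset object is cheap: no data has been read at this point
dset = h5f['result_0']

# The [()] operator materializes the *entire* dataset in memory ...
everything = dset[()]

# ... whereas slicing only pulls the requested part from disk
first_column = dset[:, 0]

h5f.close()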

Mathias's computation would lead to something like 800_000_000 results which are written to disk. It could also be an option to create fewer files than that.

The actual collection of the results could be implemented using virtual HDF5 datasets. This way the single all-encompassing results container is just a collection of pointers to the individual files generated by the workers. In cases where lots and lots of files (like 800 million) would still be generated, a separate option (e.g., single_file = True) could be included that forces workers to actually write to the same file using a distributed lock (as currently implemented in Mathias's case). This would avoid the creation of a bazillion very small files that might put heavy strain on the underlying file system. In our preliminary tests, the write performance using such a lock was surprisingly good, so this might be an even easier option than collecting subsets of results in dedicated files, I think.
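To give an idea of how such a virtual collection might look with h5py (just a sketch, assuming each per-worker file stores its return value in a dataset called result_0, analogous to the layout above; not the actual ACME implementation):

import h5py
import numpy as np

# Per-worker files written by ACME (cf. the example above), each assumed to
# hold a 3-element dataset named "result_0"
srcFiles = ["/cs/home/username/ACME_20220906-1135-448825/arr_func_{}.h5".format(k)
            for k in range(3)]

# Virtual layout stacking the three results along the first dimension
layout = h5py.VirtualLayout(shape=(3, 3), dtype=np.int64)
for k, fname in enumerate(srcFiles):
    layout[k] = h5py.VirtualSource(fname, "result_0", shape=(3,))

# The collecting container only stores pointers to the per-worker files
with h5py.File("/cs/home/username/ACME_20220906-1135-448825/arr_func.h5", "w") as h5f:
    h5f.create_virtual_dataset("result_0", layout, fillvalue=0)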

@timnaher

timnaher commented Sep 6, 2022

Haha, yes I agree it's all so confusing... How about calling them computations instead? comp_0, comp_1, etc

Line 32 does not load anything into memory - it just opens a file handle to the container. In the example provided above, however, the entire dataset is loaded as soon as the [()] operator is used (Line 57, for instance). Still, an arbitrarily large HDF5 dataset can be indexed without overflowing local memory using regular array slicing (e.g., h5f['result_0'][:, 0] only reads the first column from disk).

Great! Then I think it makes a lot of sense to throw them all in 1 container! I still like the single_file = True option though, just to be flexible.

I'm excited about the new features, sounds all great!

@pantaray
Author

pantaray commented Sep 7, 2022

Great! Thanks for the quick feedback!

Haha, yes I agree it's all so confusing... How about calling them computations instead? comp_0, comp_1, etc

Sold! comp_* it is :)

@KatharineShapcott

Sorry I'm a bit late to reply here, I really like the idea! comp_* seems like a good naming convention too. Just a small worry: what will happen if not all the computations complete? Will the whole file be corrupted, or only the parts that didn't return?

@pantaray
Author

pantaray commented Oct 4, 2022

Hi Katharine! Thanks for taking a look at this and the feedback!

In the prototype implementation (results_collection branch in the repo) crash handling is more or less unchanged: if some computations crash, the "symlinks" in the results file that point to the incomplete calculations simply point to nothing. If your "emergency pickling" mechanism kicks in, the results file is removed (since HDF5 files cannot contain links to pickled data) and it's basically back to the current state (ACME generates a bunch of .pickle and .h5 files).
Only the case single_file=True (i.e., all workers really write to the same file, so the results container really is just one regular file, not a collection of symlinks) might run into trouble: if a worker dies while writing to the file, the file might end up corrupted and unreadable - I've included a warning message for this case.
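For what it's worth, dangling links in the virtual-dataset case can at least be detected after the fact; a small sketch (assuming the container exposes one comp_* group per computation, each holding a virtual result_0 dataset, as discussed above):

import os
import h5py

with h5py.File("/cs/home/username/ACME_20220906-1135-448825/arr_func.h5", "r") as h5f:
    for comp in h5f.keys():
        dset = h5f[comp]["result_0"]
        if dset.is_virtual:
            # Source files of crashed computations never got written, so the
            # corresponding links point to non-existent files
            missing = [vs.file_name for vs in dset.virtual_sources()
                       if not os.path.exists(vs.file_name)]
            if missing:
                print("{} is incomplete, missing source(s): {}".format(comp, missing))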
