Skip to content

Instantly share code, notes, and snippets.

@emmanuelle
Created April 10, 2018 14:50
Show Gist options
  • Save emmanuelle/91db4a366496ecb13693c8b513235c55 to your computer and use it in GitHub Desktop.
Save emmanuelle/91db4a366496ecb13693c8b513235c55 to your computer and use it in GitHub Desktop.
Apply function in parallel to overlapping chunks of an array, for example for image processing
import numpy as np

try:
    from joblib import Parallel, delayed
except ImportError:
    # Legacy fallback: scikit-learn < 0.23 shipped a vendored copy of joblib.
    from sklearn.externals.joblib import Parallel, delayed
def apply_parallel(func, data, *args, chunk=100, overlap=10, n_jobs=4,
                   **kwargs):
    """
    Apply a function in parallel to overlapping chunks of an array.

    ``data`` is split along its first axis into consecutive chunks of
    ``chunk`` rows. Each chunk is extended by up to ``overlap`` rows on both
    sides (clipped at the array boundaries) before being passed to ``func``,
    and only the rows belonging to the chunk itself are copied into the
    output. joblib is used for parallel processing.

    Parameters
    ----------
    func : function
        Function to apply. Its first argument needs to be ``data``, and it
        must return an array with the same shape as its input.
    data : ndarray
        Data to be chunked along axis 0.
    chunk : int
        Chunk size (default value 100). Must be at least 1.
    overlap : int
        Size of overlap between consecutive chunks (default value 10).
        Must be non-negative.
    n_jobs : int
        Number of jobs to be used by joblib for parallel processing. With
        ``n_jobs=1`` the chunks are processed sequentially in-process.
    *args, **kwargs : other arguments to be passed to func

    Returns
    -------
    out_data : ndarray
        Array with the same shape as ``data``; its dtype is the dtype of the
        result computed for the first chunk.

    Raises
    ------
    ValueError
        If ``chunk`` is smaller than 1 or ``overlap`` is negative.

    Notes
    -----
    The result matches ``func(data, *args, **kwargs)`` only if the value
    ``func`` computes for a row depends on at most ``overlap`` neighbouring
    rows on each side.

    Examples
    --------
    >>> from skimage import data, filters
    >>> coins = data.coins()
    >>> res = apply_parallel(filters.gaussian, coins, 2)
    """
    if chunk < 1:
        raise ValueError("chunk must be a positive integer")
    if overlap < 0:
        raise ValueError("overlap must be non-negative")

    sh0 = data.shape[0]
    if sh0 == 0:
        # Nothing to split: there is no first chunk to take the output
        # dtype from, so let func handle the empty array directly.
        return np.asarray(func(data, *args, **kwargs))

    # For every chunk remember both the rows it is responsible for
    # ([start, stop)) and the padded window handed to func ([lo, hi)).
    # Keeping the bounds explicit avoids negative-index slicing, which
    # silently misbehaves for overlap == 0 or for arrays shorter than
    # chunk + overlap.
    bounds = []
    for start in range(0, sh0, chunk):
        stop = min(start + chunk, sh0)
        lo = max(0, start - overlap)
        hi = min(stop + overlap, sh0)
        bounds.append((start, stop, lo, hi))

    sub_arrays = [data[lo:hi] for _, _, lo, hi in bounds]
    if n_jobs == 1:
        # joblib runs sequentially for a single job anyway; skip the
        # dispatch machinery.
        res_list = [func(sub_im, *args, **kwargs) for sub_im in sub_arrays]
    else:
        res_list = Parallel(n_jobs=n_jobs)(
            delayed(func)(sub_im, *args, **kwargs) for sub_im in sub_arrays)

    output_dtype = res_list[0].dtype
    out_data = np.empty(data.shape, dtype=output_dtype)
    for (start, stop, lo, _), res in zip(bounds, res_list):
        # Position of the chunk's own first row inside the padded result.
        offset = start - lo
        out_data[start:stop] = res[offset:offset + (stop - start)]
    return out_data
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment