Skip to content

Instantly share code, notes, and snippets.

@dnkirill
Last active July 23, 2017 21:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dnkirill/ab2ae89cfc76e7e6fa9dc68581bc644e to your computer and use it in GitHub Desktop.
Save dnkirill/ab2ae89cfc76e7e6fa9dc68581bc644e to your computer and use it in GitHub Desktop.
import multiprocessing
import ipyparallel as ipp
import numpy as np
def preprocess_equalize(X, y, bins=256, cpu=multiprocessing.cpu_count()):
""" A simplified version of a function which manages multiprocessing logic.
This function always grayscales input images, though it can be generalized
to apply any arbitrary function to batches.
Args:
X: numpy array of all images in dataset.
y: a list of corresponding labels.
bins: the amount of bins to be used in histogram equalization.
cpu: the number of cpu cores to use. Default: use all.
Returns:
Numpy array of processed images and a list of labels.
"""
rc = ipp.Client()
# Use a DirectView object to broadcast imports to all engines
with rc[:].sync_imports():
import numpy
from skimage import exposure, transform, color
# Use a DirectView object to set up the amount of bins on all engines
rc[:]['num_bins'] = bins
X_processed = np.zeros(X.shape[:-1])
y_processed = np.zeros(y.shape)
# Number of batches is equal to cpu count
batches_x = np.array_split(X, cpu)
batches_y = np.array_split(y, cpu)
batches_x_y = zip(batches_x, batches_y)
# Applying our function of choice to each batch with a DirectView method
preprocessed_subs = rc[:].map(grayscale_exposure_equalize, batches_x_y).get_dict()
# Combining the output batches into a single dataset
cnt = 0
for _,v in preprocessed_subs.items():
x_, y_ = v[0], v[1]
X_processed[cnt:cnt+len(x_)] = x_
y_processed[cnt:cnt+len(y_)] = y_
cnt += len(x_)
return X_processed.reshape(X_processed.shape + (1,)), y_processed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment