Last active
July 23, 2017 21:59
-
-
Save dnkirill/ab2ae89cfc76e7e6fa9dc68581bc644e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import multiprocessing | |
import ipyparallel as ipp | |
import numpy as np | |
def preprocess_equalize(X, y, bins=256, cpu=multiprocessing.cpu_count()):
    """Run ``grayscale_exposure_equalize`` over a dataset on ipyparallel engines.

    A simplified version of a function which manages multiprocessing logic.
    The dataset is split into ``cpu`` batches, every batch is handed to the
    engines of a running ipyparallel cluster, and the processed batches are
    stitched back together in their original order. The worker function is
    fixed here, though the same scaffolding can be generalized to apply any
    function to batches.

    Args:
        X: numpy array of all images in the dataset. The last axis is dropped
            from the output buffer and replaced by a singleton axis on return
            (presumably ``(n, height, width, channels)`` -- confirm with the
            caller).
        y: numpy array of corresponding labels (must expose ``.shape``).
        bins: the amount of bins to be used in histogram equalization. It is
            published on every engine under the name ``num_bins``.
        cpu: the number of batches to split the data into. Default: the
            number of CPU cores, evaluated once when the function is defined.

    Returns:
        A tuple ``(X_processed, y_processed)``: the processed images with a
        trailing singleton axis appended, and the labels as a numpy array.
        Both are allocated with ``np.zeros`` and therefore use its default
        dtype.
    """
    rc = ipp.Client()
    try:
        # One DirectView over all engines, reused for every broadcast below.
        view = rc[:]

        # These imports look unused locally on purpose: sync_imports() replays
        # them on every engine so the worker function can rely on them.
        with view.sync_imports():
            import numpy
            from skimage import exposure, transform, color

        # Publish the number of bins in each engine's namespace.
        view['num_bins'] = bins

        X_processed = np.zeros(X.shape[:-1])
        y_processed = np.zeros(y.shape)

        # Pair every image batch with its label batch so they travel together.
        batches_x = np.array_split(X, cpu)
        batches_y = np.array_split(y, cpu)
        batches_x_y = list(zip(batches_x, batches_y))

        # .get() yields one result per batch in submission order. get_dict()
        # is keyed by engine id, so it only works when each engine ran exactly
        # one batch and would lose or reject batches otherwise.
        preprocessed_subs = view.map(
            grayscale_exposure_equalize, batches_x_y
        ).get()
    finally:
        # Release the client's sockets even if the remote call failed.
        rc.close()

    # Combine the output batches into a single dataset.
    cnt = 0
    for result in preprocessed_subs:
        x_, y_ = result[0], result[1]
        X_processed[cnt:cnt + len(x_)] = x_
        y_processed[cnt:cnt + len(y_)] = y_
        cnt += len(x_)

    return X_processed.reshape(X_processed.shape + (1,)), y_processed
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment