Created
July 20, 2018 03:38
-
-
Save TanayGahlot/d1b25e76ba520a5c6934d4808c0f78b1 to your computer and use it in GitHub Desktop.
Answer to: how do you parallelize inference in scikit-learn using Python's multiprocessing?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle

from multiprocessing import Process, Pool, Queue
from time import sleep
class Predictor(Process):
    """Worker process that wraps a pickled classifier and drains a shared queue.

    Each Predictor loads its own copy of the model from ``model_filepath``
    and, once started, repeatedly pulls items from the queue passed in
    ``args`` and runs inference on them until the queue is empty.
    """

    def __init__(self, args, model_filepath):
        """Initialize the Process superclass and load the pickled model.

        Args:
            args: tuple whose first element is the shared Queue of inputs.
            model_filepath: path to the pickled classifier.
        """
        # Initialize the super class.
        super(Predictor, self).__init__(args=args)
        # Load the model. Use a context manager so the file handle is
        # closed promptly (the original leaked the open file object).
        # NOTE(review): pickle.load executes arbitrary code from the file —
        # only load model files you trust.
        with open(model_filepath, "rb") as f:
            self.classifier = pickle.load(f)
        self.queue = args[0]

    def run(self):
        """Consume items from the queue and run inference on each one."""
        # BUG FIX: the original tested the bare name ``queue`` (a global that
        # exists only in the launching script); use the instance attribute so
        # the worker is self-contained under any start method.
        while not self.queue.empty():
            # Load an object from the queue.
            image = self.queue.get()
            # Perform inference.
            print(self.classifier.predict([image]))
            # Dummy sleep to gauge the effect of parallelization.
            sleep(1)
if __name__ == "__main__":
    # BUG FIX: the __main__ guard is required for multiprocessing on
    # platforms that use the "spawn" start method (Windows, macOS): child
    # processes re-import this module and must not re-run this setup code.
    model_filepath = "svm_model.pkl"

    # Initialize the queue and insert the first 10 images. You can do better
    # by writing another process that acts as a producer and populates the
    # queue.
    # NOTE(review): ``data`` is assumed to be an iterable of images loaded
    # elsewhere before this point — it is not defined in this snippet.
    queue = Queue()
    for image in data[:10]:
        queue.put(image)

    # Create two consumer processes that perform prediction.
    predictor_1 = Predictor(args=(queue,), model_filepath=model_filepath)
    predictor_2 = Predictor(args=(queue,), model_filepath=model_filepath)

    # Let the prediction begin.
    predictor_1.start()
    predictor_2.start()

    # Stop once the queue is empty.
    predictor_1.join()
    predictor_2.join()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment