Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Answer to: how do you parallelize inference in scikit-learn using Python's multiprocessing?
import pickle

from multiprocessing import Pool, Process, Queue
from time import sleep
class Predictor(Process):
    """Worker process that drains images from a shared queue and runs model inference.

    Each worker loads its own copy of the pickled classifier, then consumes
    items from the queue passed in ``args[0]`` until the queue is empty.
    """

    def __init__(self, args, model_filepath):
        """Initialize the parent Process and load the model.

        Args:
            args: Tuple whose first element is the shared ``multiprocessing.Queue``.
            model_filepath: Path to the pickled classifier on disk.
        """
        # Initialize the super class.
        super(Predictor, self).__init__(args=args)
        # Load the model; use a context manager so the file handle is closed.
        # SECURITY NOTE: pickle.load executes arbitrary code — only load
        # model files from a trusted source.
        with open(model_filepath, "rb") as model_file:
            self.classifier = pickle.load(model_file)
        self.queue = args[0]

    def run(self):
        """Consume images from the queue and print a prediction for each.

        Uses get_nowait()/Empty instead of an empty()-then-get() check:
        with multiple consumers, empty() can report False and then another
        worker steals the last item, leaving get() blocked forever.
        """
        from queue import Empty  # multiprocessing.Queue raises queue.Empty

        while True:
            try:
                # Take the next image; stop when the queue is drained.
                image = self.queue.get_nowait()
            except Empty:
                break
            # Perform inference.
            print(self.classifier.predict([image]))
            # Dummy sleep to gauge the effect of parallelization.
            sleep(1)
# NOTE(review): `data` is not defined anywhere in this snippet — it is
# presumably an iterable of feature vectors supplied by surrounding code;
# confirm before running.
if __name__ == "__main__":
    # The __main__ guard is required: under the spawn start method (the
    # default on Windows and macOS), each child re-imports this module,
    # and unguarded top-level start() calls would fork-bomb.
    model_filepath = "svm_model.pkl"

    # Initialize the queue and insert the first 10 images. You can do
    # better by writing another process that acts as a producer and
    # populates the queue.
    queue = Queue()
    for image in data[:10]:
        queue.put(image)

    # Create two consumer processes that perform prediction.
    predictor_1 = Predictor(args=(queue,), model_filepath=model_filepath)
    predictor_2 = Predictor(args=(queue,), model_filepath=model_filepath)

    # Let the prediction begin.
    predictor_1.start()
    predictor_2.start()

    # Wait for both workers; they stop once the queue is empty.
    predictor_1.join()
    predictor_2.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment