# Answer to: how to parallelize inference in sklearn using Python's multiprocessing?
import pickle
from multiprocessing import Process, Pool, Queue
from queue import Empty
from time import sleep
class Predictor(Process):
    """Consumer process that runs a pickled classifier over queued items.

    The model is loaded when the object is constructed. Once the process is
    started, ``run`` pulls items off the shared queue one at a time and
    passes each to the classifier until no more items arrive.
    """

    # Seconds to wait for another item before treating the queue as drained.
    # A timeout is used instead of ``empty()`` followed by ``get()``: with
    # several consumers that pair is racy and can leave one of them blocked
    # forever on ``get()`` after a sibling took the last item.
    poll_timeout = 1.0

    def __init__(self, args, model_filepath):
        """Initialise the process and load the model from a pickle file.

        Args:
            args: Argument tuple forwarded to ``Process``; its first element
                is the queue the items are read from.
            model_filepath: Path of the pickled classifier.
        """
        super(Predictor, self).__init__(args=args)
        # SECURITY: unpickling can execute arbitrary code; only load model
        # files that come from a trusted source.
        with open(model_filepath, "rb") as model_file:
            self.classifier = pickle.load(model_file)
        self.queue = args[0]

    def run(self):
        """Consume queued items and run inference on each of them.

        The loop ends as soon as no item arrives within ``poll_timeout``
        seconds. Predictions are discarded; forward them to a results queue
        if the caller needs them.
        """
        while True:
            try:
                # Always read from this worker's own queue, never a global.
                image = self.queue.get(timeout=self.poll_timeout)
            except Empty:
                break
            # NOTE(review): assumes every queued item is one 1-D feature
            # vector, wrapped here into a single-sample batch - confirm
            # against the producer.
            self.classifier.predict([image])
if __name__ == "__main__":
    # The guard keeps child processes from re-running this setup when the
    # main module is re-imported (as happens with the "spawn" start method).
    model_filepath = "svm_model.pkl"

    # Initialise the queue and insert the first 10 images. You can do better
    # by writing another process that acts as producer and populates the
    # queue.
    # NOTE(review): `data` is not defined in this snippet; it is expected to
    # be provided elsewhere - confirm.
    queue = Queue()
    for image in data[:10]:
        queue.put(image)

    # Create two consumer processes that perform prediction.
    predictor_1 = Predictor(args=(queue,), model_filepath=model_filepath)
    predictor_2 = Predictor(args=(queue,), model_filepath=model_filepath)

    # Let the prediction begin.
    predictor_1.start()
    predictor_2.start()

    # Stop once the queue is empty: wait for both consumers to exit.
    predictor_1.join()
    predictor_2.join()
