Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Answer to: how do you parallelize inference in scikit-learn using Python's multiprocessing?
import pickle

from multiprocessing import Pool, Process, Queue
from time import sleep
class Predictor(Process):
    """Worker process that drains images from a shared queue and runs model inference.

    Each worker loads its own copy of the pickled classifier, then consumes
    items from the queue passed in ``args[0]`` until the queue is empty.
    """

    def __init__(self, args, model_filepath):
        """Initialize the parent Process and load the model.

        Args:
            args: Tuple whose first element is the shared ``multiprocessing.Queue``.
            model_filepath: Path to the pickled classifier on disk.
        """
        # Initialize the super class.
        super(Predictor, self).__init__(args=args)
        # Load the model; use a context manager so the file handle is closed.
        # SECURITY NOTE: pickle.load executes arbitrary code — only load
        # model files from a trusted source.
        with open(model_filepath, "rb") as model_file:
            self.classifier = pickle.load(model_file)
        self.queue = args[0]

    def run(self):
        """Consume images from the queue and print a prediction for each.

        Uses get_nowait()/Empty instead of an empty()-then-get() check:
        with multiple consumers, empty() can report False and then another
        worker steals the last item, leaving get() blocked forever.
        """
        from queue import Empty  # multiprocessing.Queue raises queue.Empty

        while True:
            try:
                # Take the next image; stop when the queue is drained.
                image = self.queue.get_nowait()
            except Empty:
                break
            # Perform inference.
            print(self.classifier.predict([image]))
            # Dummy sleep to gauge the effect of parallelization.
            sleep(1)
# NOTE(review): `data` is not defined anywhere in this snippet — it is
# presumably an iterable of feature vectors supplied by surrounding code;
# confirm before running.
if __name__ == "__main__":
    # The __main__ guard is required: under the spawn start method (the
    # default on Windows and macOS), each child re-imports this module,
    # and unguarded top-level start() calls would fork-bomb.
    model_filepath = "svm_model.pkl"

    # Initialize the queue and insert the first 10 images. You can do
    # better by writing another process that acts as a producer and
    # populates the queue.
    queue = Queue()
    for image in data[:10]:
        queue.put(image)

    # Create two consumer processes that perform prediction.
    predictor_1 = Predictor(args=(queue,), model_filepath=model_filepath)
    predictor_2 = Predictor(args=(queue,), model_filepath=model_filepath)

    # Let the prediction begin.
    predictor_1.start()
    predictor_2.start()

    # Wait for both workers; they stop once the queue is empty.
    predictor_1.join()
    predictor_2.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment