ahmedbesbes/concurrent_runs.py

## concurrent_runs.py
import asyncio
import PIL.Image

import bentoml
from bentoml.io import Image, Text

# Runners used by the service: one built from a custom runnable class and two
# obtained from stored models via their framework modules.
# NOTE(review): MyPreprocessRunnable is not defined or imported in the visible
# source -- confirm it is provided before this module is loaded.
preprocess_runner = bentoml.Runner(MyPreprocessRunnable)
model_a_runner = bentoml.xgboost.get("model_a:latest").to_runner()
model_b_runner = bentoml.pytorch.get("model_b:latest").to_runner()

# The service is constructed with all three runners that `predict` calls.
svc = bentoml.Service(
    "inference_graph_demo",
    runners=[preprocess_runner, model_a_runner, model_b_runner],
)

@svc.api(input=Image(), output=Text())
async def predict(input_image: PIL.Image.Image) -> str:
    """Preprocess the image, run both models on it, and combine the results.

    The image is first awaited through ``preprocess_runner``. Its output is
    then passed to ``model_a_runner`` and ``model_b_runner`` inside a single
    ``asyncio.gather`` call, so both runs are awaited together. The two
    results are handed to ``post_process`` with model A's result first and
    model B's result second.

    NOTE(review): ``post_process`` is not defined in the visible source --
    confirm it is available in this module's scope.
    """
    preprocessed = await preprocess_runner.async_run(input_image)

    # gather() yields results in the same order the awaitables were passed.
    result_a, result_b = await asyncio.gather(
        model_a_runner.async_run(preprocessed),
        model_b_runner.async_run(preprocessed),
    )

    return post_process(result_a, result_b)
	import asyncio
	import PIL.Image

	import bentoml
	from bentoml.io import Image, Text

	# Runner and service setup; these statements repeat the definitions that
	# appear earlier in this file.
	# NOTE(review): MyPreprocessRunnable is not defined or imported in the
	# visible source -- confirm where it comes from.
	preprocess_runner = bentoml.Runner(MyPreprocessRunnable)
	model_a_runner = bentoml.xgboost.get("model_a:latest").to_runner()
	model_b_runner = bentoml.pytorch.get("model_b:latest").to_runner()

	# The service is constructed with all three runners.
	svc = bentoml.Service(
		"inference_graph_demo",
		runners=[preprocess_runner, model_a_runner, model_b_runner],
	)

	@svc.api(input=Image(), output=Text())
	async def predict(input_image: PIL.Image.Image) -> str:
		"""Preprocess the image, run both models on it, and combine the results.

		The image is awaited through ``preprocess_runner``; the preprocessed
		value is then passed to ``model_a_runner`` and ``model_b_runner`` in
		one ``asyncio.gather`` call, and both results are given to
		``post_process`` (model A first, model B second).

		NOTE(review): ``post_process`` is not defined in the visible source --
		confirm it is available in this module's scope.
		"""
		model_input = await preprocess_runner.async_run(input_image)

		# gather() returns the results in the order the awaitables were passed,
		# so index 0 is model A and index 1 is model B.
		results = await asyncio.gather(
			model_a_runner.async_run(model_input),
			model_b_runner.async_run(model_input),
		)

		return post_process(
			results[0],  # model a result
			results[1],  # model b result
		)