@patrickhuy
Created April 22, 2024 07:26
mlserver adaptive batching issues
client.py:

import asyncio
import json
from pprint import PrettyPrinter

import grequests
from mlserver.codecs.string import StringRequestCodec
from mlserver.types import InferenceResponse

pp = PrettyPrinter(indent=1)


async def main():
    inputs = {"name": "Foo Bar", "message": "Hello from Client (REST)!"}
    inputs2 = {"name": "Foo Bar 2", "message": "Hello from Client (REST) 2!"}

    # NOTE: this uses characters rather than encoded bytes. It is recommended
    # to use the `mlserver` codecs to get the encoding right (see the sketch
    # after this file).
    inputs_string = json.dumps(inputs)
    inputs_string2 = json.dumps(inputs2)

    inference_request = {
        "inputs": [
            {
                "name": "echo_request",
                "shape": [len(inputs_string)],
                "datatype": "BYTES",
                "data": [inputs_string],
            }
        ]
    }
    inference_request2 = {
        "inputs": [
            {
                "name": "echo_request",
                "shape": [len(inputs_string2)],
                "datatype": "BYTES",
                "data": [inputs_string2],
            }
        ]
    }

    print("Sending request to server...")
    endpoint = "http://localhost:8080/v2/models/json-hello-world/infer"

    # Post both requests concurrently so that they land in the same
    # adaptive batch on the server.
    t1 = grequests.post(endpoint, json=inference_request)
    t2 = grequests.post(endpoint, json=inference_request2)
    [response, response2] = grequests.map([t1, t2])

    print("full response:\n")
    print(response)
    print(response.text)

    # Retrieve the text output as a dictionary.
    inference_response = InferenceResponse.parse_raw(response.text)
    raw_json = StringRequestCodec.decode_response(inference_response)
    print(f"raw_json: {raw_json}")
    output = json.loads(raw_json[0])
    print("\ndata part:\n")
    pp.pprint(output)

    print("full response2:\n")
    print(response2)
    print(response2.text)

    # Retrieve the text output as a dictionary.
    inference_response2 = InferenceResponse.parse_raw(response2.text)
    raw_json2 = StringRequestCodec.decode_response(inference_response2)
    print(f"raw_json2: {raw_json2}")
    output2 = json.loads(raw_json2[0])
    print("\ndata part:\n")
    pp.pprint(output2)


asyncio.run(main())
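As the NOTE in the script says, building the request dictionaries by hand leaves shape and encoding up to the caller. A minimal sketch of the codec-assisted alternative, assuming `StringRequestCodec.encode_request` is available in the installed MLServer version (hedged; check your version's codec API):

import grequests
from mlserver.codecs.string import StringRequestCodec

# Let the codec pick shape and datatype for a list-of-strings payload;
# use_bytes=False keeps the data JSON-serializable for a plain REST post.
encoded = StringRequestCodec.encode_request(
    ['{"name": "Foo Bar", "message": "Hello from Client (REST)!"}'],
    use_bytes=False,
)
request = grequests.post(
    "http://localhost:8080/v2/models/json-hello-world/infer",
    json=encoded.dict(),
)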
jsonmodels.py:

import json
from pprint import PrettyPrinter
from typing import Any, Dict, List

from mlserver import MLModel, types
from mlserver.codecs import StringCodec


class JsonHelloWorldModel(MLModel):
    async def load(self) -> bool:
        # Perform additional custom initialization here.
        print("Initialize model")
        # Set the readiness flag for the model.
        return await super().load()

    async def unload(self) -> bool:
        return await super().unload()

    async def predict(
        self, payload: types.InferenceRequest
    ) -> types.InferenceResponse:
        print("Received request", json.dumps(payload.dict(), indent=2))
        request = self._extract_json(payload)
        print(f"Extracted request: {request}")

        # Echo every decoded element back with a server greeting.
        response = [
            {"request": x, "server_response": "Got your request. Hello from the server."}
            for x in request
        ]
        response_bytes = [json.dumps(x).encode("UTF-8") for x in response]
        response_total_bytes_len = sum(len(b) for b in response_bytes)

        inference_response = types.InferenceResponse(
            id=payload.id,
            model_name=self.name,
            model_version=self.version,
            outputs=[
                types.ResponseOutput(
                    name="echo_response",
                    # The shape reports the total byte length, not the number
                    # of elements (see the sketch after this file).
                    shape=[response_total_bytes_len],
                    datatype="BYTES",
                    data=response_bytes,
                    parameters=types.Parameters(content_type="str"),
                )
            ],
        )
        print(f"Returning response: {inference_response.json()}")
        return inference_response

    def _extract_json(
        self, payload: types.InferenceRequest
    ) -> List[Dict[str, Any]]:
        # Only the first input is decoded: the loop returns on its first pass.
        for inp in payload.inputs:
            decoded = self.decode(inp, default_codec=StringCodec)
            PrettyPrinter(indent=1).pprint(decoded)
            return [json.loads(x) for x in decoded]
        return []
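One hedged hypothesis for the failure shown in the logs below (an assumption, not a confirmed diagnosis): in the V2 inference protocol the leading shape dimension is conventionally the batch dimension, i.e. the number of elements, but this model reports the total byte length instead. If the adaptive batcher splits the merged response along that first dimension, a shape of [274] covering two elements would explain why the first client receives both strings and the second receives an empty `data` list. A sketch of the output construction in `predict()` above, with the element count as the shape:

# Sketch, assuming the unbatcher keys off shape[0]: report the element count
# (the batch size), not the summed byte length of the encoded payload.
types.ResponseOutput(
    name="echo_response",
    shape=[len(response_bytes)],  # e.g. [2] for a merged batch of two requests
    datatype="BYTES",
    data=response_bytes,
    parameters=types.Parameters(content_type="str"),
)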
model-settings.json:

{
    "name": "json-hello-world",
    "implementation": "jsonmodels.JsonHelloWorldModel",
    "max_batch_size": 2,
    "max_batch_time": 0.1
}
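With these settings, requests arriving within roughly 0.1 s of each other are merged into a single batch of up to 2 before they reach `predict()`. Judging by the server log further below, that is exactly what happened here: the two client requests arrived as one merged payload. A hypothetical reconstruction of that merged payload (field values taken from the log; this object is not built anywhere in the code above):

import json
from mlserver import types

s1 = json.dumps({"name": "Foo Bar", "message": "Hello from Client (REST)!"})
s2 = json.dumps({"name": "Foo Bar 2", "message": "Hello from Client (REST) 2!"})

# Roughly what predict() receives: both data elements in one input, with the
# two per-request shapes apparently summed ([59] + [63] -> [122]).
merged = types.InferenceRequest(
    inputs=[
        types.RequestInput(
            name="echo_request",
            shape=[122],
            datatype="BYTES",
            data=[s1, s2],
        )
    ]
)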
Client output:

Sending request to server...
full response:
<Response [200]>
{"model_name":"json-hello-world","id":"d39e4ffc-f5b1-4e6c-b0c2-0732144d589c","parameters":{},"outputs":[{"name":"echo_response","shape":[59],"datatype":"BYTES","parameters":{"content_type":"str"},"data":["{\"request\": {\"name\": \"Foo Bar\", \"message\": \"Hello from Client (REST)!\"}, \"server_response\": \"Got your request. Hello from the server.\"}","{\"request\": {\"name\": \"Foo Bar 2\", \"message\": \"Hello from Client (REST) 2!\"}, \"server_response\": \"Got your request. Hello from the server.\"}"]}]}
raw_json: ['{"request": {"name": "Foo Bar", "message": "Hello from Client (REST)!"}, "server_response": "Got your request. Hello from the server."}', '{"request": {"name": "Foo Bar 2", "message": "Hello from Client (REST) 2!"}, "server_response": "Got your request. Hello from the server."}']
data part:
{'request': {'message': 'Hello from Client (REST)!', 'name': 'Foo Bar'},
'server_response': 'Got your request. Hello from the server.'}
full response2:
<Response [200]>
{"model_name":"json-hello-world","id":"17869200-b99e-4345-a960-cbb45a1eb930","parameters":{},"outputs":[{"name":"echo_response","shape":[63],"datatype":"BYTES","parameters":{"content_type":"str"},"data":[]}]}
raw_json2: []
Traceback (most recent call last):
  File "/Users/D064633/SAPDevelop/mlserver-marian/client.py", line 76, in <module>
    asyncio.run(main())
  File "/opt/homebrew/Cellar/python@3.11/3.11.9/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/runners.py", line 190, in run
    return runner.run(main)
           ^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.11/3.11.9/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.11/3.11.9/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/base_events.py", line 654, in run_until_complete
    return future.result()
           ^^^^^^^^^^^^^^^
  File "/Users/D064633/SAPDevelop/mlserver-marian/client.py", line 70, in main
    output2 = json.loads(raw_json2[0])
              ~~~~~~~~~^^^
IndexError: list index out of range
Server output:

Initialize model
2024-04-22 09:24:27,117 [mlserver][json-hello-world] INFO - Loaded model 'json-hello-world' successfully.
2024-04-22 09:24:27,118 [mlserver][json-hello-world] INFO - Loaded model 'json-hello-world' successfully.
Received request {
  "parameters": {
    "headers": {
      "host": "localhost:8080",
      "user-agent": "python-requests/2.31.0",
      "accept-encoding": "gzip, deflate, br",
      "accept": "*/*",
      "connection": "keep-alive",
      "content-length": "159",
      "content-type": "application/json",
      "Ce-Specversion": "0.3",
      "Ce-Source": "io.seldon.serving.deployment.mlserver",
      "Ce-Type": "io.seldon.serving.inference.request",
      "Ce-Modelid": "json-hello-world",
      "Ce-Inferenceservicename": "mlserver",
      "Ce-Endpoint": "json-hello-world",
      "Ce-Id": "17869200-b99e-4345-a960-cbb45a1eb930",
      "Ce-Requestid": "17869200-b99e-4345-a960-cbb45a1eb930"
    }
  },
  "inputs": [
    {
      "name": "echo_request",
      "shape": [
        122
      ],
      "datatype": "BYTES",
      "data": [
        "{\"name\": \"Foo Bar\", \"message\": \"Hello from Client (REST)!\"}",
        "{\"name\": \"Foo Bar 2\", \"message\": \"Hello from Client (REST) 2!\"}"
      ]
    }
  ]
}
['{"name": "Foo Bar", "message": "Hello from Client (REST)!"}',
'{"name": "Foo Bar 2", "message": "Hello from Client (REST) 2!"}']
Extracted request: [{'name': 'Foo Bar', 'message': 'Hello from Client (REST)!'}, {'name': 'Foo Bar 2', 'message': 'Hello from Client (REST) 2!'}]
Returning response: {"model_name": "json-hello-world", "outputs": [{"name": "echo_response", "shape": [274], "datatype": "BYTES", "parameters": {"content_type": "str"}, "data": ["{\"request\": {\"name\": \"Foo Bar\", \"message\": \"Hello from Client (REST)!\"}, \"server_response\": \"Got your request. Hello from the server.\"}", "{\"request\": {\"name\": \"Foo Bar 2\", \"message\": \"Hello from Client (REST) 2!\"}, \"server_response\": \"Got your request. Hello from the server.\"}"]}]}
INFO: 127.0.0.1:52497 - "POST /v2/models/json-hello-world/infer HTTP/1.1" 200 OK
INFO: 127.0.0.1:52498 - "POST /v2/models/json-hello-world/infer HTTP/1.1" 200 OK