Created
April 22, 2024 07:26
-
-
Save patrickhuy/3fbdf9c4f4d483826f838aac859ebbbb to your computer and use it in GitHub Desktop.
mlserver adaptive batching issues
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
import grequests | |
#import requests | |
import json | |
from mlserver.types import InferenceResponse | |
from mlserver.codecs.string import StringRequestCodec | |
from pprint import PrettyPrinter | |
pp = PrettyPrinter(indent=1) | |
async def main(): | |
inputs = {"name": "Foo Bar", "message": "Hello from Client (REST)!"} | |
inputs2 = {"name": "Foo Bar 2", "message": "Hello from Client (REST) 2!"} | |
# NOTE: this uses characters rather than encoded bytes. It is recommended that you use the `mlserver` types to assist in the correct encoding. | |
inputs_string = json.dumps(inputs) | |
inputs_string2 = json.dumps(inputs2) | |
inference_request = { | |
"inputs": [ | |
{ | |
"name": "echo_request", | |
"shape": [len(inputs_string)], | |
"datatype": "BYTES", | |
"data": [inputs_string], | |
} | |
] | |
} | |
inference_request2 = { | |
"inputs": [ | |
{ | |
"name": "echo_request", | |
"shape": [len(inputs_string2)], | |
"datatype": "BYTES", | |
"data": [inputs_string2], | |
} | |
] | |
} | |
print("Sending request to server...") | |
endpoint = "http://localhost:8080/v2/models/json-hello-world/infer" | |
# make post request asynchonously to send both requests at once | |
t1 = grequests.post(endpoint, json=inference_request) | |
t2 = grequests.post(endpoint, json=inference_request2) | |
[response,response2] = grequests.map([t1, t2]) | |
print(f"full response:\n") | |
print(response) | |
print(response.text) | |
# retrieve text output as dictionary | |
inference_response = InferenceResponse.parse_raw(response.text) | |
raw_json = StringRequestCodec.decode_response(inference_response) | |
print (f"raw_json: {raw_json}") | |
output = json.loads(raw_json[0]) | |
print(f"\ndata part:\n") | |
pp.pprint(output) | |
print(f"full response2:\n") | |
print(response2) | |
print(response2.text) | |
# retrieve text output as dictionary | |
inference_response2 = InferenceResponse.parse_raw(response2.text) | |
raw_json2 = StringRequestCodec.decode_response(inference_response2) | |
print (f"raw_json2: {raw_json2}") | |
output2 = json.loads(raw_json2[0]) | |
print(f"\ndata part:\n") | |
pp.pprint(output2) | |
asyncio.run(main()) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from typing import Dict, Any | |
from mlserver import MLModel, types | |
from mlserver.codecs import StringCodec | |
from pprint import PrettyPrinter | |
class JsonHelloWorldModel(MLModel): | |
async def load(self) -> bool: | |
# Perform additional custom initialization here. | |
print("Initialize model") | |
# Set readiness flag for model | |
return await super().load() | |
async def unload(self) -> bool: | |
return await super().unload() | |
async def predict(self, payload: types.InferenceRequest) -> types.InferenceResponse: | |
print("Received request", json.dumps(payload.dict(), indent=2)) | |
request = self._extract_json(payload) | |
print (f"Extracted request: {request}") | |
response = map(lambda x: {"request": x, "server_response": "Got your request. Hello from the server."}, request) | |
response_bytes = list(map(lambda x: json.dumps(x).encode("UTF-8"), response)) | |
response_total_bytes_len = sum(map(len, response_bytes)) | |
inferenceResponse = types.InferenceResponse( | |
id=payload.id, | |
model_name=self.name, | |
model_version=self.version, | |
outputs=[ | |
types.ResponseOutput( | |
name="echo_response", | |
shape=[response_total_bytes_len], | |
datatype="BYTES", | |
data=response_bytes, | |
parameters=types.Parameters(content_type="str"), | |
) | |
], | |
) | |
print (f"Returning response: {inferenceResponse.json()}") | |
return inferenceResponse | |
def _extract_json(self, payload: types.InferenceRequest) -> list[Dict[str, Any]]: | |
for inp in payload.inputs: | |
decoded = self.decode(inp, default_codec=StringCodec) | |
PrettyPrinter(indent=1).pprint(decoded) | |
return list(map(lambda x: json.loads(x), decoded)) | |
return list() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "json-hello-world", | |
"implementation": "jsonmodels.JsonHelloWorldModel", | |
"max_batch_size": 2, | |
"max_batch_time": 0.1 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sending request to server... | |
full response: | |
<Response [200]> | |
{"model_name":"json-hello-world","id":"d39e4ffc-f5b1-4e6c-b0c2-0732144d589c","parameters":{},"outputs":[{"name":"echo_response","shape":[59],"datatype":"BYTES","parameters":{"content_type":"str"},"data":["{\"request\": {\"name\": \"Foo Bar\", \"message\": \"Hello from Client (REST)!\"}, \"server_response\": \"Got your request. Hello from the server.\"}","{\"request\": {\"name\": \"Foo Bar 2\", \"message\": \"Hello from Client (REST) 2!\"}, \"server_response\": \"Got your request. Hello from the server.\"}"]}]} | |
raw_json: ['{"request": {"name": "Foo Bar", "message": "Hello from Client (REST)!"}, "server_response": "Got your request. Hello from the server."}', '{"request": {"name": "Foo Bar 2", "message": "Hello from Client (REST) 2!"}, "server_response": "Got your request. Hello from the server."}'] | |
data part: | |
{'request': {'message': 'Hello from Client (REST)!', 'name': 'Foo Bar'}, | |
'server_response': 'Got your request. Hello from the server.'} | |
full response2: | |
<Response [200]> | |
{"model_name":"json-hello-world","id":"17869200-b99e-4345-a960-cbb45a1eb930","parameters":{},"outputs":[{"name":"echo_response","shape":[63],"datatype":"BYTES","parameters":{"content_type":"str"},"data":[]}]} | |
raw_json2: [] | |
Traceback (most recent call last): | |
File "/Users/D064633/SAPDevelop/mlserver-marian/client.py", line 76, in <module> | |
asyncio.run(main()) | |
File "/opt/homebrew/Cellar/python@3.11/3.11.9/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/runners.py", line 190, in run | |
return runner.run(main) | |
^^^^^^^^^^^^^^^^ | |
File "/opt/homebrew/Cellar/python@3.11/3.11.9/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/runners.py", line 118, in run | |
return self._loop.run_until_complete(task) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/opt/homebrew/Cellar/python@3.11/3.11.9/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/base_events.py", line 654, in run_until_complete | |
return future.result() | |
^^^^^^^^^^^^^^^ | |
File "/Users/D064633/SAPDevelop/mlserver-marian/client.py", line 70, in main | |
output2 = json.loads(raw_json2[0]) | |
~~~~~~~~~^^^ | |
IndexError: list index out of range |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Initialize model | |
2024-04-22 09:24:27,117 [mlserver][json-hello-world] INFO - Loaded model 'json-hello-world' successfully. | |
2024-04-22 09:24:27,118 [mlserver][json-hello-world] INFO - Loaded model 'json-hello-world' successfully. | |
Received request { | |
"parameters": { | |
"headers": { | |
"host": "localhost:8080", | |
"user-agent": "python-requests/2.31.0", | |
"accept-encoding": "gzip, deflate, br", | |
"accept": "*/*", | |
"connection": "keep-alive", | |
"content-length": "159", | |
"content-type": "application/json", | |
"Ce-Specversion": "0.3", | |
"Ce-Source": "io.seldon.serving.deployment.mlserver", | |
"Ce-Type": "io.seldon.serving.inference.request", | |
"Ce-Modelid": "json-hello-world", | |
"Ce-Inferenceservicename": "mlserver", | |
"Ce-Endpoint": "json-hello-world", | |
"Ce-Id": "17869200-b99e-4345-a960-cbb45a1eb930", | |
"Ce-Requestid": "17869200-b99e-4345-a960-cbb45a1eb930" | |
} | |
}, | |
"inputs": [ | |
{ | |
"name": "echo_request", | |
"shape": [ | |
122 | |
], | |
"datatype": "BYTES", | |
"data": [ | |
"{\"name\": \"Foo Bar\", \"message\": \"Hello from Client (REST)!\"}", | |
"{\"name\": \"Foo Bar 2\", \"message\": \"Hello from Client (REST) 2!\"}" | |
] | |
} | |
] | |
} | |
['{"name": "Foo Bar", "message": "Hello from Client (REST)!"}', | |
'{"name": "Foo Bar 2", "message": "Hello from Client (REST) 2!"}'] | |
Extracted request: [{'name': 'Foo Bar', 'message': 'Hello from Client (REST)!'}, {'name': 'Foo Bar 2', 'message': 'Hello from Client (REST) 2!'}] | |
Returning response: {"model_name": "json-hello-world", "outputs": [{"name": "echo_response", "shape": [274], "datatype": "BYTES", "parameters": {"content_type": "str"}, "data": ["{\"request\": {\"name\": \"Foo Bar\", \"message\": \"Hello from Client (REST)!\"}, \"server_response\": \"Got your request. Hello from the server.\"}", "{\"request\": {\"name\": \"Foo Bar 2\", \"message\": \"Hello from Client (REST) 2!\"}, \"server_response\": \"Got your request. Hello from the server.\"}"]}]} | |
INFO: 127.0.0.1:52497 - "POST /v2/models/json-hello-world/infer HTTP/1.1" 200 OK | |
INFO: 127.0.0.1:52498 - "POST /v2/models/json-hello-world/infer HTTP/1.1" 200 OK |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment