@crobby
Created June 1, 2022 11:29
# Copyright 2021 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: mlserver-0.x
  labels:
    name: modelmesh-serving-mlserver-0.x-SR
spec:
  supportedModelFormats:
    - name: sklearn
      version: "0" # v0.23.1
      autoSelect: true
    - name: xgboost
      version: "1" # v1.1.1
      autoSelect: true
    - name: lightgbm
      version: "3" # v3.2.1
      autoSelect: true
  multiModel: true
  grpcEndpoint: "port:8085"
  grpcDataEndpoint: "port:8001"
  containers:
    - name: mlserver
      image: quay.io/opendatahub/mlserver:0.5.2
      env:
        - name: MLSERVER_MODELS_DIR
          value: "/models/_mlserver_models/"
        - name: MLSERVER_GRPC_PORT
          value: "8001"
        # The default HTTP port is 8080, which conflicts with MMesh's
        # Litelinks port
        - name: MLSERVER_HTTP_PORT
          value: "8002"
        - name: MLSERVER_LOAD_MODELS_AT_STARTUP
          value: "false"
        # Set a dummy model name via environment so that MLServer doesn't
        # error on a RepositoryIndex call when no models exist
        - name: MLSERVER_MODEL_NAME
          value: dummy-model-fixme
        # Bind to localhost so that MLServer listens only inside the pod
        - name: MLSERVER_HOST
          value: "127.0.0.1"
        # Increase the gRPC max message size to 16 MiB to support larger payloads
        - name: MLSERVER_GRPC_MAX_MESSAGE_LENGTH
          value: "16777216"
      resources:
        requests:
          cpu: 500m
          memory: 1Gi
        limits:
          cpu: "5"
          memory: 1Gi
  builtInAdapter:
    serverType: "mlserver"
    runtimeManagementPort: 8001
    memBufferBytes: 134217728
    modelLoadingTimeoutMillis: 90000
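
With this ServingRuntime applied (e.g. `kubectl apply -f` the manifest above), ModelMesh can auto-select it for any model whose format matches a `supportedModelFormats` entry with `autoSelect: true`. A minimal InferenceService that would resolve to this runtime might look like the sketch below; the name, annotation, and storage key/path are illustrative assumptions, not part of the gist:

```yaml
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: example-sklearn-model    # illustrative name
  annotations:
    # Route this InferenceService through ModelMesh rather than
    # the standard KServe deployment modes
    serving.kserve.io/deploymentMode: ModelMesh
spec:
  predictor:
    model:
      modelFormat:
        name: sklearn            # matches the sklearn entry above, so
                                 # mlserver-0.x is auto-selected
      storage:
        key: localMinIO          # assumed storage-config secret key
        path: sklearn/model.joblib  # assumed object path
```

Because `multiModel: true`, many such models share the single `mlserver` container, which is why the runtime disables load-at-startup and lets the built-in adapter manage loading over the gRPC management port (8001).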