From 3e755a909cbdcbadb6fb86cbd48c83ae46e68f9a Mon Sep 17 00:00:00 2001 From: Rafael Vasquez Date: Wed, 13 Mar 2024 15:52:36 -0400 Subject: [PATCH] feat: Update Triton model support (#485) #### Motivation Triton introduced [support for more model frameworks last year](https://developer.nvidia.com/blog/real-time-serving-for-xgboost-scikit-learn-randomforest-lightgbm-and-more/) and can support xgboost, lightgbm, and more. This PR adds examples and docs to advertise this. #### Modifications - Add newly supported models to Triton runtime config, setting `autoSelect: false`. - Add an example ISVC config for Triton-served XGBoost model. - Update example-models doc to reflect example models added in https://github.com/kserve/modelmesh-minio-examples/pull/7 - Update model-formats README to reflect framework support and framework-specific docs to show example ISVC using Triton. - Add FVTs for lightgbm and xgboost deployment on Triton runtime #### Result Closes #185 --------- Signed-off-by: Rafael Vasquez Signed-off-by: Rafael Vasquez --- .../example-triton-xgboost-isvc.yaml | 28 +++++ config/runtimes/triton-2.x.yaml | 9 ++ docs/example-models.md | 39 +++++++ docs/model-formats/README.md | 22 ++-- docs/model-formats/lightgbm.md | 31 ++++- docs/model-formats/xgboost.md | 31 ++++- fvt/inference.go | 40 +++++++ fvt/predictor/predictor_test.go | 109 ++++++++++++++++++ .../predictors/lightgbm-fil-predictor.yaml | 26 +++++ .../predictors/xgboost-fil-predictor.yaml | 26 +++++ fvt/utils.go | 6 + 11 files changed, 354 insertions(+), 13 deletions(-) create mode 100644 config/example-isvcs/example-triton-xgboost-isvc.yaml create mode 100644 fvt/testdata/predictors/lightgbm-fil-predictor.yaml create mode 100644 fvt/testdata/predictors/xgboost-fil-predictor.yaml diff --git a/config/example-isvcs/example-triton-xgboost-isvc.yaml b/config/example-isvcs/example-triton-xgboost-isvc.yaml new file mode 100644 index 00000000..ebadae46 --- /dev/null +++ 
b/config/example-isvcs/example-triton-xgboost-isvc.yaml @@ -0,0 +1,28 @@ +# Copyright 2022 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: example-xgboost-mushroom-fil + annotations: + serving.kserve.io/deploymentMode: ModelMesh +spec: + predictor: + model: + modelFormat: + name: xgboost + runtime: triton-2.x + storage: + key: localMinIO + path: xgboost/mushroom-fil diff --git a/config/runtimes/triton-2.x.yaml b/config/runtimes/triton-2.x.yaml index 9b1cd614..b03b70f9 100644 --- a/config/runtimes/triton-2.x.yaml +++ b/config/runtimes/triton-2.x.yaml @@ -39,6 +39,15 @@ spec: - name: tensorrt version: "7" # 7.2.1 autoSelect: true + - name: sklearn + version: "0" # v0.23.1 + autoSelect: false + - name: xgboost + version: "1" # v1.1.1 + autoSelect: false + - name: lightgbm + version: "3" # v3.2.1 + autoSelect: false protocolVersions: - grpc-v2 diff --git a/docs/example-models.md b/docs/example-models.md index 5e679d04..3daca7c1 100644 --- a/docs/example-models.md +++ b/docs/example-models.md @@ -28,6 +28,10 @@ s3://modelmesh-example-models/ │ └── mnist.h5 ├── lightgbm │ └── mushroom.bst +│ └── mushroom-fil +│ ├── 1 +│ │ └── model.txt +│ └── config.pbtxt ├── onnx │ └── mnist.onnx ├── pytorch @@ -45,6 +49,10 @@ s3://modelmesh-example-models/ │ └── variables.index └── xgboost └── mushroom.json + └── mushroom-fil + ├── 1 + │ └── xgboost.json + └── config.pbtxt ``` ### Example 
Inference Requests @@ -277,3 +285,34 @@ Response: ] } ``` + +#### XGBoost (Triton FIL): + +This is a sample inference request to an XGBoost model trained on a [mushroom dataset](https://archive.ics.uci.edu/ml/datasets/Mushroom) and served using the [FIL backend for Triton](https://github.com/triton-inference-server/fil_backend): + +```shell +MODEL_NAME=example-xgboost-mushroom-fil +grpcurl \ + -plaintext \ + -proto fvt/proto/kfs_inference_v2.proto \ + -d '{ "model_name": "'"${MODEL_NAME}"'", "inputs": [{ "name": "input__0", "shape": [1, 126], "datatype": "FP32", "contents": { "fp32_contents": [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0] }}]}' \ + localhost:8033 \ + inference.GRPCInferenceService.ModelInfer +``` + +Response: + +```json +{ + "modelName": "example-xgboost-mushroom-fil__isvc-ffe6a3f20b", + "modelVersion": "1", + "outputs": [ + { + "name": "output__0", + "datatype": "FP32", + "shape": ["1"] + } + ], + "rawOutputContents": ["B1xLPA=="] +} +``` diff --git a/docs/model-formats/README.md b/docs/model-formats/README.md index a5c128df..db4a288e 100644 --- a/docs/model-formats/README.md +++ b/docs/model-formats/README.md @@ -11,16 +11,16 @@ By leveraging existing third-party model servers, we support a number of standar - [TensorFlow](tensorflow.md) - [XGBoost](xgboost.md) -| Model Type | Framework | Supported via ServingRuntime | -| ----------- | ---------------- | 
---------------------------- | -| keras | TensorFlow | Triton (C++) | -| lightgbm | LightGBM | MLServer (python) | -| onnx | ONNX | Triton (C++), OVMS (C++) | -| openvino_ir | Intel OpenVINO\* | OVMS (C++) | -| pytorch | PyTorch | Triton (C++) | -| sklearn | scikit-learn | MLServer (python) | -| tensorflow | TensorFlow | Triton (C++) | -| xgboost | XGBoost | MLServer (python) | -| any | Custom | [Custom](../runtimes) (any) | +| Model Type | Framework | Supported via ServingRuntime | +| ----------- | ---------------- | ------------------------------- | +| keras | TensorFlow | Triton (C++) | +| lightgbm | LightGBM | MLServer (python), Triton (C++) | +| onnx | ONNX | Triton (C++), OVMS (C++) | +| openvino_ir | Intel OpenVINO\* | OVMS (C++) | +| pytorch | PyTorch | Triton (C++) | +| sklearn | scikit-learn | MLServer (python), Triton (C++) | +| tensorflow | TensorFlow | Triton (C++) | +| xgboost | XGBoost | MLServer (python), Triton (C++) | +| any | Custom | [Custom](../runtimes) (any) | (\*)Many ML frameworks can have models converted to the OpenVINO IR format, such as Caffe, TensorFlow, MXNet, PaddlePaddle and ONNX, doc [here](https://docs.openvino.ai/latest/ovms_what_is_openvino_model_server.html). 
diff --git a/docs/model-formats/lightgbm.md b/docs/model-formats/lightgbm.md index 9e5db399..ce0811b6 100644 --- a/docs/model-formats/lightgbm.md +++ b/docs/model-formats/lightgbm.md @@ -32,11 +32,18 @@ The storage path can point directly to a serialized model ``` s3://modelmesh-example-models/ -└── lightgbm/mushroom.bst +└── lightgbm + ├── mushroom.bst + └── mushroom-fil + ├── 1 + │ └── model.txt + └── config.pbtxt ``` **InferenceService** +For MLServer: + ```yaml kind: InferenceService metadata: @@ -54,3 +61,25 @@ spec: parameters: bucket: modelmesh-example-models ``` + +For Triton: + +```yaml +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: lightgbm-example + annotations: + serving.kserve.io/deploymentMode: ModelMesh +spec: + predictor: + model: + modelFormat: + name: lightgbm + runtime: triton-2.x + storage: + key: localMinIO + path: lightgbm/mushroom-fil + parameters: + bucket: modelmesh-example-models +``` diff --git a/docs/model-formats/xgboost.md b/docs/model-formats/xgboost.md index 9294162b..ece1777c 100644 --- a/docs/model-formats/xgboost.md +++ b/docs/model-formats/xgboost.md @@ -33,11 +33,18 @@ The storage path can point directly to a serialized model ``` s3://modelmesh-example-models/ -└── xgboost/mushroom.json +└── xgboost + ├── mushroom.json + └── mushroom-fil + ├── 1 + │ └── xgboost.json + └── config.pbtxt ``` **InferenceService** +If using MLServer: + ```yaml apiVersion: serving.kserve.io/v1beta1 kind: InferenceService @@ -56,3 +63,25 @@ spec: parameters: bucket: modelmesh-example-models ``` + +For Triton: + +```yaml +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: xgboost-example + annotations: + serving.kserve.io/deploymentMode: ModelMesh +spec: + predictor: + model: + modelFormat: + name: xgboost + runtime: triton-2.x + storage: + key: localMinIO + path: xgboost/mushroom-fil + parameters: + bucket: modelmesh-example-models +``` diff --git a/fvt/inference.go b/fvt/inference.go 
index eb2ff15e..68286cc8 100644 --- a/fvt/inference.go +++ b/fvt/inference.go @@ -302,6 +302,26 @@ func ExpectSuccessfulInference_lightgbmMushroom(predictorName string) { Expect(math.Round(float64(inferResponse.Outputs[0].Contents.Fp64Contents[0])*10) / 10).To(BeEquivalentTo(0.0)) } +// LightGBM Mushroom via Triton +// COS path: fvt/lightgbm/mushroom-fil +func ExpectSuccessfulInference_lightgbmFILMushroom(predictorName string) { + // build the grpc inference call + inferInput := &inference.ModelInferRequest_InferInputTensor{ + Name: "input__0", + Shape: []int64{1, 126}, + Datatype: "FP32", + Contents: &inference.InferTensorContents{Fp32Contents: mushroomInputData}, + } + inferRequest := &inference.ModelInferRequest{ + ModelName: predictorName, + Inputs: []*inference.ModelInferRequest_InferInputTensor{inferInput}, + } + + inferResponse, err := FVTClientInstance.RunKfsInference(inferRequest) + Expect(err).ToNot(HaveOccurred()) + Expect(inferResponse).ToNot(BeNil()) +} + // XGBoost Mushroom // COS path: fvt/xgboost/mushroom func ExpectSuccessfulInference_xgboostMushroom(predictorName string) { @@ -324,6 +344,26 @@ func ExpectSuccessfulInference_xgboostMushroom(predictorName string) { Expect(math.Round(float64(inferResponse.Outputs[0].Contents.Fp32Contents[0])*10) / 10).To(BeEquivalentTo(0.0)) } +// XGBoost Mushroom via Triton +// COS path: fvt/xgboost/mushroom-fil +func ExpectSuccessfulInference_xgboostFILMushroom(predictorName string) { + // build the grpc inference call + inferInput := &inference.ModelInferRequest_InferInputTensor{ + Name: "input__0", + Shape: []int64{1, 126}, + Datatype: "FP32", + Contents: &inference.InferTensorContents{Fp32Contents: mushroomInputData}, + } + inferRequest := &inference.ModelInferRequest{ + ModelName: predictorName, + Inputs: []*inference.ModelInferRequest_InferInputTensor{inferInput}, + } + + inferResponse, err := FVTClientInstance.RunKfsInference(inferRequest) + Expect(err).ToNot(HaveOccurred()) + 
Expect(inferResponse).ToNot(BeNil()) +} + // Helpers var mushroomInputData []float32 = []float32{1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0} diff --git a/fvt/predictor/predictor_test.go b/fvt/predictor/predictor_test.go index 5892c436..97cffb4d 100644 --- a/fvt/predictor/predictor_test.go +++ b/fvt/predictor/predictor_test.go @@ -113,6 +113,22 @@ var predictorsArray = []FVTPredictor{ differentPredictorName: "xgboost", differentPredictorFilename: "xgboost-predictor.yaml", }, + { + predictorName: "xgboost-fil", + predictorFilename: "xgboost-fil-predictor.yaml", + currentModelPath: "fvt/xgboost/mushroom-fil", + updatedModelPath: "fvt/xgboost/mushroom-fil-dup", + differentPredictorName: "onnx", + differentPredictorFilename: "onnx-predictor.yaml", + }, + { + predictorName: "lightgbm-fil", + predictorFilename: "lightgbm-fil-predictor.yaml", + currentModelPath: "fvt/lightgbm/mushroom-fil", + updatedModelPath: "fvt/lightgbm/mushroom-fil-dup", + differentPredictorName: "onnx", + differentPredictorFilename: "onnx-predictor.yaml", + }, // TorchServe test is currently disabled // { // predictorName: "pytorch-mar", @@ -731,6 +747,50 @@ var _ = Describe("Predictor", func() { }) }) + var _ = Describe("XGBoost FIL inference", Ordered, func() { + var xgboostPredictorObject *unstructured.Unstructured + var xgboostPredictorName string + + BeforeAll(func() { + // load the test predictor object + 
xgboostPredictorObject = NewPredictorForFVT("xgboost-fil-predictor.yaml") + xgboostPredictorName = xgboostPredictorObject.GetName() + + CreatePredictorAndWaitAndExpectLoaded(xgboostPredictorObject) + + err := FVTClientInstance.ConnectToModelServing(Insecure) + Expect(err).ToNot(HaveOccurred()) + }) + + AfterAll(func() { + FVTClientInstance.DeletePredictor(xgboostPredictorName) + }) + + It("should successfully run an inference", func() { + ExpectSuccessfulInference_xgboostFILMushroom(xgboostPredictorName) + }) + + It("should fail with invalid shape", func() { + // build the grpc inference call + inferInput := &inference.ModelInferRequest_InferInputTensor{ + Name: "input__0", + Shape: []int64{1, 28777}, + Datatype: "FP32", + Contents: &inference.InferTensorContents{Fp32Contents: []float32{}}, + } + inferRequest := &inference.ModelInferRequest{ + ModelName: xgboostPredictorName, + Inputs: []*inference.ModelInferRequest_InferInputTensor{inferInput}, + } + + inferResponse, err := FVTClientInstance.RunKfsInference(inferRequest) + + Expect(inferResponse).To(BeNil()) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("unexpected shape for input 'input__0'")) + }) + }) + var _ = Describe("Pytorch inference", Ordered, func() { var ptPredictorObject *unstructured.Unstructured var ptPredictorName string @@ -888,6 +948,50 @@ var _ = Describe("Predictor", func() { Expect(err.Error()).To(ContainSubstring("INTERNAL: builtins.ValueError: cannot reshape array")) }) }) + + var _ = Describe("LightGBM FIL inference", Ordered, func() { + var lightGBMPredictorObject *unstructured.Unstructured + var lightGBMPredictorName string + + BeforeAll(func() { + // load the test predictor object + lightGBMPredictorObject = NewPredictorForFVT("lightgbm-fil-predictor.yaml") + lightGBMPredictorName = lightGBMPredictorObject.GetName() + + CreatePredictorAndWaitAndExpectLoaded(lightGBMPredictorObject) + + err := FVTClientInstance.ConnectToModelServing(Insecure) + 
Expect(err).ToNot(HaveOccurred()) + }) + + AfterAll(func() { + FVTClientInstance.DeletePredictor(lightGBMPredictorName) + }) + + It("should successfully run an inference", func() { + ExpectSuccessfulInference_lightgbmFILMushroom(lightGBMPredictorName) + }) + + It("should fail with invalid shape input", func() { + // build the grpc inference call + inferInput := &inference.ModelInferRequest_InferInputTensor{ + Name: "input__0", + Shape: []int64{1, 28777}, + Datatype: "FP32", + Contents: &inference.InferTensorContents{Fp32Contents: []float32{}}, + } + inferRequest := &inference.ModelInferRequest{ + ModelName: lightGBMPredictorName, + Inputs: []*inference.ModelInferRequest_InferInputTensor{inferInput}, + } + + inferResponse, err := FVTClientInstance.RunKfsInference(inferRequest) + + Expect(inferResponse).To(BeNil()) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("unexpected shape for input 'input__0'")) + }) + }) }) // These tests verify that an invalid Predictor fails to load. These are in a @@ -948,6 +1052,11 @@ var _ = Describe("Invalid Predictors", func() { // modify the object with an unrecognized model type SetString(predictorObject, "invalidModelType", "spec", "modelType", "name") + // remove runtime field for predictors that have a runtime spec for this test + if CheckIfStringExists(predictorObject, "spec", "runtime", "name") { + SetString(predictorObject, "", "spec", "runtime", "name") + } + obj := CreatePredictorAndWaitAndExpectFailed(predictorObject) By("Verifying the predictor") diff --git a/fvt/testdata/predictors/lightgbm-fil-predictor.yaml b/fvt/testdata/predictors/lightgbm-fil-predictor.yaml new file mode 100644 index 00000000..ea3f5b94 --- /dev/null +++ b/fvt/testdata/predictors/lightgbm-fil-predictor.yaml @@ -0,0 +1,26 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +apiVersion: serving.kserve.io/v1alpha1 +kind: Predictor +metadata: + name: lightgbm-fil-predictor +spec: + modelType: + name: lightgbm + runtime: + name: triton-2.x + path: fvt/lightgbm/mushroom-fil + storage: + s3: + secretKey: localMinIO diff --git a/fvt/testdata/predictors/xgboost-fil-predictor.yaml b/fvt/testdata/predictors/xgboost-fil-predictor.yaml new file mode 100644 index 00000000..f7b7855c --- /dev/null +++ b/fvt/testdata/predictors/xgboost-fil-predictor.yaml @@ -0,0 +1,26 @@ +# Copyright 2021 IBM Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+apiVersion: serving.kserve.io/v1alpha1 +kind: Predictor +metadata: + name: xgboost-fil-mushroom +spec: + modelType: + name: xgboost + runtime: + name: triton-2.x + path: fvt/xgboost/mushroom-fil + storage: + s3: + secretKey: localMinIO diff --git a/fvt/utils.go b/fvt/utils.go index 862a8834..6bbb0627 100644 --- a/fvt/utils.go +++ b/fvt/utils.go @@ -111,6 +111,12 @@ func GetString(obj *unstructured.Unstructured, fieldPath ...string) string { return value } +func CheckIfStringExists(obj *unstructured.Unstructured, fieldPath ...string) bool { + _, exists, err := unstructured.NestedString(obj.Object, fieldPath...) + Expect(err).ToNot(HaveOccurred()) + return exists +} + func GetSlice(obj *unstructured.Unstructured, fieldPath ...string) ([]interface{}, bool) { value, exists, err := unstructured.NestedSlice(obj.Object, fieldPath...) Expect(err).ToNot(HaveOccurred())