triton-inference-server · hcho3 · Oct 9, 2024 · Oct 2, 2024 · Oct 2, 2024 · Oct 2, 2024
diff --git a/conda/environments/rapids_triton_dev.yml b/conda/environments/rapids_triton_dev.yml
@@ -6,4 +6,4 @@ dependencies:
   - ccache
   - cmake>=3.26.4,!=3.30.0
   - ninja
-  - rapidjson>=1.1.0,<1.1.0.post*
+  - rapidjson
diff --git a/conda/environments/triton_benchmark.yml b/conda/environments/triton_benchmark.yml
@@ -5,14 +5,14 @@ channels:
   - rapidsai
 dependencies:
   - cuda-version=11.8
-  - cudf=23.12
-  - libcusolver<=11.4.1.48
-  - libcusparse<=12.0
+  - cudf=24.08
+  - libcusolver
+  - libcusparse
   - matplotlib
   - pip
   - python
   - scipy
   - pip:
       - tritonclient[all]
-      - protobuf==3.20.1
-      - git+https://github.com/rapidsai/rapids-triton.git@branch-23.12#subdirectory=python
+      - protobuf
+      - git+https://github.com/rapidsai/rapids-triton.git@branch-24.04#subdirectory=python
diff --git a/conda/environments/triton_test.yml b/conda/environments/triton_test.yml
@@ -7,19 +7,19 @@ dependencies:
   - aws-sdk-cpp
   - clang-tools=11.1.0
   - cuda-version=11.8
-  - cudf=23.12
-  - cuml=23.12
+  - cudf=24.08
+  - cuml=24.08
   - flake8
-  - hypothesis<6.46.8
+  - hypothesis
   - lightgbm
   - matplotlib
   - pip
   - pytest
   - python
-  - rapidsai::xgboost>=1.7
-  - scikit-learn=1.2.0
+  - rapidsai::xgboost>=2.1
+  - scikit-learn>=1.5
   - treelite
   - pip:
       - tritonclient[all]
-      - protobuf==3.20.1
-      - git+https://github.com/rapidsai/rapids-triton.git@branch-23.12#subdirectory=python
+      - protobuf
+      - git+https://github.com/rapidsai/rapids-triton.git@branch-24.04#subdirectory=python
diff --git a/conda/environments/triton_test_no_client.yml b/conda/environments/triton_test_no_client.yml
@@ -7,15 +7,15 @@ dependencies:
   - aws-sdk-cpp
   - clang-tools=11.1.0
   - cuda-version=11.8
-  - cudf=23.12
-  - cuml=23.12
+  - cudf=24.08
+  - cuml=24.08
   - flake8
-  - hypothesis<6.46.8
+  - hypothesis
   - lightgbm
   - pip
   - pytest
   - python
   - python-rapidjson
-  - rapidsai::xgboost>=1.7
-  - scikit-learn=1.2.0
+  - rapidsai::xgboost>=2.1
+  - scikit-learn>=1.5
   - treelite
diff --git a/docs/model_config.md b/docs/model_config.md
@@ -70,7 +70,7 @@ instance_group [{ kind: KIND_AUTO }]
 parameters [
   {
     key: "model_type"
-    value: { string_value: "xgboost_json" }
+    value: { string_value: "xgboost_ubj" }
   },
   {
     key: "output_class"
@@ -185,23 +185,25 @@ Treelite's checkpoint format. For more information, see [Model
 Support](model_support.md).
 
 The `model_type` option is used to indicate which of these serialization
-formats your model uses: `xgboost` for XGBoost binary, `xgboost_json` for
-XGBoost JSON, `lightgbm` for LightGBM, or `treelite_checkpoint` for
-Treelite:
+formats your model uses: `xgboost_ubj` for XGBoost UBJSON [^1], `xgboost_json` for
+XGBoost JSON, `xgboost` for XGBoost binary (legacy), `lightgbm` for LightGBM,
+or `treelite_checkpoint` for Treelite:
 
 ```
 parameters [
   {
     key: "model_type"
-    value: { string_value: "xgboost_json" }
+    value: { string_value: "xgboost_ubj" }
   }
 ]
 ```
+[^1]: Default format in XGBoost 2.1+
 
 #### Model Filenames
 For each model type, Triton expects a particular default filename:
-- `xgboost.model` for XGBoost Binary
+- `xgboost.ubj` for XGBoost UBJSON [^1]
 - `xgboost.json` for XGBoost JSON
+- `xgboost.model` for XGBoost Binary (Legacy)
 - `model.txt` for LightGBM
 - `checkpoint.tl` for Treelite
 It is recommended that you use these filenames, but custom filenames can be

diff --git a/ops/Dockerfile b/ops/Dockerfile
@@ -3,7 +3,7 @@
 # Arguments for controlling build details
 ###########################################################################################
 # Version of Triton to use
-ARG TRITON_VERSION=24.08
+ARG TRITON_VERSION=24.09
 # Base container image
 ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3
 # Whether or not to enable GPU build

diff --git a/qa/L0_e2e/generate_example_model.py b/qa/L0_e2e/generate_example_model.py
@@ -307,13 +307,19 @@ def generate_model(
 
 def serialize_model(model, directory, output_format="xgboost"):
     if output_format == "xgboost":
-        model_path = os.path.join(directory, "xgboost.model")
+        model_path = os.path.join(directory, "xgboost.deprecated")
         model.save_model(model_path)
-        return model_path
+        new_model_path = os.path.join(directory, "xgboost.model")
+        os.rename(model_path, new_model_path)
+        return new_model_path
     if output_format == "xgboost_json":
         model_path = os.path.join(directory, "xgboost.json")
         model.save_model(model_path)
         return model_path
+    if output_format == "xgboost_ubj":
+        model_path = os.path.join(directory, "xgboost.ubj")
+        model.save_model(model_path)
+        return model_path
     if output_format == "lightgbm":
         model_path = os.path.join(directory, "model.txt")
         model.save_model(model_path)

diff --git a/qa/generate_example_models.sh b/qa/generate_example_models.sh
@@ -56,6 +56,19 @@ then
   models+=( $name )
 fi
 
+name=xgboost_ubj
+if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ]
+then
+  ${GENERATOR_SCRIPT} \
+    --name $name \
+    --format xgboost_ubj \
+    --depth 7 \
+    --trees 500 \
+    --features 500 \
+    --predict_proba
+  models+=( $name )
+fi
+
 name=xgboost_shap
 if [ $RETRAIN -ne 0 ] || [ ! -d "${MODEL_REPO}/${name}" ]
 then

diff --git a/src/model.h b/src/model.h
@@ -151,6 +151,9 @@ struct RapidsModel : rapids::Model<RapidsSharedState> {
         case SerializationFormat::xgboost_json:
           path /= "xgboost.json";
           break;
+        case SerializationFormat::xgboost_ubj:
+          path /= "xgboost.ubj";
+          break;
         case SerializationFormat::lightgbm:
           path /= "model.txt";
           break;

diff --git a/src/serialization.h b/src/serialization.h
@@ -23,7 +23,13 @@
 
 namespace triton { namespace backend { namespace NAMESPACE {
 
-enum struct SerializationFormat { xgboost, xgboost_json, lightgbm, treelite };
+enum struct SerializationFormat {
+  xgboost,       // XGBoost binary (legacy)
+  xgboost_json,  // XGBoost JSON
+  xgboost_ubj,   // XGBoost UBJSON
+  lightgbm,      // LightGBM
+  treelite       // Treelite checkpoint
+};
 
 inline auto
 string_to_serialization(std::string const& type_string)
@@ -34,6 +40,8 @@ string_to_serialization(std::string const& type_string)
     result = SerializationFormat::xgboost;
   } else if (type_string == "xgboost_json") {
     result = SerializationFormat::xgboost_json;
+  } else if (type_string == "xgboost_ubj") {
+    result = SerializationFormat::xgboost_ubj;
   } else if (type_string == "lightgbm") {
     result = SerializationFormat::lightgbm;
   } else if (type_string == "treelite_checkpoint") {
@@ -60,6 +68,9 @@ serialization_to_string(SerializationFormat format)
     case SerializationFormat::xgboost_json:
       result = "xgboost_json";
       break;
+    case SerializationFormat::xgboost_ubj:
+      result = "xgboost_ubj";
+      break;
     case SerializationFormat::lightgbm:
       result = "lightgbm";
       break;

diff --git a/src/tl_utils.h b/src/tl_utils.h
@@ -52,6 +52,14 @@ load_tl_base_model(
             model_file, config_str);
         break;
       }
+      case SerializationFormat::xgboost_ubj: {
+        auto config_str =
+            std::string("{\"allow_unknown_field\": ") +
+            std::string(xgboost_allow_unknown_field ? "true" : "false") + "}";
+        result = treelite::model_loader::LoadXGBoostModelUBJSON(
+            model_file, config_str);
+        break;
+      }
       case SerializationFormat::lightgbm:
         result = treelite::model_loader::LoadLightGBMModel(model_file);
         break;