From def6db04c1ff1f04a332db00d4a23a84ab809d35 Mon Sep 17 00:00:00 2001 From: interestingLSY Date: Sat, 9 Dec 2023 16:24:00 +0800 Subject: [PATCH] Add support for ray-BERT SUT --- .../app-mlperf-inference-reference/_cm.yaml | 49 +++++++++++++++++++ cm-mlops/script/app-mlperf-inference/_cm.yaml | 9 ++++ .../script/get-generic-python-lib/_cm.json | 38 ++++++++++++++ 3 files changed, 96 insertions(+) diff --git a/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml b/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml index 3ea35ca5be..c581a3639c 100644 --- a/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml +++ b/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml @@ -198,6 +198,7 @@ deps: CM_MLPERF_BACKEND: - pytorch - tvm-pytorch + - ray CM_MLPERF_DEVICE: - gpu @@ -220,9 +221,44 @@ deps: CM_MLPERF_BACKEND: - pytorch - tvm-pytorch + - ray CM_MLPERF_DEVICE: - gpu + ## tensorrt + - tags: get,generic-python-lib,_tensorrt + names: + - ml-engine-tensorrt + enable_if_env: + CM_MLPERF_BACKEND: + - ray + + ## torch_tensorrt + - tags: get,generic-python-lib,_torch_tensorrt + names: + - ml-engine-torch_tensorrt + enable_if_env: + CM_MLPERF_BACKEND: + - ray + + ## Ray + - tags: get,generic-python-lib,_ray + names: + - ray + enable_if_env: + CM_MLPERF_BACKEND: + - ray + + ## async_timeout (for multi-node) + # NOTE. This is a bug in ray 2.8.0. Ray 2.8.0 needs the pip package + # async_timeout to be installed, so we need to install it manually. 
+ - tags: get,generic-python-lib,_async_timeout + names: + - async_timeout + enable_if_env: + CM_MLPERF_BACKEND: + - ray + ## Transformers - tags: get,generic-python-lib,_transformers names: @@ -560,6 +596,19 @@ variations: pytorch: tags: _rocm + ray: + group: framework + add_deps_recursive: + imagenet-preprocessed: + tags: _NCHW + openimages-preprocessed: + tags: _NCHW + ml-model: + tags: raw,_pytorch + env: + CM_MLPERF_BACKEND: ray + CM_MLPERF_BACKEND_VERSION: <<<CM_TORCH_VERSION>>> + tf,rocm: add_deps_recursive: tensorflow: diff --git a/cm-mlops/script/app-mlperf-inference/_cm.yaml b/cm-mlops/script/app-mlperf-inference/_cm.yaml index e4d94af271..2447fe4afb 100644 --- a/cm-mlops/script/app-mlperf-inference/_cm.yaml +++ b/cm-mlops/script/app-mlperf-inference/_cm.yaml @@ -682,6 +682,15 @@ variations: mlperf-inference-implementation: tags: _tvm-tflite + ray: + group: backend + env: + CM_MLPERF_BACKEND: + ray + add_deps_recursive: + mlperf-inference-implementation: + tags: _ray + cpu: group: device diff --git a/cm-mlops/script/get-generic-python-lib/_cm.json b/cm-mlops/script/get-generic-python-lib/_cm.json index 9f9876d355..5185e0ded4 100644 --- a/cm-mlops/script/get-generic-python-lib/_cm.json +++ b/cm-mlops/script/get-generic-python-lib/_cm.json @@ -68,6 +68,14 @@ "CM_APACHE_TVM_VERSION" ] }, + "async_timeout": { + "env": { + "CM_GENERIC_PYTHON_PACKAGE_NAME": "async_timeout" + }, + "new_env_keys": [ + "CM_ASYNC_TIMEOUT_VERSION" + ] + }, "attrs": { "env": { "CM_GENERIC_PYTHON_PACKAGE_NAME": "attrs" @@ -543,6 +551,14 @@ "CM_PYCUDA_VERSION" ] }, + "ray": { + "env": { + "CM_GENERIC_PYTHON_PACKAGE_NAME": "ray[default]" + }, + "new_env_keys": [ + "CM_RAY_VERSION" + ] + }, "requests": { "env": { "CM_GENERIC_PYTHON_PACKAGE_NAME": "requests" @@ -655,6 +671,28 @@ "CM_TOKENIZATION_VERSION" ] }, + "tensorrt": { + "env": { + "CM_GENERIC_PYTHON_PACKAGE_NAME": "tensorrt", + "CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL": "https://download.pytorch.org/whl/${CM_TORCH_CUDA}", + "CM_TORCH_CUDA": "cu118", 
"CM_TORCH_VERSION_EXTRA": "CUDA" + }, + "new_env_keys": [ + "CM_TENSORRT_VERSION" + ] + }, + "torch_tensorrt": { + "env": { + "CM_GENERIC_PYTHON_PACKAGE_NAME": "torch-tensorrt", + "CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL": "https://download.pytorch.org/whl/${CM_TORCH_CUDA}", + "CM_TORCH_CUDA": "cu118", + "CM_TORCH_VERSION_EXTRA": "CUDA" + }, + "new_env_keys": [ + "CM_TORCH_TENSORRT_VERSION" + ] + }, "six": { "env": { "CM_GENERIC_PYTHON_PACKAGE_NAME": "six"