Update test-nvidia-mlperf-inference-implementations.yml

mlcommons · Jan 2, 2025 · c7324b3 · c7324b3
1 parent 9a30c18
commit c7324b3
Showing 1 changed file with 14 additions and 3 deletions.
diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml
@@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations
 
 on:
   schedule:
-    - cron: "08 01 * * */3" #to be adjusted
+    - cron: "58 10 * * *" #to be adjusted
 
 jobs:
   run_nvidia:
@@ -17,20 +17,31 @@ jobs:
       strategy:
         fail-fast: false
         matrix:
-          system: [ "GO-spr", "phoenix-Amd-Am5", "GO-i9" ] 
+          # system: [ "GO-spr", "phoenix-Amd-Am5", "GO-i9", "mlc-server" ]
+          system: [ "mlc-server" ] 
           python-version: [ "3.12" ]
           model: [ "resnet50",  "retinanet",  "bert-99", "bert-99.9", "gptj-99.9", "3d-unet-99.9", "sdxl" ]
           exclude:
            - model: gptj-99.9
 
       steps:
       - name: Test MLPerf Inference NVIDIA ${{ matrix.model }}
+        env:
+          gpu_name: rtx_4090
         run: |
           # Set hw_name based on matrix.system
           if [ "${{ matrix.system }}" = "GO-spr" ]; then
             hw_name="RTX4090x2"
+            gpu_name=rtx_4090
+            docker_string=" --docker"
+          elif [ "${{ matrix.system }}" = "mlc-server" ]; then
+            hw_name="H100x8"
+            gpu_name=h100
+            docker_string=" "
           else
             hw_name="RTX4090x1"
+            gpu_name=rtx_4090
+            docker_string=" --docker"
           fi
 
           if [ -f "gh_action/bin/deactivate" ]; then source gh_action/bin/deactivate; fi
@@ -40,6 +51,6 @@ jobs:
           pip install --upgrade cm4mlops
           cm pull repo
           
-          cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed  --docker_dt=yes --docker_it=no --docker_cm_repo=mlcommons@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean  --docker --quiet
+          cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=$gpu_name --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed  --docker_dt=yes --docker_it=no --docker_cm_repo=mlcommons@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean  $docker_string --quiet
 
           cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name