Fix github action failures (#68)

* Update test-nvidia-mlperf-inference-implementations.yml
* Update test-mlperf-inference-mixtral.yml
* Fix submission generation github action
* Set predeps:False for mlperf-inference-submission-generation
* Added version support for submission generation
mlcommons · Dec 22, 2024 · 7bc5f0d · 7bc5f0d
1 parent b051bb1
commit 7bc5f0d
Show file tree

Hide file tree

Showing 5 changed files with 17 additions and 6 deletions.
diff --git a/.github/workflows/test-cm-based-submission-generation.yml b/.github/workflows/test-cm-based-submission-generation.yml
@@ -80,19 +80,26 @@ jobs:
         fi
         # Dynamically set the log group to simulate a dynamic step name
         echo "::group::$description"
-        cm ${{ matrix.action }} script --tags=generate,inference,submission  --clean --preprocess_submission=yes --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args
+        cm ${{ matrix.action }} script --tags=generate,inference,submission  --version=v4.1 --clean --preprocess_submission=yes --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args
         exit_status=$?
+        # Do not exit here: the case checks below turn exit_status into STEP_FAILED for the final step.
         echo "Exit status for the job ${description} ${exit_status}"
         if [[ "${{ matrix.case }}" == "case-5" || "${{ matrix.case }}" == "case-6" ]]; then
           # For cases 5 and 6, exit status should be 0 if cm command fails, 1 if it succeeds
           if [[ ${exit_status} -ne 0 ]]; then
-            exit 0
+            echo "STEP_FAILED=false" >> $GITHUB_ENV
           else
-            exit ${exit_status}
+            echo "STEP_FAILED=true" >> $GITHUB_ENV
           fi
         else
           # For other cases, exit with the original status
-          test ${exit_status} -eq 0 || exit ${exit_status}
+          test ${exit_status} -eq 0 || echo "STEP_FAILED=true" >> $GITHUB_ENV
         fi
         echo "::endgroup::"
+    - name: Fail if Step Failed
+      if: env.STEP_FAILED == 'true'  # the script above appends STEP_FAILED=... to $GITHUB_ENV
+      continue-on-error: false
+      run: |
+        echo "Manually failing the workflow because the step failed."
+        exit 1
   
diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml
@@ -10,6 +10,7 @@ on:
 jobs:
   build_reference:
     if: github.repository_owner == 'gateoverflow'
+    timeout-minutes: 1440
     runs-on: [ self-hosted, phoenix, linux, x64 ]
     strategy:
       fail-fast: false

diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml
@@ -2,11 +2,12 @@ name: MLPerf Inference Nvidia implementations
 
 on:
   schedule:
-    - cron: "08 23 * * *" #to be adjusted
+    - cron: "08 01 * * *" #to be adjusted
 
 jobs:
   run_nvidia:
       if: github.repository_owner == 'gateoverflow'
+      timeout-minutes: 1440
       runs-on:
        - self-hosted
        - linux

diff --git a/automation/script/module.py b/automation/script/module.py
@@ -1635,7 +1635,7 @@ def _run(self, i):
                 'self': self
             }
 
-            # Check if pre-process and detect
+            # Check and run predeps in customize.py
             if str(meta.get('predeps', 'True')).lower() not in ["0", "false", "no"] and os.path.isfile(
                     path_to_customize_py):  # possible duplicate execution - needs fix
                 r = utils.load_python_module(

diff --git a/script/generate-mlperf-inference-submission/_cm.yaml b/script/generate-mlperf-inference-submission/_cm.yaml
@@ -7,6 +7,7 @@ default_env:
   CM_MLPERF_RUN_STYLE: valid
   CM_MLPERF_SUBMISSION_DIR_SHARED: 'yes'
   CM_RUN_MLPERF_ACCURACY: 'on'
+predeps: False
 deps:
 - names:
   - python
@@ -84,6 +85,7 @@ input_mapping:
   sw_notes_extra: CM_MLPERF_SUT_SW_NOTES_EXTRA
   tar: CM_TAR_SUBMISSION_DIR
   get_platform_details: CM_GET_PLATFORM_DETAILS
+  version: CM_MLPERF_SUBMISSION_CHECKER_VERSION
 post_deps:
 - enable_if_env:
     CM_RUN_MLPERF_ACCURACY: