# .github/workflows/workflow_inference.yml

name: Inference

# Reusable workflow: it is only triggered through `workflow_call`, and every
# input is optional with the default shown.
on:
  workflow_call:
    inputs:
      # Compared against 'pr' to compute the matrix flag `isPR`.
      ci_type:
        type: string
        required: false
        default: 'pr'
      # Image used for the job container.
      runner_container_image:
        type: string
        required: false
        default: '10.1.2.13:5000/llmray-build'
      # Exported as `http_proxy` inside the job container.
      http_proxy:
        type: string
        required: false
        default: 'http://proxy-prc.intel.com:912'
      # Exported as `https_proxy` inside the job container.
      https_proxy:
        type: string
        required: false
        default: 'http://proxy-prc.intel.com:912'
      # Host directory mounted at /root/actions-runner-config in the job container.
      runner_config_path:
        type: string
        required: false
        default: '/home/ci/llm-ray-actions-runner'
      # Handed to `start_docker` when the test container is started.
      code_checkout_path:
        type: string
        required: false
        default: '/home/ci/actions-runner/_work/llm-on-ray/llm-on-ray'
      # Handed to `start_docker` when the test container is started.
      model_cache_path:
        type: string
        required: false
        default: '/mnt/DP_disk1/huggingface/cache'

# Runs are grouped by workflow name plus the pull-request number (or the git
# ref when there is no pull request), with a fixed "-inf" suffix. A newer run
# in the same group cancels the one still in progress.
concurrency:
  group: '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-inf'
  cancel-in-progress: true

# Workflow-level token permissions: read-only access to repository contents.
# Added using https://github.com/step-security/secure-repo
permissions:
  contents: read

jobs:
  # One job per matrix entry. It runs on a self-hosted runner, inside the
  # container image supplied by the caller.
  inference:
    name: inference
    strategy:
      matrix:
        # Full list of models that can be exercised.
        model:
          - gpt-j-6b
          - gpt2
          - bloom-560m
          - opt-125m
          - mpt-7b
          - mistral-7b-v0.1
          - mpt-7b-ipex-llm
          - neural-chat-7b-v3-1
          - CodeLlama-7b-hf
          - falcon-7b
          - starcoder
          - llama-2-7b-chat-hf
          - llama-2-7b-chat-hf-no-vllm
          - deepseek-coder-33b-instruct
        # Single-valued axis: true when the caller passed ci_type == 'pr'.
        isPR:
          - ${{ inputs.ci_type == 'pr' }}

        # When isPR is true, every generated combination matches this rule
        # and is removed, leaving only the `include` entries below.
        exclude:
          - isPR: true

        # Models that are always run.
        # NOTE(review): when isPR is false nothing is excluded and these
        # models are already in the list above, so per GitHub's `include`
        # rules they should not add extra jobs. Confirm.
        include:
          - model: gpt-j-6b
          - model: mistral-7b-v0.1
          - model: mpt-7b-ipex-llm
          - model: llama-2-7b-chat-hf-no-vllm

    runs-on: self-hosted

    defaults:
      run:
        shell: bash
    container:
      image: ${{ inputs.runner_container_image }}
      env:
        http_proxy: ${{ inputs.http_proxy }}
        https_proxy: ${{ inputs.https_proxy }}
        SHELL: bash -eo pipefail
      volumes:
        # Host Docker socket, shared with the job container.
        - /var/run/docker.sock:/var/run/docker.sock
        # Caller-supplied runner configuration directory.
        - ${{ inputs.runner_config_path }}:/root/actions-runner-config

    steps:
      # Check out the repository; later steps source dev/scripts/ci-functions.sh
      # by relative path. The action is pinned to a full commit SHA, with the
      # matching tag noted in the trailing comment.
      - name: Checkout
        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

      # Build the target name ("inference" plus a per-model suffix) and publish
      # it as the `target` output of this step for the steps that follow.
      - name: Determine Target
        id: target
        env:
          # Passed through the environment instead of being spliced into the
          # script text, so the value is never parsed as shell code.
          MATRIX_MODEL: ${{ matrix.model }}
        run: |
          target="inference"
          source dev/scripts/ci-functions.sh
          # get_TARGET_SUFFIX is expected to come from the sourced script.
          target="${target}$(get_TARGET_SUFFIX "${MATRIX_MODEL}")"
          echo "target is ${target}"
          # Quoted so the redirect target is never word-split.
          echo "target=${target}" >> "${GITHUB_OUTPUT}"

      # Build the image for the computed target, using the Dockerfile suffix
      # that get_DF_SUFFIX reports for this matrix model.
      - name: Build Docker Image
        env:
          # Expression values reach the script via the environment rather than
          # by textual substitution into the shell source.
          TARGET: ${{ steps.target.outputs.target }}
          MATRIX_MODEL: ${{ matrix.model }}
        run: |
          USE_PROXY="1"
          source dev/scripts/ci-functions.sh
          DF_SUFFIX="$(get_DF_SUFFIX "${MATRIX_MODEL}")"
          # Arguments are quoted so that an empty suffix keeps its positional
          # slot instead of shifting USE_PROXY into it.
          build_and_prune "${TARGET}" "${DF_SUFFIX}" "${USE_PROXY}"

      # Start the test container for the computed target, handing it the
      # caller-supplied checkout and model-cache paths.
      - name: Start Docker Container
        env:
          # Caller-supplied inputs are passed as environment variables so that
          # their content is treated as data, not as part of the script.
          TARGET: ${{ steps.target.outputs.target }}
          code_checkout_path: ${{ inputs.code_checkout_path }}
          model_cache_path: ${{ inputs.model_cache_path }}
        run: |
          USE_PROXY="1"
          source dev/scripts/ci-functions.sh
          # Quoted so paths containing spaces stay single arguments.
          start_docker "${TARGET}" "${code_checkout_path}" "${model_cache_path}" "${USE_PROXY}"

      # Call the start_ray helper for the computed target.
      - name: Start Ray Cluster
        env:
          # Step output passed via the environment instead of inline substitution.
          TARGET: ${{ steps.target.outputs.target }}
        run: |
          source dev/scripts/ci-functions.sh
          start_ray "${TARGET}"

      # Call the inference_test helper for this matrix model on the computed target.
      - name: Run Inference Test
        env:
          # Expression values are passed via the environment instead of inline substitution.
          TARGET: ${{ steps.target.outputs.target }}
          MATRIX_MODEL: ${{ matrix.model }}
        run: |
          source dev/scripts/ci-functions.sh
          inference_test "${TARGET}" "${MATRIX_MODEL}"

      # Call the inference_deepspeed_test helper for this matrix model on the
      # computed target.
      - name: Run Inference Test with DeepSpeed
        env:
          # Expression values are passed via the environment instead of inline substitution.
          TARGET: ${{ steps.target.outputs.target }}
          MATRIX_MODEL: ${{ matrix.model }}
        run: |
          source dev/scripts/ci-functions.sh
          inference_deepspeed_test "${TARGET}" "${MATRIX_MODEL}"

      # Call the inference_restapi_test helper for this matrix model on the
      # computed target.
      - name: Run Inference Test with REST API
        env:
          # Expression values are passed via the environment instead of inline substitution.
          TARGET: ${{ steps.target.outputs.target }}
          MATRIX_MODEL: ${{ matrix.model }}
        run: |
          source dev/scripts/ci-functions.sh
          inference_restapi_test "${TARGET}" "${MATRIX_MODEL}"

      # Call the inference_agent_restapi_test helper for this matrix model on
      # the computed target.
      - name: Run Agent tool Inference Test with REST API
        env:
          # Expression values are passed via the environment instead of inline substitution.
          TARGET: ${{ steps.target.outputs.target }}
          MATRIX_MODEL: ${{ matrix.model }}
        run: |
          source dev/scripts/ci-functions.sh
          inference_agent_restapi_test "${TARGET}" "${MATRIX_MODEL}"

      # Call the stop_ray helper for the computed target. With no `if:` this
      # step only runs when every earlier step succeeded.
      - name: Stop Ray
        env:
          # Step output passed via the environment instead of inline substitution.
          TARGET: ${{ steps.target.outputs.target }}
        run: |
          source dev/scripts/ci-functions.sh
          stop_ray "${TARGET}"

      # Cleanup: call the stop_container helper for the computed target.
      # `always()` is used so the cleanup also runs when the job is cancelled.
      # This workflow sets `cancel-in-progress: true`, so cancellations are an
      # expected path and would otherwise skip this step on the self-hosted runner.
      - name: Stop Container
        if: always()
        env:
          # Step output passed via the environment instead of inline substitution.
          TARGET: ${{ steps.target.outputs.target }}
        run: |
          source dev/scripts/ci-functions.sh
          stop_container "${TARGET}"