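# Inference CI: for each model in the matrix, build a CPU serving image,
# start a Ray cluster inside a container, serve the model, and smoke-test
# the simple and REST endpoints.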
name: Inference
on:
  workflow_call:
    inputs:
      ci_type:
        type: string
        default: 'pr'
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-inf
  cancel-in-progress: true
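# One in-flight run per PR (or per ref for other triggers); a newer push
# cancels the superseded run instead of queueing behind it.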
jobs:
  inference:
    name: inference test
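    # On PR runs (ci_type == 'pr'), `exclude` drops every matrix combination
    # and `include` adds back a small subset, so PRs test only gpt-j-6b,
    # mistral-7b-v0.1, mpt-7b-bigdl, and mpt-7b (with a deltatuner adapter);
    # the full model list runs only when ci_type is not 'pr'.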
    strategy:
      matrix:
        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1 ]
        isPR:
          - ${{ inputs.ci_type == 'pr' }}
        exclude:
          - { isPR: true }
        include:
          - { model: "gpt-j-6b" }
          - { model: "mistral-7b-v0.1" }
          - { model: "mpt-7b-bigdl" }
          - dtuner_model: nathan0/mpt-7b-deltatuner-model
            model: mpt-7b
    runs-on: self-hosted
    defaults:
      run:
        shell: bash
    container:
      image: ${{ vars.ACTIONS_RUNNER_CONTAINER_IMAGE }}
      env:
        http_proxy: ${{ vars.HTTP_PROXY_CONTAINER }}
        https_proxy: ${{ vars.HTTPS_PROXY_CONTAINER }}
      volumes:
        - /var/run/docker.sock:/var/run/docker.sock
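    # The job runs inside a runner container that mounts the host Docker
    # socket, so the steps below build and launch sibling containers on the
    # host daemon rather than nested ones.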
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set Name Prefix
        id: "prefix"
        run: |
          prefix="inference"
          if [[ ${{ matrix.model }} == "mpt-7b-bigdl" ]]; then
            prefix="${prefix}_bigdl_cpu"
          fi
          echo "prefix is ${prefix}"
          echo "prefix=$prefix" >> $GITHUB_OUTPUT
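      # DF_SUFFIX selects dev/docker/Dockerfile.bigdl-cpu for the BigDL model
      # and Dockerfile.cpu_and_deepspeed for everything else.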
      - name: Build Docker Image
        run: |
          if [[ ${{ matrix.model }} == "mpt-7b-bigdl" ]]; then
            DF_SUFFIX=".bigdl-cpu"
          else
            DF_SUFFIX=".cpu_and_deepspeed"
          fi
          PREFIX=${{ steps.prefix.outputs.prefix }}
          docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${PREFIX}:latest && yes | docker container prune && docker image prune -f
      - name: Start Docker Container
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          cid=$(docker ps -q --filter "name=${PREFIX}")
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
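          # Mount the shared HF model cache so weights are reused across runs,
          # and the checked-out repo at /root/llm-on-ray for the test scripts.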
          docker run -tid -v ${{ vars.MODEL_CACHE_PATH }}:/root/.cache/huggingface/hub -v ${{ vars.CODE_CHECKOUT_PATH }}:/root/llm-on-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} --name="${PREFIX}" --hostname="${PREFIX}-container" ${PREFIX}:latest
      - name: Start Ray Cluster
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          docker exec "${PREFIX}" bash -c "./inference/deep/start-ray-cluster.sh"
      - name: Run Inference Test
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          if [[ ${{ matrix.model }} == "mpt-7b-bigdl" ]]; then
            docker exec "${PREFIX}" bash -c "python inference/serve.py --config_file inference/models/bigdl/mpt-7b-bigdl.yaml --serve_simple"
          else
            docker exec "${PREFIX}" bash -c "MODEL_TO_SERVE=\"${{ matrix.model }}\" python inference/serve.py --serve_simple"
          fi
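          # Query the simple endpoint in both non-streaming and streaming modes.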
          docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
          docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
      - name: Run Inference Test with Deltatuner
        if: ${{ matrix.dtuner_model }}
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          docker exec "${PREFIX}" bash -c "python inference/serve.py --config_file .github/workflows/config/mpt_deltatuner.yaml --serve_simple"
          docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
          docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
      - name: Run Inference Test with DeepSpeed
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          if [[ ${{ matrix.model }} =~ ^(gpt2|mpt-7b.*)$ ]]; then
            echo "${{ matrix.model }} is not supported!"
          else
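            # Rewrite the model config with DeepSpeed enabled, then serve and
            # query the generated <model>.yaml.deepspeed file.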
            docker exec "${PREFIX}" bash -c "python .github/workflows/config/update_inference_config.py --config_file inference/models/\"${{ matrix.model }}\".yaml --output_file \"${{ matrix.model }}\".yaml.deepspeed --deepspeed"
            docker exec "${PREFIX}" bash -c "python inference/serve.py --config_file \"${{ matrix.model }}\".yaml.deepspeed --serve_simple"
            docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
            docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
          fi
      - name: Run Inference Test with DeepSpeed and Deltatuner
        if: ${{ matrix.dtuner_model }}
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          if [[ ${{ matrix.model }} =~ ^(gpt2|mpt-7b.*)$ ]]; then
            echo "${{ matrix.model }} is not supported!"
          else
            docker exec "${PREFIX}" bash -c "python inference/serve.py --config_file .github/workflows/config/mpt_deltatuner_deepspeed.yaml --serve_simple"
            docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
            docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
          fi
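      # The next test drives what the example path suggests is the OpenAI-style
      # HTTP API server (no --serve_simple), rather than the simple endpoint.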
      - name: Run Inference Test with REST API
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          if [[ ${{ matrix.model }} == "mpt-7b-bigdl" ]]; then
            docker exec "${PREFIX}" bash -c "python inference/serve.py --config_file inference/models/bigdl/mpt-7b-bigdl.yaml"
          else
            docker exec "${PREFIX}" bash -c "MODEL_TO_SERVE=\"${{ matrix.model }}\" python inference/serve.py"
          fi
          docker exec "${PREFIX}" bash -c "MODEL_TO_SERVE=\"${{ matrix.model }}\" python examples/inference/api_server_openai/query_http_requests.py"
      - name: Stop Ray
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          cid=$(docker ps -q --filter "name=${PREFIX}")
          if [[ ! -z "$cid" ]]; then
            docker exec "${PREFIX}" bash -c "ray stop"
          fi
      - name: Stop Container
        if: success() || failure()
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          cid=$(docker ps -q --filter "name=${PREFIX}")
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
      - name: Test Summary
        run: echo "to be continued"