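# Reusable inference test workflow: for each model in the matrix, build a Docker
# image, start a container running a Ray cluster, and run serving and query
# smoke tests against it.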
name: Inference
on:
  workflow_call:
    inputs:
      ci_type:
        type: string
        default: 'pr'
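# Cancel any in-flight run for the same PR/ref when a new one starts.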
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-inf
  cancel-in-progress: true
jobs:
  inference:
    name: inference test
    strategy:
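      # On PR runs (ci_type == 'pr') the exclude rule drops every base matrix
      # combination, and only the models re-added under include are tested; on
      # full runs all models run and include merely attaches dtuner_model to mpt-7b.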
      matrix:
        model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1 ]
        isPR:
          - ${{ inputs.ci_type == 'pr' }}
        exclude:
          - { isPR: true }
        include:
          - { model: "gpt-j-6b" }
          - { model: "mistral-7b-v0.1" }
          - { model: "mpt-7b-bigdl" }
          - dtuner_model: nathan0/mpt-7b-deltatuner-model
            model: mpt-7b
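    # dtuner_model is set only for mpt-7b; the Deltatuner steps below run only
    # when it is present.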
    runs-on: self-hosted
    defaults:
      run:
        shell: bash
    container:
      image: ${{ vars.ACTIONS_RUNNER_CONTAINER_IMAGE }}
      env:
        http_proxy: ${{ vars.HTTP_PROXY_CONTAINER }}
        https_proxy: ${{ vars.HTTPS_PROXY_CONTAINER }}
      volumes:
        - /var/run/docker.sock:/var/run/docker.sock
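    # The host's Docker socket is mounted so the steps below can build and run
    # images on the host daemon.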
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set Name Prefix
        id: "prefix"
        run: |
          prefix="inference"
          if [[ "${{ matrix.model }}" == "mpt-7b-bigdl" ]]; then
            prefix="${prefix}_bigdl_cpu"
          fi
          echo "prefix is ${prefix}"
          echo "prefix=$prefix" >> "$GITHUB_OUTPUT"
      - name: Build Docker Image
        run: |
          if [[ "${{ matrix.model }}" == "mpt-7b-bigdl" ]]; then
            DF_SUFFIX=".bigdl-cpu"
          else
            DF_SUFFIX=".cpu_and_deepspeed"
          fi
          PREFIX=${{ steps.prefix.outputs.prefix }}
          docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${PREFIX}:latest
          docker container prune -f
          docker image prune -f
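      # The step-name prefix doubles as the container name; stop and remove any
      # stale container left over from a previous run before starting a new one.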
      - name: Start Docker Container
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          cid=$(docker ps -q --filter "name=${PREFIX}")
          if [[ -n "$cid" ]]; then docker stop $cid && docker rm $cid; fi
          docker run -tid -v ${{ vars.MODEL_CACHE_PATH }}:/root/.cache/huggingface/hub -v ${{ vars.CODE_CHECKOUT_PATH }}:/root/llm-on-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} --name="${PREFIX}" --hostname="${PREFIX}-container" ${PREFIX}:latest
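      # Bring up the Ray cluster inside the container; the serve steps below
      # attach to it.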
      - name: Start Ray Cluster
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          docker exec "${PREFIX}" bash -c "./inference/deep/start-ray-cluster.sh"
      - name: Run Inference Test
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          if [[ "${{ matrix.model }}" == "mpt-7b-bigdl" ]]; then
            docker exec "${PREFIX}" bash -c "python inference/serve.py --config_file inference/models/bigdl/mpt-7b-bigdl.yaml --serve_simple"
          else
            docker exec "${PREFIX}" bash -c "MODEL_TO_SERVE=\"${{ matrix.model }}\" python inference/serve.py --serve_simple"
          fi
          docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
          docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
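      # Runs only for matrix entries that define dtuner_model (currently mpt-7b).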
      - name: Run Inference Test with Deltatuner
        if: ${{ matrix.dtuner_model }}
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          docker exec "${PREFIX}" bash -c "python inference/serve.py --config_file .github/workflows/config/mpt_deltatuner.yaml --serve_simple"
          docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
          docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
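      # DeepSpeed serving is not supported for gpt2 or the mpt-7b variants.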
      - name: Run Inference Test with DeepSpeed
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          if [[ "${{ matrix.model }}" =~ ^(gpt2|mpt-7b.*)$ ]]; then
            echo "${{ matrix.model }} is not supported!"
          else
            docker exec "${PREFIX}" bash -c "python .github/workflows/config/update_inference_config.py --config_file inference/models/\"${{ matrix.model }}\".yaml --output_file \"${{ matrix.model }}\".yaml.deepspeed --deepspeed"
            docker exec "${PREFIX}" bash -c "python inference/serve.py --config_file \"${{ matrix.model }}\".yaml.deepspeed --serve_simple"
            docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
            docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
          fi
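      # Same DeepSpeed exclusions as above; runs only when dtuner_model is set.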
      - name: Run Inference Test with DeepSpeed and Deltatuner
        if: ${{ matrix.dtuner_model }}
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          if [[ "${{ matrix.model }}" =~ ^(gpt2|mpt-7b.*)$ ]]; then
            echo "${{ matrix.model }} is not supported!"
          else
            docker exec "${PREFIX}" bash -c "python inference/serve.py --config_file .github/workflows/config/mpt_deltatuner_deepspeed.yaml --serve_simple"
            docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
            docker exec "${PREFIX}" bash -c "python examples/inference/api_server_simple/query_single.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
          fi
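      # Serve without --serve_simple and query through the OpenAI-style HTTP API.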
      - name: Run Inference Test with REST API
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          if [[ "${{ matrix.model }}" == "mpt-7b-bigdl" ]]; then
            docker exec "${PREFIX}" bash -c "python inference/serve.py --config_file inference/models/bigdl/mpt-7b-bigdl.yaml"
          else
            docker exec "${PREFIX}" bash -c "MODEL_TO_SERVE=\"${{ matrix.model }}\" python inference/serve.py"
          fi
          docker exec "${PREFIX}" bash -c "MODEL_TO_SERVE=\"${{ matrix.model }}\" python examples/inference/api_server_openai/query_http_requests.py"
      - name: Stop Ray
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          cid=$(docker ps -q --filter "name=${PREFIX}")
          if [[ -n "$cid" ]]; then
            docker exec "${PREFIX}" bash -c "ray stop"
          fi
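      # Clean up the container even when an earlier step failed.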
      - name: Stop Container
        if: success() || failure()
        run: |
          PREFIX=${{ steps.prefix.outputs.prefix }}
          cid=$(docker ps -q --filter "name=${PREFIX}")
          if [[ -n "$cid" ]]; then docker stop $cid && docker rm $cid; fi
      - name: Test Summary
        run: echo "to be continued"