Skip to content

Commit

Permalink
Add a new py3-functional-gpu workflow and tox env
Browse files Browse the repository at this point in the history
This runs our functional tests that require a GPU as a separate
workflow item, since they are not the instructlab e2e tests and also
are not the CPU-only unit or functional tests that run in the GitHub runners.

Signed-off-by: Ben Browning <[email protected]>
  • Loading branch information
bbrowning committed Jan 7, 2025
1 parent 8b86c75 commit 6d399f2
Show file tree
Hide file tree
Showing 6 changed files with 175 additions and 5 deletions.
157 changes: 157 additions & 0 deletions .github/workflows/functional-gpu-nvidia-t4-x1.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# SPDX-License-Identifier: Apache-2.0

name: "Functional GPU (NVIDIA Tesla T4 x1)"

on:
  # Temporary trigger: lets a PR that edits this workflow file exercise it.
  # NOTE(review): a PR that edits this file also matches the
  # pull_request_target paths below; both runs then land in the same
  # concurrency group, so one may cancel the other - confirm this is intended.
  pull_request:
    paths:
      - ".github/workflows/functional-gpu-nvidia-t4-x1.yml" # This workflow

  # Every merge commit that lands on 'main' or a release branch.
  push:
    branches: [main, "release-*"]

  # PRs targeting 'main' or a release branch, limited to the paths below.
  pull_request_target:
    branches: [main, "release-*"]
    paths:
      # Keep in sync with the merging criteria in 'mergify.yml'.
      - "**.py"
      - "pyproject.toml"
      - "requirements**.txt"
      - ".github/workflows/functional-gpu-nvidia-t4-x1.yml" # This workflow

# One active run per PR (or per ref when the event carries no PR number);
# a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

env:
  LC_ALL: en_US.UTF-8

# Every `run:` step uses bash unless the step overrides the shell.
defaults:
  run:
    shell: bash

# Default token scope for jobs that do not declare their own permissions.
permissions:
  contents: read
jobs:
# Launches the EC2 instance that is registered as a self-hosted runner for
# this run and publishes its runner label and instance id as job outputs.
start-small-ec2-runner:
  runs-on: ubuntu-latest
  outputs:
    ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    label: ${{ steps.start-ec2-runner.outputs.label }}
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
      with:
        aws-region: ${{ vars.AWS_REGION }}
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}

    # The step id is referenced by the job-level `outputs` mapping above.
    - id: start-ec2-runner
      name: Start EC2 runner
      uses: machulav/ec2-github-runner@1827d6ca7544d7044ddbd2e9360564651b463da2 # v2.3.7
      with:
        mode: start
        github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
        iam-role-name: instructlab-ci-runner
        ec2-image-id: ${{ vars.AWS_EC2_AMI }}
        ec2-instance-type: g4dn.2xlarge
        subnet-id: subnet-02d230cffd9385bd4
        security-group-id: sg-06300447c4a5fbef3
        # JSON array of tags applied to the launched AWS resources.
        aws-resource-tags: >
          [
            {"Key": "Name", "Value": "instructlab-ci-github-small-runner"},
            {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
            {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
            {"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
          ]
# Runs the GPU-marked functional tests on the EC2 runner started by
# `start-small-ec2-runner`.
functional-gpu-small-test:
  needs:
    - start-small-ec2-runner
  runs-on: ${{ needs.start-small-ec2-runner.outputs.label }}

  # It is important that this job has no write permissions and has
  # no access to any secrets. This part is where we are running
  # untrusted code from PRs.
  permissions: {}

  steps:
    - name: Install Packages
      run: |
        cat /etc/os-release
        sudo dnf install -y gcc gcc-c++ make git python3.11 python3.11-devel

    - name: Checkout instructlab/sdg
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      with:
        repository: "instructlab/sdg"
        path: "sdg"
        # https://github.com/actions/checkout/issues/249
        fetch-depth: 0

    # Both PR-style triggers must test the PR's code; without this step the
    # checkout above leaves the default branch of instructlab/sdg in place.
    - name: Fetch and checkout PR
      if: github.event_name == 'pull_request_target' || github.event_name == 'pull_request'
      working-directory: ./sdg
      run: |
        git fetch origin pull/${{ github.event.pull_request.number }}/head:pr-${{ github.event.pull_request.number }}
        git checkout pr-${{ github.event.pull_request.number }}

    # Must run inside the checkout directory ("sdg", see `path:` above):
    # requirements-dev.txt is read relative to the working directory and the
    # venv is created there.
    - name: Install instructlab/sdg
      working-directory: ./sdg
      run: |
        export PATH="/home/ec2-user/.local/bin:/usr/local/cuda/bin:$PATH"
        python3.11 -m venv --upgrade-deps venv
        . venv/bin/activate
        nvidia-smi
        python3.11 -m pip cache remove llama_cpp_python
        CMAKE_ARGS="-DLLAMA_CUDA=on" python3.11 -m pip install -r requirements-dev.txt

    - name: Check disk before tests
      run: |
        df -h

    # Each `run:` step starts a fresh shell, so the venv has to be activated
    # again, and tox has to be invoked from the directory holding tox.ini.
    # NOTE(review): assumes tox is installed into the venv by
    # requirements-dev.txt - confirm.
    - name: Run functional gpu tests with tox
      working-directory: ./sdg
      run: |
        . venv/bin/activate
        tox -e py3-functional-gpu

    - name: Check disk after tests
      run: |
        df -h
# Terminates the EC2 instance and deregisters the runner created by
# `start-small-ec2-runner`.
stop-small-ec2-runner:
  # always(): run this cleanup even when a job listed in `needs` failed or
  # was cancelled.
  if: always()
  runs-on: ubuntu-latest
  needs:
    - start-small-ec2-runner
    - functional-gpu-small-test
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
      with:
        aws-region: ${{ vars.AWS_REGION }}
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}

    - name: Stop EC2 runner
      uses: machulav/ec2-github-runner@1827d6ca7544d7044ddbd2e9360564651b463da2 # v2.3.7
      with:
        mode: stop
        github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
        # Both identifiers come from the outputs of the start job.
        ec2-instance-id: ${{ needs.start-small-ec2-runner.outputs.ec2-instance-id }}
        label: ${{ needs.start-small-ec2-runner.outputs.label }}

# Final marker job: it only runs once the runner start and the test job
# have both succeeded.
functional-gpu-small-workflow-complete:
  # "stop-small-ec2-runner" is deliberately not listed here: a failed EC2
  # cleanup should not block PRs.
  needs:
    - start-small-ec2-runner
    - functional-gpu-small-test
  runs-on: ubuntu-latest
  steps:
    - name: Functional GPU Workflow Complete
      run: echo "Functional GPU Workflow Complete"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,5 +105,5 @@ follow_imports = "silent"

[tool.pytest.ini_options]
markers = [
"slow: marks tests that are slow (deselect with '-m \"not slow\"')",
"gpu: marks tests that should run with gpus (deselect with '-m \"not gpu\"')",
]
5 changes: 5 additions & 0 deletions requirements-dev-gpu.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# SPDX-License-Identifier: Apache-2.0

-r requirements-dev.txt

llama-cpp-python[server]>=0.3.0,<1.0.0
2 changes: 1 addition & 1 deletion tests/functional/test_full_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def pipelines_package():
return FULL_PIPELINES_PACKAGE


@pytest.mark.slow
@pytest.mark.gpu
class TestFullPipeline(unittest.TestCase):
@pytest.fixture(autouse=True)
def _setup_fixtures(self, knowledge_dataset, knowledge_pipeline):
Expand Down
2 changes: 1 addition & 1 deletion tests/functional/test_simple_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def pipelines_package():
return SIMPLE_PIPELINES_PACKAGE


@pytest.mark.slow
@pytest.mark.gpu
class TestSimplePipeline(unittest.TestCase):
@pytest.fixture(autouse=True)
def _setup_fixtures(self, knowledge_dataset, knowledge_pipeline):
Expand Down
12 changes: 10 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ wheel_build_env = pkg
deps = -r requirements-dev.txt
commands =
unit: {envpython} -m pytest {posargs:tests --ignore=tests/functional}
unitcov: {envpython} -W error::UserWarning -m pytest --cov=instructlab.sdg --cov-report term --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests --ignore=tests/functional -m "not (examples or slow)"}
functional: {envpython} -m pytest {posargs:tests/functional}
unitcov: {envpython} -W error::UserWarning -m pytest --cov=instructlab.sdg --cov-report term --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests --ignore=tests/functional -m "not (examples or gpu)"}
functional: {envpython} -m pytest {posargs:tests/functional -m "not gpu"}

# format, check, and linting targets don't build and install the project to
# speed up testing.
Expand Down Expand Up @@ -84,6 +84,14 @@ deps = -r requirements-dev.txt
commands =
{envpython} ./scripts/validate_pipelines.py

# tox environment for the GPU-only functional tests.
# Dependencies come from requirements-dev-gpu.txt. When tox is given no
# positional arguments, pytest runs tests/functional restricted to tests
# carrying the `gpu` marker.
[testenv:py3-functional-gpu]
description = run functional tests that require a GPU
package = wheel
wheel_build_env = pkg
deps = -r requirements-dev-gpu.txt
commands =
{envpython} -m pytest {posargs:tests/functional -m "gpu"}

[gh]
python =
3.11 = py311-{unitcov, functional}
Expand Down

0 comments on commit 6d399f2

Please sign in to comment.