feat(batch-jobs): Add step to pass API server env vars to spark jobs #2650
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Merlin CI Workflow | |
on: | |
push: | |
branches: | |
- main | |
tags: | |
- v* | |
pull_request: | |
env: | |
ARTIFACT_RETENTION_DAYS: 7 | |
DOCKER_BUILDKIT: 1 | |
DOCKER_REGISTRY: ghcr.io | |
GO_VERSION: "1.22" | |
jobs: | |
create-version: | |
runs-on: ubuntu-latest | |
outputs: | |
version: ${{ steps.create_version.outputs.version }} | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
- id: create_version | |
name: Create version string | |
run: | | |
# Strip git ref prefix from version | |
VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') | |
[ "$VERSION" == "main" ] && VERSION=$(git describe --tags --always --first-parent) | |
# Strip "v" prefix | |
[[ "${VERSION}" == "v"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//') | |
# If it's pull request the version string is prefixed by 0.0.0- | |
[ ${{ github.event_name}} == "pull_request" ] && VERSION="0.0.0-${{ github.event.pull_request.head.sha }}" | |
echo "${VERSION}" | |
echo "version=${VERSION}" >> "$GITHUB_OUTPUT" | |
test-batch-predictor: | |
runs-on: ubuntu-latest | |
strategy: | |
fail-fast: false | |
matrix: | |
python-version: ["3.8", "3.9", "3.10"] | |
env: | |
PIPENV_DEFAULT_PYTHON_VERSION: ${{ matrix.python-version }} | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Set up Google Cloud SDK | |
uses: google-github-actions/setup-gcloud@v2 | |
- uses: actions/setup-python@v5 | |
with: | |
python-version: ${{ matrix.python-version }} | |
- uses: actions/cache@v4 | |
with: | |
path: ~/.cache/pip | |
key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/requirements.txt') }} | |
restore-keys: | | |
${{ runner.os }}-pip-${{ matrix.python-version }}- | |
- uses: actions/cache@v4 | |
with: | |
path: ~/.local/share/virtualenvs | |
key: ${{ runner.os }}-python-${{ matrix.python-version }}-pipenv-batch-predictor | |
- name: Install dependencies | |
working-directory: ./python/batch-predictor | |
run: | | |
pip install pipenv==2023.7.23 | |
make setup | |
- name: Run batch-predictor test | |
working-directory: ./python/batch-predictor | |
run: make unit-test | |
test-pyfunc-server: | |
runs-on: ubuntu-latest | |
strategy: | |
fail-fast: false | |
matrix: | |
python-version: ["3.8", "3.9", "3.10"] | |
env: | |
PIPENV_DEFAULT_PYTHON_VERSION: ${{ matrix.python-version }} | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: actions/setup-python@v5 | |
with: | |
python-version: ${{ matrix.python-version }} | |
- uses: actions/cache@v4 | |
with: | |
path: ~/.cache/pip | |
key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/requirements.txt') }} | |
restore-keys: | | |
${{ runner.os }}-pip-${{ matrix.python-version }}- | |
- uses: actions/cache@v4 | |
with: | |
path: ~/.local/share/virtualenvs | |
key: ${{ runner.os }}-python-${{ matrix.python-version }}-pipenv-pyfunc-server | |
- name: Install dependencies | |
working-directory: ./python/pyfunc-server | |
run: | | |
pip install pipenv==2023.7.23 | |
make setup | |
- name: Run pyfunc-server test | |
working-directory: ./python/pyfunc-server | |
run: make test | |
test-python-sdk: | |
runs-on: ubuntu-latest | |
strategy: | |
fail-fast: false | |
matrix: | |
python-version: ["3.8", "3.9", "3.10"] | |
env: | |
PIPENV_DEFAULT_PYTHON_VERSION: ${{ matrix.python-version }} | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: actions/setup-python@v5 | |
with: | |
python-version: ${{ matrix.python-version }} | |
- uses: actions/cache@v4 | |
with: | |
path: ~/.cache/pip | |
key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/requirements.txt') }} | |
restore-keys: | | |
${{ runner.os }}-pip-${{ matrix.python-version }}- | |
- uses: actions/cache@v4 | |
with: | |
path: ~/.local/share/virtualenvs | |
key: ${{ runner.os }}-python-${{ matrix.python-version }}-pipenv-python-sdk | |
- name: Install dependencies | |
working-directory: ./python/sdk | |
run: | | |
pip install pipenv==2023.7.23 | |
make setup | |
- name: Unit test Python SDK | |
env: | |
E2E_USE_GOOGLE_OAUTH: false | |
working-directory: ./python/sdk | |
run: make unit-test | |
lint-api: | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: actions/setup-go@v5 | |
with: | |
go-version: ${{ env.GO_VERSION }} | |
cache-dependency-path: api/go.sum | |
- name: golangci-lint | |
uses: golangci/golangci-lint-action@v6 | |
with: | |
# Ensure the same version as the one defined in Makefile | |
version: v1.58.1 | |
working-directory: api | |
test-api: | |
runs-on: ubuntu-latest | |
services: | |
postgres: | |
image: postgres:12.4 | |
env: | |
POSTGRES_DB: ${{ secrets.DB_NAME }} | |
POSTGRES_USER: ${{ secrets.DB_USERNAME }} | |
POSTGRES_PASSWORD: ${{ secrets.DB_PASSWORD }} | |
ports: | |
- 5432:5432 | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: actions/setup-go@v5 | |
with: | |
go-version: ${{ env.GO_VERSION }} | |
cache-dependency-path: api/go.sum | |
- name: Install dependencies | |
run: | | |
make setup | |
make init-dep-api | |
- name: Test API files | |
env: | |
POSTGRES_HOST: localhost | |
POSTGRES_DB: ${{ secrets.DB_NAME }} | |
POSTGRES_USER: ${{ secrets.DB_USERNAME }} | |
POSTGRES_PASSWORD: ${{ secrets.DB_PASSWORD }} | |
run: make it-test-api-ci | |
test-observation-publisher: | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: actions/setup-python@v5 | |
id: setup-python | |
with: | |
python-version: "3.10" | |
- uses: actions/cache@v4 | |
with: | |
path: ~/.cache/pip | |
key: ${{ runner.os }}-pip-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/requirements.txt') }} | |
restore-keys: | | |
${{ runner.os }}-pip-${{ steps.setup-python-outputs.python-version }}- | |
- name: Install dependencies | |
working-directory: ./python/observation-publisher | |
run: | | |
make setup | |
- name: Unit test observation publisher | |
working-directory: ./python/observation-publisher | |
run: make test | |
build-ui: | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: actions/setup-node@v4 | |
with: | |
node-version: 20 | |
cache: yarn | |
cache-dependency-path: ui/yarn.lock | |
- name: Install dependencies | |
run: make init-dep-ui | |
- name: Lint UI files | |
run: make lint-ui | |
- name: Test UI files | |
run: make test-ui | |
- name: Build UI static files | |
run: make build-ui | |
- name: Publish UI Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
name: merlin-ui-dist | |
path: ui/build/ | |
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} | |
build-api: | |
runs-on: ubuntu-latest | |
services: | |
postgres: | |
image: postgres:12.4 | |
env: | |
POSTGRES_DB: ${{ secrets.DB_NAME }} | |
POSTGRES_USER: ${{ secrets.DB_USERNAME }} | |
POSTGRES_PASSWORD: ${{ secrets.DB_PASSWORD }} | |
ports: | |
- 5432:5432 | |
needs: | |
- create-version | |
- build-ui | |
- test-api | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Download UI Dist | |
uses: actions/download-artifact@v4 | |
with: | |
name: merlin-ui-dist | |
path: ui/build | |
- name: Build API Docker | |
run: docker build -t merlin:${{ needs.create-version.outputs.version }} -f Dockerfile . | |
- name: Save API Docker | |
run: docker image save --output merlin.${{ needs.create-version.outputs.version }}.tar merlin:${{ needs.create-version.outputs.version }} | |
- name: Publish API Docker Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
name: merlin.${{ needs.create-version.outputs.version }}.tar | |
path: merlin.${{ needs.create-version.outputs.version }}.tar | |
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} | |
build-batch-predictor-base-image: | |
runs-on: ubuntu-latest | |
needs: | |
- create-version | |
- test-batch-predictor | |
env: | |
DOCKER_IMAGE_TAG: "ghcr.io/${{ github.repository }}/merlin-pyspark-base:${{ needs.create-version.outputs.version }}" | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Build Batch Predictor Base Docker | |
run: docker build -t ${{ env.DOCKER_IMAGE_TAG }} -f python/batch-predictor/docker/base.Dockerfile python | |
- name: Test Build Batch Predictor Docker | |
run: | | |
docker build -t "test-batch-predictor:1" \ | |
-f python/batch-predictor/docker/local-app.Dockerfile \ | |
--build-arg BASE_IMAGE=${{ env.DOCKER_IMAGE_TAG }} \ | |
--build-arg MODEL_DEPENDENCIES_URL=batch-predictor/test-model/conda.yaml \ | |
--build-arg MODEL_ARTIFACTS_URL=batch-predictor/test-model \ | |
python | |
- name: Save Batch Predictor Base Docker | |
run: docker image save --output merlin-pyspark-base.${{ needs.create-version.outputs.version }}.tar ${{ env.DOCKER_IMAGE_TAG }} | |
- name: Publish Batch Predictor Base Docker Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
name: merlin-pyspark-base.${{ needs.create-version.outputs.version }}.tar | |
path: merlin-pyspark-base.${{ needs.create-version.outputs.version }}.tar | |
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} | |
build-pyfunc-server-base-image: | |
runs-on: ubuntu-latest | |
needs: | |
- create-version | |
- test-pyfunc-server | |
env: | |
DOCKER_REGISTRY: ghcr.io | |
DOCKER_IMAGE_TAG: "ghcr.io/${{ github.repository }}/merlin-pyfunc-base:${{ needs.create-version.outputs.version }}" | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Build Pyfunc Server Base Docker | |
run: docker build -t ${{ env.DOCKER_IMAGE_TAG }} -f python/pyfunc-server/docker/base.Dockerfile python | |
- name: Test Build Pyfunc Server Docker | |
run: | | |
docker build -t "test-pyfunc-server:1" \ | |
-f python/pyfunc-server/docker/local.Dockerfile \ | |
--build-arg BASE_IMAGE=${{ env.DOCKER_IMAGE_TAG }} \ | |
--build-arg MODEL_DEPENDENCIES_URL=pyfunc-server/test/local-artifacts/conda.yaml \ | |
--build-arg MODEL_ARTIFACTS_URL=pyfunc-server/test/local-artifacts \ | |
python | |
- name: Save Pyfunc Server Base Docker | |
run: docker image save --output merlin-pyfunc-base.${{ needs.create-version.outputs.version }}.tar ${{ env.DOCKER_IMAGE_TAG }} | |
- name: Publish Pyfunc Server Base Docker Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
name: merlin-pyfunc-base.${{ needs.create-version.outputs.version }}.tar | |
path: merlin-pyfunc-base.${{ needs.create-version.outputs.version }}.tar | |
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} | |
build-transformer: | |
runs-on: ubuntu-latest | |
needs: | |
- create-version | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: actions/setup-go@v5 | |
with: | |
go-version: ${{ env.GO_VERSION }} | |
cache-dependency-path: api/go.sum | |
- name: Install dependencies | |
run: make init-dep-api | |
- name: Build Standard Transformer | |
run: make build-transformer | |
- name: Build Standard Transformer Docker | |
run: docker build -t merlin-transformer:${{ needs.create-version.outputs.version }} -f transformer.Dockerfile . | |
- name: Save Standard Transformer Docker | |
run: docker image save --output merlin-transformer.${{ needs.create-version.outputs.version }}.tar merlin-transformer:${{ needs.create-version.outputs.version }} | |
- name: Publish Standard Transformer Docker Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
name: merlin-transformer.${{ needs.create-version.outputs.version }}.tar | |
path: merlin-transformer.${{ needs.create-version.outputs.version }}.tar | |
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} | |
build-inference-logger: | |
runs-on: ubuntu-latest | |
needs: | |
- create-version | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: actions/setup-go@v5 | |
with: | |
go-version: ${{ env.GO_VERSION }} | |
cache-dependency-path: api/go.sum | |
- name: Build Inference Logger | |
run: make build-inference-logger | |
- name: Build Inference Logger Docker | |
run: docker build -t merlin-logger:${{ needs.create-version.outputs.version }} -f inference-logger.Dockerfile . | |
- name: Save Inference Logger Docker | |
run: docker image save --output merlin-logger.${{ needs.create-version.outputs.version }}.tar merlin-logger:${{ needs.create-version.outputs.version }} | |
- name: Publish Inference Logger Docker Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
name: merlin-logger.${{ needs.create-version.outputs.version }}.tar | |
path: merlin-logger.${{ needs.create-version.outputs.version }}.tar | |
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} | |
build-observation-publisher: | |
runs-on: ubuntu-latest | |
needs: | |
- create-version | |
- test-observation-publisher | |
env: | |
DOCKER_REGISTRY: ghcr.io | |
DOCKER_IMAGE_TAG: "ghcr.io/${{ github.repository }}/merlin-observation-publisher:${{ needs.create-version.outputs.version }}" | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Build Observation Publisher Docker | |
env: | |
OBSERVATION_PUBLISHER_IMAGE_TAG: ${{ env.DOCKER_IMAGE_TAG }} | |
run: make observation-publisher | |
working-directory: ./python | |
- name: Save Observation Publisher Docker | |
run: docker image save --output merlin-observation-publisher.${{ needs.create-version.outputs.version }}.tar ${{ env.DOCKER_IMAGE_TAG }} | |
- name: Publish Observation Publisher Docker Artifact | |
uses: actions/upload-artifact@v4 | |
with: | |
name: merlin-observation-publisher.${{ needs.create-version.outputs.version }}.tar | |
path: merlin-observation-publisher.${{ needs.create-version.outputs.version }}.tar | |
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }} | |
e2e-test: | |
runs-on: ubuntu-latest | |
needs: | |
- build-api | |
- build-transformer | |
- create-version | |
env: | |
K3D_CLUSTER: merlin-cluster | |
LOCAL_REGISTRY_PORT: 12345 | |
LOCAL_REGISTRY: "dev.localhost" | |
DOCKER_REGISTRY: "dev.localhost:12345" | |
INGRESS_HOST: "127.0.0.1.nip.io" | |
MERLIN_CHART_VERSION: 0.13.18 | |
E2E_PYTHON_VERSION: "3.10.6" | |
K3S_VERSION: v1.26.7-k3s1 | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
path: merlin | |
- uses: actions/setup-python@v5 | |
with: | |
python-version: ${{ env.E2E_PYTHON_VERSION }} | |
- uses: actions/cache@v4 | |
with: | |
path: ~/.cache/pip | |
key: ${{ runner.os }}-pip-3.10-${{ hashFiles('**/requirements.txt') }} | |
restore-keys: | | |
${{ runner.os }}-pip-3.10- | |
- uses: actions/cache@v4 | |
with: | |
path: ~/.local/share/virtualenvs | |
key: ${{ runner.os }}-python-3.10-pipenv-python-sdk | |
- name: Download k3d | |
run: | | |
curl --silent --fail https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | TAG=v5.6.0 bash | |
- name: Create Test Cluster | |
run: | | |
k3d registry create $LOCAL_REGISTRY --port $LOCAL_REGISTRY_PORT | |
k3d cluster create $K3D_CLUSTER --image rancher/k3s:${K3S_VERSION} --port 80:80@loadbalancer \ | |
--k3s-arg '--disable=traefik,metrics-server@server:*' \ | |
--k3s-arg '--kubelet-arg=eviction-hard=imagefs.available<0.5%,nodefs.available<0.5%@server:0' \ | |
--k3s-arg '--kubelet-arg=eviction-minimum-reclaim=imagefs.available=0.5%,nodefs.available=0.5%@server:0' \ | |
--k3s-arg '--kubelet-arg=eviction-hard=imagefs.available<0.5%,nodefs.available<0.5%@agent:*' \ | |
--k3s-arg '--kubelet-arg=eviction-minimum-reclaim=imagefs.available=0.5%,nodefs.available=0.5%@agent:*' | |
- name: Setup cluster | |
working-directory: merlin/scripts/e2e | |
run: ./setup-cluster.sh merlin-cluster | |
- name: Download API Docker Artifact | |
uses: actions/download-artifact@v4 | |
with: | |
name: merlin.${{ needs.create-version.outputs.version }}.tar | |
- name: Download Standard Transformer Docker Artifact | |
uses: actions/download-artifact@v4 | |
with: | |
name: merlin-transformer.${{ needs.create-version.outputs.version }}.tar | |
- name: Publish images to k3d registry | |
run: | | |
# Merlin API | |
docker image load --input merlin.${{ needs.create-version.outputs.version }}.tar | |
docker tag merlin:${{ needs.create-version.outputs.version }} ${{ env.LOCAL_REGISTRY }}:${{ env.LOCAL_REGISTRY_PORT }}/merlin:${{ needs.create-version.outputs.version }} | |
k3d image import ${{ env.LOCAL_REGISTRY }}:${{ env.LOCAL_REGISTRY_PORT }}/merlin:${{ needs.create-version.outputs.version }} -c merlin-cluster | |
# Standard Transformer | |
docker image load --input merlin-transformer.${{ needs.create-version.outputs.version }}.tar | |
docker tag merlin-transformer:${{ needs.create-version.outputs.version }} ${{ env.LOCAL_REGISTRY }}:${{ env.LOCAL_REGISTRY_PORT }}/merlin-transformer:${{ needs.create-version.outputs.version }} | |
k3d image import ${{ env.LOCAL_REGISTRY }}:${{ env.LOCAL_REGISTRY_PORT }}/merlin-transformer:${{ needs.create-version.outputs.version }} -c merlin-cluster | |
- name: Deploy merlin and mlp | |
working-directory: merlin/scripts/e2e | |
run: ./deploy-merlin.sh ${{ env.INGRESS_HOST }} ${{ env.LOCAL_REGISTRY }}:${{ env.LOCAL_REGISTRY_PORT }} ${{ needs.create-version.outputs.version }} ${{ github.ref }} ${{ env.MERLIN_CHART_VERSION }} | |
- name: Prune docker system to make some space | |
run: docker system prune --all --force | |
# This step is needed to free up around 24GiB of storage on the GitHub runner to allow the e2e tests to pass | |
- name: Free Disk Space | |
uses: jlumbroso/free-disk-space@main | |
with: | |
# this will remove tools that are actually needed (like Python, which is needed), | |
# if set to "true" but frees about 6 GB | |
tool-cache: false | |
- name: Print space consumption | |
run: sudo df -h | |
- name: Run E2E Test | |
timeout-minutes: 30 | |
id: run-e2e-test | |
working-directory: merlin/scripts/e2e | |
run: ./run-e2e.sh ${{ env.INGRESS_HOST }} ${{ env.E2E_PYTHON_VERSION }} | |
- name: "Debug" | |
if: always() | |
continue-on-error: true | |
working-directory: merlin/scripts/e2e | |
run: ./debug-e2e.sh | |
- name: "Print post-e2e test space consumption" | |
if: always() | |
continue-on-error: true | |
working-directory: merlin/scripts/e2e | |
run: sudo df -h | |
release: | |
uses: ./.github/workflows/release.yml | |
needs: | |
- create-version | |
- build-api | |
- build-batch-predictor-base-image | |
- build-pyfunc-server-base-image | |
- build-observation-publisher | |
- test-python-sdk | |
- e2e-test | |
with: | |
version: ${{ needs.create-version.outputs.version }} | |
secrets: | |
pypi_username: ${{ secrets.PYPI_USERNAME }} | |
pypi_password: ${{ secrets.PYPI_PASSWORD }} | |
ghcr_token: ${{ secrets.GITHUB_TOKEN }} |