Skip to content

Commit

Permalink
Merge branch 'main' into javiermtorres/issue-492-runtime-resources
Browse files Browse the repository at this point in the history
Signed-off-by: javiermtorres <[email protected]>
  • Loading branch information
javiermtorres authored Feb 7, 2025
2 parents 15a88e5 + 23a85a9 commit 60136d2
Show file tree
Hide file tree
Showing 105 changed files with 7,838 additions and 5,624 deletions.
26 changes: 0 additions & 26 deletions .devcontainer/docker-compose.override.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,6 @@ services:
- database_volume:/mzai/backend/local.db
ports:
- "5678:5678"
environment:
- MLFLOW_TRACKING_URI
depends_on:
mlflow:
condition: "service_started"
required: false
develop:
watch:
- path: lumigator/backend/
Expand All @@ -39,23 +33,3 @@ services:
- .venv/
- path: lumigator/backend/pyproject.toml
action: rebuild

mlflow:
  # MLflow tracking server; only started when the `local` profile is active.
  image: ghcr.io/mlflow/mlflow:v2.0.1
  environment:
    - MLFLOW_TRACKING_URI=${MLFLOW_TRACKING_URI}
    - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
    - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
    - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
    - BACKEND_STORE_URI=sqlite:///mlflow.db
    # A stray trailing backtick used to be part of this value.
    - ARTIFACT_ROOT=s3://mlflow
  ports:
    - "8001:5000"
  depends_on:
    minio:
      condition: service_healthy
  # Compose interpolates ${...} in `command` from the host shell / .env file,
  # NOT from the `environment:` list above. Fall back to the same defaults so
  # the flags are never expanded to empty strings when the host leaves them unset.
  command: >-
    mlflow server
    --backend-store-uri ${BACKEND_STORE_URI:-sqlite:///mlflow.db}
    --default-artifact-root ${ARTIFACT_ROOT:-s3://mlflow}
    --host 0.0.0.0
  extra_hosts:
    - "localhost:host-gateway"
  profiles:
    - local
32 changes: 6 additions & 26 deletions .github/workflows/lumigator_pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -272,10 +272,6 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Download Docker images
if: steps.filter.outputs.push_be == 'true' || contains(github.ref, 'refs/tags/')
run: docker pull ghcr.io/${{ github.repository }}:backend_dev_${{ env.GITHUB_SHA_SHORT }}

- name: Login to DockerHub
if: steps.filter.outputs.push_be == 'true' || contains(github.ref, 'refs/tags/')
uses: docker/login-action@v3
Expand All @@ -285,21 +281,15 @@ jobs:

- name: Tag and push Docker image (tagged releases)
if: contains(github.ref, 'refs/tags/')
run: |
docker tag ghcr.io/${{ github.repository }}:backend_dev_${{ env.GITHUB_SHA_SHORT }} mzdotai/lumigator:${{ github.ref_name }}
docker push mzdotai/lumigator:${{ github.ref_name }}
run: skopeo copy --all docker://ghcr.io/${{ github.repository }}:backend_dev_${{ env.GITHUB_SHA_SHORT }} docker://mzdotai/lumigator:${{ github.ref_name }}

- name: Tag and push Docker image (normal build)
if: steps.filter.outputs.push_be == 'true' && !contains(github.ref, 'refs/tags/')
run: |
docker tag ghcr.io/${{ github.repository }}:backend_dev_${{ env.GITHUB_SHA_SHORT }} mzdotai/lumigator:backend_dev_${{ env.GITHUB_SHA_SHORT }}
docker push mzdotai/lumigator:backend_dev_${{ env.GITHUB_SHA_SHORT }}
run: skopeo copy --all docker://ghcr.io/${{ github.repository }}:backend_dev_${{ env.GITHUB_SHA_SHORT }} docker://mzdotai/lumigator:backend_dev_${{ env.GITHUB_SHA_SHORT }}

- name: Tag and push Docker image (latest)
if: steps.filter.outputs.push_be == 'true' && github.ref == 'refs/heads/main'
run: |
docker tag ghcr.io/${{ github.repository }}:backend_dev_${{ env.GITHUB_SHA_SHORT }} mzdotai/lumigator:latest
docker push mzdotai/lumigator:latest
run: skopeo copy --all docker://ghcr.io/${{ github.repository }}:backend_dev_${{ env.GITHUB_SHA_SHORT }} docker://mzdotai/lumigator:latest

push-frontend-images:
name: Push frontend Docker images
Expand Down Expand Up @@ -331,10 +321,6 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Download Docker images
if: steps.filter.outputs.push_fe == 'true' || contains(github.ref, 'refs/tags/')
run: docker pull ghcr.io/${{ github.repository }}:frontend_${{ env.GITHUB_SHA_SHORT }}

- name: Login to DockerHub
if: steps.filter.outputs.push_fe == 'true' || contains(github.ref, 'refs/tags/')
uses: docker/login-action@v3
Expand All @@ -344,21 +330,15 @@ jobs:

- name: Tag and push Docker image (tagged releases)
if: contains(github.ref, 'refs/tags/')
run: |
docker tag ghcr.io/${{ github.repository }}:frontend_${{ env.GITHUB_SHA_SHORT }} mzdotai/lumigator-frontend:${{ github.ref_name }}
docker push mzdotai/lumigator-frontend:${{ github.ref_name }}
run: skopeo copy --all docker://ghcr.io/${{ github.repository }}:frontend_${{ env.GITHUB_SHA_SHORT }} docker://mzdotai/lumigator-frontend:${{ github.ref_name }}

- name: Tag and push Docker image (normal build)
if: steps.filter.outputs.push_fe == 'true' && !contains(github.ref, 'refs/tags/')
run: |
docker tag ghcr.io/${{ github.repository }}:frontend_${{ env.GITHUB_SHA_SHORT }} mzdotai/lumigator-frontend:frontend_${{ env.GITHUB_SHA_SHORT }}
docker push mzdotai/lumigator-frontend:frontend_${{ env.GITHUB_SHA_SHORT }}
run: skopeo copy --all docker://ghcr.io/${{ github.repository }}:frontend_${{ env.GITHUB_SHA_SHORT }} docker://mzdotai/lumigator-frontend:frontend_${{ env.GITHUB_SHA_SHORT }}

- name: Tag and push Docker image (latest)
if: steps.filter.outputs.push_fe == 'true' && github.ref == 'refs/heads/main'
run: |
docker tag ghcr.io/${{ github.repository }}:frontend_${{ env.GITHUB_SHA_SHORT }} mzdotai/lumigator-frontend:latest
docker push mzdotai/lumigator-frontend:latest
run: skopeo copy --all docker://ghcr.io/${{ github.repository }}:frontend_${{ env.GITHUB_SHA_SHORT }} docker://mzdotai/lumigator-frontend:latest

sdk-packaging:
name: Package SDK
Expand Down
3 changes: 2 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"python.testing.pytestEnabled": true,
"python.analysis.extraPaths": [
"./lumigator/schemas",
"./lumigator/jobs"
"./lumigator/jobs",
"./lumigator/sdk"
]
}
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ local-up: check-dot-env
RAY_ARCH_SUFFIX=$(RAY_ARCH_SUFFIX) COMPUTE_TYPE=$(COMPUTE_TYPE) docker compose --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) -f ${DEV_DOCKER_COMPOSE_FILE} up --watch --build

local-down:
docker compose --profile local -f $(LOCAL_DOCKERCOMPOSE_FILE) down
docker compose --profile local $(GPU_COMPOSE) -f $(LOCAL_DOCKERCOMPOSE_FILE) -f ${DEV_DOCKER_COMPOSE_FILE} down

local-logs:
docker compose -f $(LOCAL_DOCKERCOMPOSE_FILE) logs
Expand Down Expand Up @@ -181,6 +181,7 @@ test-backend-unit:
RAY_HEAD_NODE_HOST=localhost \
RAY_DASHBOARD_PORT=8265 \
SQLALCHEMY_DATABASE_URL=sqlite:////tmp/local.db \
MLFLOW_TRACKING_URI=http://localhost:8001 \
PYTHONPATH=../jobs:$$PYTHONPATH \
uv run $(DEBUGPY_ARGS) -m pytest -s -o python_files="backend/tests/unit/*/test_*.py backend/tests/unit/test_*.py"

Expand All @@ -191,6 +192,7 @@ test-backend-integration:
RAY_HEAD_NODE_HOST=localhost \
RAY_DASHBOARD_PORT=8265 \
SQLALCHEMY_DATABASE_URL=sqlite:////tmp/local.db \
MLFLOW_TRACKING_URI=http://localhost:8001 \
RAY_WORKER_GPUS="0.0" \
RAY_WORKER_GPUS_FRACTION="0.0" \
INFERENCE_PIP_REQS=../jobs/inference/requirements_cpu.txt \
Expand Down
32 changes: 31 additions & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ services:
- "8265:8265"
- "10001:10001"
# https://docs.ray.io/en/releases-2.30.0/cluster/cli.html#ray-start for more info about the command
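# Apparently Ray can pick a dead head node for the job driver unless the
# RAY_JOB_ALLOW_DRIVER_ON_WORKER_NODES environment variable is set to 1
# (RAY_JOB_ALLOW_DRIVER_ON_WORKER_NODES_ENV_VAR is the name of the Ray
# constant that holds this variable name, not the variable itself).
# Dead head nodes show up because the GCS data is persisted in Redis.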
entrypoint:
- /bin/bash
- -c
Expand All @@ -75,7 +79,7 @@ services:
# a shared dir, permissions need to be setup
# ... || true allows this to fail (-e is set)
sudo chmod -R 777 /tmp/ray_pip_cache/ || true
RAY_REDIS_ADDRESS=redis:6379 ray start --head --dashboard-port=8265 --port=6379 --dashboard-host=0.0.0.0 --ray-client-server-port 10001
# Ray reads RAY_JOB_ALLOW_DRIVER_ON_WORKER_NODES; the `_ENV_VAR` suffix belongs
# only to the Python constant that stores this name, so set the real variable.
RAY_JOB_ALLOW_DRIVER_ON_WORKER_NODES=1 RAY_REDIS_ADDRESS=redis:6379 ray start --head --dashboard-port=8265 --port=6379 --dashboard-host=0.0.0.0 --ray-client-server-port 10001
mkdir -p /tmp/ray/session_latest/runtime_resources/pip
rmdir /tmp/ray/session_latest/runtime_resources/pip/ && ln -s /tmp/ray_pip_cache /tmp/ray/session_latest/runtime_resources/pip
sleep infinity
Expand Down Expand Up @@ -113,6 +117,7 @@ services:

backend:
image: mzdotai/lumigator:v0.1.0-alpha
pull_policy: always
build:
context: .
dockerfile: "Dockerfile"
Expand All @@ -126,6 +131,9 @@ services:
ray:
condition: "service_started"
required: false
mlflow:
condition: "service_started"
required: false
ports:
- 8000:8000
environment:
Expand Down Expand Up @@ -154,6 +162,7 @@ services:
- RAY_WORKER_GPUS=$RAY_WORKER_GPUS
- RAY_WORKER_GPUS_FRACTION=$RAY_WORKER_GPUS_FRACTION
- LUMI_API_CORS_ALLOWED_ORIGINS
- MLFLOW_TRACKING_URI
# NOTE: to keep AWS_ENDPOINT_URL as http://localhost:9000 both on the host system
# and inside containers, we map localhost to the host gateway IP.
# This currently works properly, but might be the cause of networking
Expand All @@ -163,6 +172,7 @@ services:
- "localhost:host-gateway"

frontend:
pull_policy: always
image: mzdotai/lumigator-frontend:v0.1.0-alpha
build:
context: .
Expand All @@ -182,6 +192,26 @@ services:
ports:
- 80:80

mlflow:
  # MLflow tracking server; only started when the `local` profile is active.
  image: ghcr.io/mlflow/mlflow:v2.0.1
  environment:
    - MLFLOW_TRACKING_URI=${MLFLOW_TRACKING_URI}
    - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
    - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
    - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
    - BACKEND_STORE_URI=sqlite:///mlflow.db
    # A stray trailing backtick used to be part of this value.
    - ARTIFACT_ROOT=s3://mlflow
  ports:
    - "8001:5000"
  depends_on:
    minio:
      condition: service_healthy
  # Compose interpolates ${...} in `command` from the host shell / .env file,
  # NOT from the `environment:` list above. Fall back to the same defaults so
  # the flags are never expanded to empty strings when the host leaves them unset.
  command: >-
    mlflow server
    --backend-store-uri ${BACKEND_STORE_URI:-sqlite:///mlflow.db}
    --default-artifact-root ${ARTIFACT_ROOT:-s3://mlflow}
    --host 0.0.0.0
  extra_hosts:
    - "localhost:host-gateway"
  profiles:
    - local

volumes:
minio-data:
database_volume:
Expand Down
7 changes: 0 additions & 7 deletions docs/source/conceptual-guides/new-endpoint.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ from fastapi import APIRouter


from backend.api.routes import (
completions,
datasets,
experiments,
health,
Expand All @@ -70,7 +69,6 @@ api_router = APIRouter(prefix=API_V1_PREFIX)
api_router.include_router(health.router, prefix="/health", tags=[Tags.HEALTH])
api_router.include_router(datasets.router, prefix="/datasets", tags=[Tags.DATASETS])
api_router.include_router(experiments.router, prefix="/experiments", tags=[Tags.EXPERIMENTS])
api_router.include_router(completions.router, prefix="/completions", tags=[Tags.COMPLETIONS])
api_router.include_router(tasks.router, prefix="/tasks", tags=[Tags.TASKS]) # NEW
```

Expand All @@ -86,7 +84,6 @@ class Tags(str, Enum):
HEALTH = "health"
DATASETS = "datasets"
EXPERIMENTS = "experiments"
COMPLETIONS = "completions"
TASKS = "tasks" ### NEW


Expand All @@ -103,10 +100,6 @@ TAGS_METADATA = [
"name": Tags.EXPERIMENTS,
"description": "Create and manage evaluation experiments.",
},
{
"name": Tags.COMPLETIONS,
"description": "Access models via external vendor endpoints",
},
# NEW TAGS BELOW
{
"name": Tags.TASKS,
Expand Down
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Hugging Face and local stores or accessed through APIs. It consists of:
operations-guide/kubernetes
operations-guide/alembic
operations-guide/dev
operations-guide/configure-S3

.. toctree::
:maxdepth: 2
Expand Down
12 changes: 1 addition & 11 deletions docs/source/reference/sdk.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ SDK

The Lumigator SDK is a Python library that provides a simple interface to interact with the
Lumigator API. You can use it to create, update, delete, and list datasets, submit and monitor jobs,
download the results, request completions, and more.
download the results, and more.

Lumigator Client
----------------
Expand Down Expand Up @@ -44,16 +44,6 @@ types of jobs: Inference and Evaluation.
:members:
:undoc-members:

Completions
-----------

The `Completions` class provides a simple interface to request completions from external APIs.
Currently, we support two APIs: OpenAI's and Mistral's.

.. automodule:: lumigator_sdk.completions
:members:
:undoc-members:

Base Client
-----------

Expand Down
40 changes: 16 additions & 24 deletions lumigator/backend/backend/api/deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@

from backend.db import session_manager
from backend.repositories.datasets import DatasetRepository
from backend.repositories.experiments import ExperimentRepository
from backend.repositories.jobs import JobRepository, JobResultRepository
from backend.services.completions import MistralCompletionService, OpenAICompletionService
from backend.services.datasets import DatasetService
from backend.services.experiments import ExperimentService
from backend.services.jobs import JobService
from backend.services.workflows import WorkflowService
from backend.settings import settings
from backend.tracking import tracking_client_manager


def get_db_session() -> Generator[Session, None, None]:
Expand All @@ -28,6 +27,14 @@ def get_db_session() -> Generator[Session, None, None]:
DBSessionDep = Annotated[Session, Depends(get_db_session)]


# Dependency provider: opens a connection via `tracking_client_manager.connect()`
# and yields the resulting client while the `with` block is active.
# NOTE(review): the return annotation names `Session`, but the yielded object
# comes from `tracking_client_manager`, not from the DB session manager —
# presumably a tracking-client type; confirm and correct the annotation.
def get_tracking_client() -> Generator[Session, None, None]:
with tracking_client_manager.connect() as client:
yield client


# NOTE(review): this alias also annotates the dependency as `Session`; it should
# presumably use the tracking client's own type — confirm against backend.tracking.
TrackingClientDep = Annotated[Session, Depends(get_tracking_client)]


def get_s3_client() -> S3Client:
    """Build a boto3 S3 client for the configured endpoint.

    The client is returned directly (the function contains no ``yield``), so the
    return annotation is ``S3Client`` rather than a ``Generator``.

    Returns:
        The object produced by ``boto3.client("s3", ...)`` with
        ``endpoint_url`` taken from ``settings.S3_ENDPOINT_URL``.
    """
    return boto3.client("s3", endpoint_url=settings.S3_ENDPOINT_URL)

Expand Down Expand Up @@ -64,40 +71,25 @@ def get_job_service(session: DBSessionDep, dataset_service: DatasetServiceDep) -

def get_experiment_service(
session: DBSessionDep,
tracking_client: TrackingClientDep,
job_service: JobServiceDep,
dataset_service: DatasetServiceDep,
) -> ExperimentService:
job_repo = JobRepository(session)
experiment_repo = ExperimentRepository(session)
return ExperimentService(experiment_repo, job_repo, job_service, dataset_service)
return ExperimentService(job_repo, job_service, dataset_service, tracking_client)


ExperimentServiceDep = Annotated[ExperimentService, Depends(get_experiment_service)]


def get_workflow_service(
session: DBSessionDep, job_service: JobServiceDep, dataset_service: DatasetServiceDep
session: DBSessionDep,
tracking_client: TrackingClientDep,
job_service: JobServiceDep,
dataset_service: DatasetServiceDep,
) -> WorkflowService:
job_repo = JobRepository(session)
return WorkflowService(job_repo, job_service, dataset_service)
return WorkflowService(job_repo, job_service, dataset_service, tracking_client=tracking_client)


WorkflowServiceDep = Annotated[WorkflowService, Depends(get_workflow_service)]


def get_mistral_completion_service() -> MistralCompletionService:
return MistralCompletionService()


MistralCompletionServiceDep = Annotated[
MistralCompletionService, Depends(get_mistral_completion_service)
]


def get_openai_completion_service() -> OpenAICompletionService:
return OpenAICompletionService()


OpenAICompletionServiceDep = Annotated[
OpenAICompletionService, Depends(get_openai_completion_service)
]
4 changes: 1 addition & 3 deletions lumigator/backend/backend/api/router.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from fastapi import APIRouter

from backend.api.routes import completions, datasets, experiments, health, jobs, models, workflows
from backend.api.routes import datasets, experiments, health, jobs, models, workflows
from backend.api.tags import Tags

API_V1_PREFIX = "/api/v1"
Expand All @@ -10,9 +10,7 @@
api_router.include_router(datasets.router, prefix="/datasets", tags=[Tags.DATASETS])
api_router.include_router(jobs.router, prefix="/jobs", tags=[Tags.JOBS])
api_router.include_router(experiments.router, prefix="/experiments", tags=[Tags.EXPERIMENTS])
api_router.include_router(completions.router, prefix="/completions", tags=[Tags.COMPLETIONS])
api_router.include_router(models.router, prefix="/models", tags=[Tags.MODELS])
# TODO: Workflows route is not yet ready so it is excluded from the OpenAPI schema
api_router.include_router(
workflows.router, prefix="/workflows", tags=[Tags.WORKFLOWS], include_in_schema=False
)
Loading

0 comments on commit 60136d2

Please sign in to comment.