Skip to content

Commit

Permalink
get it working on perlmutter
Browse files Browse the repository at this point in the history
- adds settings for `CONDA_ENV` and `ENV_FILE_PATH` in script
- removes agent id: we want to be able to create multiple agents, depending on how many nodes we request
- updates tests to use .env in tests/
- updates tests to match changes
  • Loading branch information
swelborn committed Feb 1, 2025
1 parent 5356b49 commit b1b0e82
Show file tree
Hide file tree
Showing 14 changed files with 65 additions and 44 deletions.
2 changes: 1 addition & 1 deletion backend/agent/interactem/agent/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

class Settings(BaseSettings):
model_config = SettingsConfigDict(env_file=".env", extra="ignore")
LOCAL: bool = True
LOCAL: bool = False
DOCKER_COMPATIBILITY_MODE: bool = False
PODMAN_SERVICE_URI: str | None = None
NATS_SERVER_URL: NatsDsn = Field(default="nats://localhost:4222")
Expand Down
4 changes: 3 additions & 1 deletion backend/micro/launcher/.env
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
NATS_SERVER_URL="nats://localhost:4222"
SFAPI_KEY_PATH="~/.superfacility/key.pem"
SFAPI_KEY_PATH="~/.superfacility/key.pem"
CONDA_ENV="interactem"
ENV_FILE_PATH="/path/to/.env/file"
4 changes: 3 additions & 1 deletion backend/micro/launcher/interactem/launcher/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ class Settings(BaseSettings):
model_config = SettingsConfigDict(env_file=".env")
NATS_SERVER_URL: NatsDsn = NatsDsn("nats://localhost:4222")
SFAPI_KEY_PATH: Path = Path("/secrets/sfapi.pem")
CONDA_ENV: Path | str
ENV_FILE_PATH: Path

@model_validator(mode="after")
def resolve_path(self) -> Self:
Expand All @@ -18,4 +20,4 @@ def resolve_path(self) -> Self:
return self


cfg = Settings()
cfg = Settings() # type: ignore
4 changes: 3 additions & 1 deletion backend/micro/launcher/interactem/launcher/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ async def submit(req: Request) -> None:

# Render job script
template = jinja_env.get_template(LAUNCH_AGENT_TEMPLATE)
script = await template.render_async(job=job_req.model_dump())
script = await template.render_async(
job=job_req.model_dump(), settings=cfg.model_dump()
)

try:
job: AsyncJobSqueue = await perlmutter.submit_job(script)
Expand Down
11 changes: 2 additions & 9 deletions backend/micro/launcher/interactem/launcher/models.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
import datetime
import pathlib
from typing import Self
from uuid import UUID

from pydantic import BaseModel, model_validator
from sfapi_client._models import StatusValue
from sfapi_client._models.job_status_response_squeue import JobStatusResponseSqueue
from sfapi_client.compute import Machine


Expand All @@ -23,12 +19,11 @@ class JobSubmitRequest(BaseModel):
qos: str
constraint: str
walltime: datetime.timedelta | str
output: pathlib.Path
agent_id: UUID
reservation: str | None = None
num_nodes: int = 1

@model_validator(mode="after")
def format_walltime(self) -> Self:
def format_walltime(self) -> "JobSubmitRequest":
if isinstance(self.walltime, str):
# Validate the string format HH:MM:SS
parts = self.walltime.split(":")
Expand All @@ -48,6 +43,4 @@ def format_walltime(self) -> Self:


class JobSubmitResponse(BaseModel):
job: JobStatusResponseSqueue
jobid: int
status: StatusValue
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@
#SBATCH --qos={{job.qos}}
#SBATCH --constraint={{job.constraint}}
#SBATCH --time={{job.walltime}}
#SBATCH --job-name=agent-{{job.agent_id}}
#SBATCH --account={{job.account}}
#SBATCH --nodes=1
#SBATCH --nodes={{job.num_nodes}}
#SBATCH --exclusive
{%- if job.reservation %}
#SBATCH --reservation={{job.reservation}}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{% include "header.sh.j2" %}

module load python
conda activate interactem
source /path/to/.env/file
interactem-agent
module load conda
conda activate {{settings.CONDA_ENV}}
srun --nodes={{job.num_nodes}} --ntasks-per-node=1 dotenv -f {{settings.ENV_FILE_PATH}} run interactem-agent
24 changes: 21 additions & 3 deletions backend/micro/launcher/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions backend/micro/launcher/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ jinja2 = "^3.1.5"
[tool.poetry.group.dev.dependencies]
pytest = "^8.3.4"
pytest-asyncio = "^0.25.3"
python-dotenv = { version = "^1.0.1", extras = ["cli"] }

[build-system]
requires = ["poetry-core"]
Expand Down
4 changes: 4 additions & 0 deletions backend/micro/launcher/scripts/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash

# Run the launcher test suite under the environment defined in tests/.env.
# The .env file is located relative to this script; the pytest target
# (./tests) is relative to the current working directory.
# Expansions are quoted so a script path containing spaces or glob
# characters is not word-split or expanded by the shell.
THIS_DIR=$(dirname "$0")
dotenv -f "$THIS_DIR/../tests/.env" run pytest -s -vv ./tests
4 changes: 4 additions & 0 deletions backend/micro/launcher/tests/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
NATS_SERVER_URL="nats://localhost:4222"
SFAPI_KEY_PATH="~/.superfacility/key.pem"
CONDA_ENV="interactem"
ENV_FILE_PATH="/path/to/.env/file"
8 changes: 3 additions & 5 deletions backend/micro/launcher/tests/expected_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@
#SBATCH --qos=normal
#SBATCH --constraint=gpu
#SBATCH --time=01:30:00
#SBATCH --job-name=agent-5e0adf32-4181-4cb1-921c-e1b6ae986176
#SBATCH --account=test_account
#SBATCH --nodes=1
#SBATCH --nodes=2
#SBATCH --exclusive

module load python
module load conda
conda activate interactem
source /path/to/.env/file
interactem-agent
srun --nodes=2 --ntasks-per-node=1 dotenv -f /path/to/.env/file run interactem-agent
30 changes: 15 additions & 15 deletions backend/micro/launcher/tests/test_rendering.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import pathlib
from datetime import timedelta
from pathlib import Path

import pytest
from jinja2 import Environment, PackageLoader
from sfapi_client.compute import Machine

from interactem.launcher.config import cfg
from interactem.launcher.constants import LAUNCH_AGENT_TEMPLATE
from interactem.launcher.models import JobSubmitRequest

HERE = pathlib.Path(__file__).parent
HERE = Path(__file__).parent


@pytest.fixture
Expand All @@ -19,24 +20,23 @@ def expected_script() -> str:

@pytest.mark.asyncio
async def test_submit_rendering(expected_script: str):
job_request = {
"machine": Machine.perlmutter,
"account": "test_account",
"qos": "normal",
"constraint": "gpu",
"walltime": timedelta(hours=1, minutes=30),
"output": pathlib.Path("/path/to/output"),
"agent_id": "5e0adf32-4181-4cb1-921c-e1b6ae986176",
"reservation": None,
}

job_req = JobSubmitRequest(**job_request)
job_req = JobSubmitRequest(
machine=Machine.perlmutter,
account="test_account",
qos="normal",
constraint="gpu",
walltime=timedelta(hours=1, minutes=30),
reservation=None,
num_nodes=2,
)

jinja_env = Environment(
loader=PackageLoader("interactem.launcher"), enable_async=True
)
template = jinja_env.get_template(LAUNCH_AGENT_TEMPLATE)

script = await template.render_async(job=job_req.model_dump())
script = await template.render_async(
job=job_req.model_dump(), settings=cfg.model_dump()
)

assert script == expected_script
3 changes: 1 addition & 2 deletions backend/sfapi_models/interactem/sfapi_models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import datetime
import pathlib
from typing import Self
from uuid import UUID

from pydantic import BaseModel, model_validator
from sfapi_client._models import StatusValue
Expand All @@ -24,8 +23,8 @@ class JobSubmitRequest(BaseModel):
constraint: str
walltime: datetime.timedelta | str
output: pathlib.Path
agent_id: UUID
reservation: str | None = None
num_nodes: int = 1

@model_validator(mode="after")
def format_walltime(self) -> Self:
Expand Down

0 comments on commit b1b0e82

Please sign in to comment.