Skip to content

Commit

Permalink
WIP: Butler
Browse files Browse the repository at this point in the history
  • Loading branch information
tcjennings committed Jan 27, 2025
1 parent 4ff6c31 commit f53023c
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 3 deletions.
77 changes: 75 additions & 2 deletions src/lsst/cmservice/common/butler.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,88 @@
"""Utility functions for working with butler commands"""

import os
import re
from collections.abc import Mapping
from functools import partial

from anyio import to_thread
import yaml
from anyio import Path, to_thread
from sqlalchemy.engine import url

from lsst.daf.butler import Butler, MissingCollectionError
from lsst.daf.butler import Butler, ButlerConfig
from lsst.daf.butler._exceptions import MissingCollectionError
from lsst.utils.db_auth import DbAuth

from ..common import errors
from ..config import config


# TODO this should be cached for the lifetime of the service
def parse_butler_repos_from_environment() -> Mapping[str, str]:
"""Parse a set of butler repositories from environment variables.
The environment variables name pairs are ``BUTLER__REPO__N__NAME`` and
``BUTLER__REPO__N__URI`` where ``N`` is a numeric identifier that has no
particular meaning except that the ``N`` should match between declared
pairs of ``NAME`` and ``URI`` variables.
Returns
-------
Mapping[str, str]
"""

var_pattern = re.compile("^BUTLER__REPO__(\\d+)__NAME$")
vars = filter(var_pattern.match, os.environ.keys())

repos: dict[str, str] = {}
for repo_name in vars:
if (repo_uri := os.getenv(repo_name.replace("__NAME", "__URI"))) is not None:
# The following type error can be safely ignored because we are
# confident that the environment variable exists, and will not be
# None, because of the filter operation above.
repos[os.getenv(repo_name)] = repo_uri # type: ignore

return repos


# TODO this should be cached for the lifetime of the service
async def get_butler_config(repo: str, *, without_datastore: bool = False) -> ButlerConfig:
"""Create a butler config object for a repo known to the service's
environment.
"""

try:
repo_uri = parse_butler_repos_from_environment()[repo]
except KeyError:
# No such repo known to the service
# TODO log an error, otherwise let butler try to sort it out
repo_uri = repo

bc = ButlerConfig(
other=repo_uri,
without_datastore=without_datastore,
)
db_url = url.make_url(bc["registry"]["db"])

try:
db_auth_info = yaml.safe_load(await Path(config.butler.authentication_file).read_text())
except FileNotFoundError:
# TODO Log error or warning
# delegate db auth info discovery to normal toolchain
return bc

db_auth = DbAuth(authList=db_auth_info).getAuth(
dialectname=db_url.drivername,
username=db_url.username,
host=db_url.host,
port=db_url.port,
database=db_url.database,
)

bc[".registry.db"] = str(db_url.set(username=db_auth[0], password=db_auth[1]))
return bc


async def remove_run_collections(
butler_repo: str,
collection_name: str,
Expand Down
11 changes: 11 additions & 0 deletions src/lsst/cmservice/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,16 @@ class ButlerConfiguration(BaseModel):
default="/sdf/group/rubin/shared/data-repos.yaml",
)

authentication_file: str = Field(
description="Path and name of a db-auth.yaml to use with Butler",
default="~/.lsst/db-auth.yaml",
)

access_token: str | None = Field(
description=("Gafaelfawr access token used to authenticate to a Butler server."),
default=None,
)

mock: bool = Field(
description="Whether to mock out Butler calls.",
default=False,
Expand Down Expand Up @@ -177,6 +187,7 @@ class HTCondorConfiguration(BaseModel):
serialization_alias="FS_REMOTE_DIR",
)

# FIXME: unclear if this is at all necessary
dagman_job_append_get_env: bool = Field(
description="...", default=True, serialization_alias="_CONDOR_DAGMAN_MANAGER_JOB_APPEND_GETENV"
)
Expand Down
4 changes: 3 additions & 1 deletion src/lsst/cmservice/handlers/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from lsst.daf.butler import Butler

from ..common.butler import get_butler_config
from ..common.enums import StatusEnum
from ..common.errors import CMMissingScriptInputError, test_type_and_raise
from ..config import config
Expand Down Expand Up @@ -220,9 +221,10 @@ async def split(
if mock_butler:
sorted_field_values = np.arange(10)
else:
butler_config = await get_butler_config(butler_repo, without_datastore=True)
butler_f = partial(
Butler.from_config,
butler_repo,
butler_config,
collections=[input_coll, campaign_input_coll],
without_datastore=True,
)
Expand Down
70 changes: 70 additions & 0 deletions tests/common/test_butler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from typing import Any

import pytest

from lsst.cmservice.common.butler import get_butler_config, parse_butler_repos_from_environment
from lsst.cmservice.config import config


@pytest.fixture()
def mock_butler_environment(tmp_path: Any, monkeypatch: Any) -> None:
repo_mock_path = tmp_path / "repo" / "mock"
repo_mock_path.mkdir(parents=True)
repo_mock_butler_yaml = repo_mock_path / "butler.yaml"

repo_yaml = """---
datastore:
cls: lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore
registry:
db: "sqlite:///:memory:"
"""
repo_mock_butler_yaml.write_text(repo_yaml)

repo_mockgres_butler_yaml = repo_mock_path / "butler+postgres.yaml"

repo_yaml = """---
datastore:
cls: lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore
registry:
db: postgresql://localhost:5432/mockdb
"""
repo_mockgres_butler_yaml.write_text(repo_yaml)

monkeypatch.setenv("BUTLER__REPO__0__NAME", "/repo/mock")
monkeypatch.setenv("BUTLER__REPO__0__URI", f"{repo_mock_path}")
monkeypatch.setenv("BUTLER__REPO__1__NAME", "/repo/mockgres")
monkeypatch.setenv("BUTLER__REPO__1__URI", f"{repo_mockgres_butler_yaml}")
monkeypatch.setenv("BUTLER__REPO__2__NAME", "mockbargo")
monkeypatch.setenv("BUTLER__REPO__2__URI", "s3://bucket/prefix/object.yaml")
monkeypatch.setenv("BUTLER__REPO__3__NAME", "nosuchrepo")


@pytest.fixture
def mock_db_auth_file(tmp_path: Any, monkeypatch: Any) -> None:
mock_auth_path = tmp_path / "db-auth.yaml"
mock_auth_path.write_text("""---
- url: postgresql://localhost:5432/mockdb
username: mocker_mockerson
password: letmein
""")

monkeypatch.setattr(config.butler, "authentication_file", str(mock_auth_path))


def test_parse_butler_config_from_environment(mock_butler_environment: Any) -> None:
repos = parse_butler_repos_from_environment()
assert len(repos.keys()) == 3


@pytest.mark.asyncio
async def test_butler_creation_without_db_auth_file(mock_butler_environment: Any) -> None:
bc = await get_butler_config("/repo/mock", without_datastore=True)
assert bc[".registry.db"] == "sqlite:///:memory:"


@pytest.mark.asyncio
async def test_butler_creation_with_db_auth_file(
mock_butler_environment: Any, mock_db_auth_file: Any
) -> None:
bc = await get_butler_config("/repo/mockgres", without_datastore=True)
assert "mocker_mockerson" in bc[".registry.db"]

0 comments on commit f53023c

Please sign in to comment.