From 9cd16c7bf00062fa789c470327456d8876b380ea Mon Sep 17 00:00:00 2001 From: Frost Ming Date: Fri, 6 Sep 2024 16:06:04 +0800 Subject: [PATCH] fix: reload plugin missing file changes (#4958) * fix: reload plugin to not miss file changes Signed-off-by: Frost Ming * fix: tests Signed-off-by: Frost Ming * update action version Signed-off-by: Frost Ming --------- Signed-off-by: Frost Ming --- .github/workflows/ci.yml | 10 ++-- src/bentoml/_internal/bento/bento.py | 5 +- src/bentoml/_internal/bento/build_config.py | 47 ++++++++-------- .../utils/circus/watchfilesplugin.py | 54 +++++++++++-------- src/bentoml/serving.py | 1 - .../utils/circus/test_watchfilesplugin.py | 18 +++++-- tests/unit/conftest.py | 9 ++-- 7 files changed, 84 insertions(+), 60 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f700eee519c..fe1a8597808 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -202,19 +202,19 @@ jobs: cache: pip python-version: '3.11' - name: Download e2e coverage - uses: actions/download-artifact@v4.1.7 + uses: actions/download-artifact@v4 with: name: coverage-e2e-data - name: Download monitoring coverage - uses: actions/download-artifact@v4.1.7 + uses: actions/download-artifact@v4 with: name: coverage-monitoring-data - name: Download integrations coverage - uses: actions/download-artifact@v4.1.7 + uses: actions/download-artifact@v4 with: name: coverage-integrations-data - name: Download unit coverage - uses: actions/download-artifact@v4.1.7 + uses: actions/download-artifact@v4 with: name: coverage-unit-data - name: Install dependencies @@ -222,7 +222,7 @@ jobs: - name: Export coverage reports and generate summary run: nox -s coverage - name: Upload uncovered HTML report - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: uncovered-html-report path: htmlcov diff --git a/src/bentoml/_internal/bento/bento.py b/src/bentoml/_internal/bento/bento.py index 7b151bd4a8a..7cda55b5dbc 100644 --- a/src/bentoml/_internal/bento/bento.py +++ b/src/bentoml/_internal/bento/bento.py @@ -254,7 +254,7 @@ def create( models.add(model) # create ignore specs - specs = BentoPathSpec(build_config.include, build_config.exclude) + specs = BentoPathSpec(build_config.include, build_config.exclude, build_ctx) # Copy all files base on include and exclude, into `src` directory relpaths = [s for s in build_config.include if s.startswith("../")] @@ -266,12 +266,11 @@ def create( target_fs = bento_fs.opendir(BENTO_PROJECT_DIR_NAME) with target_fs.open(DEFAULT_BENTO_BUILD_FILE, "w") as bentofile_yaml: build_config.to_yaml(bentofile_yaml) - ignore_specs = list(specs.from_path(build_ctx)) for dir_path, _, files in ctx_fs.walk(): for f in files: path = fs.path.combine(dir_path, f.name).lstrip("/") - if specs.includes(path, recurse_exclude_spec=ignore_specs): + if specs.includes(path): if ctx_fs.getsize(path) > 10 * 1024 * 1024: logger.warn("File size is larger than 10MiB: %s", path) target_fs.makedirs(dir_path, recreate=True) diff --git a/src/bentoml/_internal/bento/build_config.py b/src/bentoml/_internal/bento/build_config.py index e5ab60f8692..9f6fa5864eb 100644 --- a/src/bentoml/_internal/bento/build_config.py +++ b/src/bentoml/_internal/bento/build_config.py @@ -936,50 +936,51 @@ def to_yaml(self, stream: t.TextIO) -> None: raise -@attr.define(frozen=True) +@attr.frozen class BentoPathSpec: - _include: PathSpec = attr.field( + include: PathSpec = attr.field( converter=lambda x: PathSpec.from_lines("gitwildmatch", x) ) - _exclude: PathSpec = attr.field( + exclude: PathSpec = attr.field( converter=lambda x: PathSpec.from_lines("gitwildmatch", x) ) + ctx_dir: str = attr.field(default=".") + recurse_ignore_filename: str = ".bentoignore" + recurse_exclude_spec: list[tuple[str, PathSpec]] = attr.field(init=False) # we want to ignore .git and venv folders in cases they are very large. extra: PathSpec = attr.field( default=PathSpec.from_lines("gitwildmatch", [".git/", ".venv/", "venv/"]), init=False, ) - def includes( - self, - path: str, - *, - recurse_exclude_spec: t.Optional[t.Iterable[t.Tuple[str, PathSpec]]] = None, - ) -> bool: - # Determine whether a path is included or not. + @recurse_exclude_spec.default + def _default_recurse_exclude_spec(self) -> list[tuple[str, PathSpec]]: # recurse_exclude_spec is a list of (path, spec) pairs. + fs_ = fs.open_fs(encode_path_for_uri(self.ctx_dir)) + recurse_exclude_spec: list[tuple[str, PathSpec]] = [] + for file in fs_.walk.files(filter=[self.recurse_ignore_filename]): + dir_path = fs.path.dirname(file) + with fs_.open(file) as f: + recurse_exclude_spec.append( + (dir_path.lstrip("/"), PathSpec.from_lines("gitwildmatch", f)) + ) + return recurse_exclude_spec + + def includes(self, path: str) -> bool: + """Determine whether a path is included or not.""" to_include = ( - self._include.match_file(path) - and not self._exclude.match_file(path) + self.include.match_file(path) + and not self.exclude.match_file(path) and not self.extra.match_file(path) ) - if to_include and recurse_exclude_spec is not None: + if to_include: return not any( ignore_spec.match_file(fs.path.relativefrom(ignore_parent, path)) - for ignore_parent, ignore_spec in recurse_exclude_spec + for ignore_parent, ignore_spec in self.recurse_exclude_spec if fs.path.isparent(ignore_parent, path) ) return to_include - def from_path(self, path: str) -> t.Generator[t.Tuple[str, PathSpec], None, None]: - """ - yield (parent, exclude_spec) from .bentoignore file of a given path - """ - fs_ = fs.open_fs(encode_path_for_uri(path)) - for file in fs_.walk.files(filter=[".bentoignore"]): - dir_path = fs.path.dirname(file) - yield dir_path, PathSpec.from_lines("gitwildmatch", fs_.open(file)) - class FilledBentoBuildConfig(BentoBuildConfig): service: str diff --git a/src/bentoml/_internal/utils/circus/watchfilesplugin.py b/src/bentoml/_internal/utils/circus/watchfilesplugin.py index 29b46e1e087..83f4d659731 100644 --- a/src/bentoml/_internal/utils/circus/watchfilesplugin.py +++ b/src/bentoml/_internal/utils/circus/watchfilesplugin.py @@ -8,6 +8,7 @@ from threading import Thread from typing import TYPE_CHECKING +import fs from circus.plugins import CircusPlugin from watchfiles import watch @@ -39,36 +40,47 @@ def __init__(self, *args: t.Any, **config: t.Any): # circus/plugins/__init__.py:282 -> converts all given configs to dict[str, str] self.name = self.config.get("name") - self.bentoml_home = self.config["bentoml_home"] self.working_dir = self.config["working_dir"] # a list of folders to watch for changes - watch_dirs = [self.working_dir, os.path.join(self.bentoml_home, "models")] + watch_dirs = [self.working_dir] + self._specs = [(Path(self.working_dir).as_posix(), self.create_spec())] if not is_pypi_installed_bentoml(): # bentoml src from this __file__ - logger.info( - "BentoML is installed via development mode, adding source root to 'watch_dirs'." - ) - watch_dirs.append(t.cast(str, os.path.dirname(source_locations("bentoml")))) + bentoml_src = Path(source_locations("bentoml")).parent + if bentoml_src.with_name("pyproject.toml").exists(): + logger.info( + "BentoML is installed via development mode, adding source root to 'watch_dirs'." + ) + watch_dirs.append(str(bentoml_src)) + self._specs.append( + ( + bentoml_src.as_posix(), + BentoPathSpec( + # only watch python files in bentoml src + ["*.py", "*.yaml"], + [], + bentoml_src.as_posix(), + recurse_ignore_filename=".gitignore", + ), + ) + ) logger.info("Watching directories: %s", watch_dirs) - self.watch_dirs = watch_dirs - - self.create_spec() # a thread to restart circus self.exit_event = Event() self.file_changes = watch( - *self.watch_dirs, + *watch_dirs, watch_filter=None, yield_on_timeout=True, # stop hanging on our tests, doesn't affect the behaviour stop_event=self.exit_event, rust_timeout=0, # we should set timeout to zero for no timeout ) - def create_spec(self) -> None: + def create_spec(self) -> BentoPathSpec: bentofile_path = os.path.join(self.working_dir, "bentofile.yaml") if not os.path.exists(bentofile_path): # if bentofile.yaml is not found, by default we will assume to watch all files @@ -79,19 +91,17 @@ def create_spec(self) -> None: with open(bentofile_path, "r") as f: build_config = BentoBuildConfig.from_yaml(f).with_defaults() - self.bento_spec = BentoPathSpec(build_config.include, build_config.exclude) # type: ignore (unfinished converter type) + return BentoPathSpec( + build_config.include, build_config.exclude, self.working_dir + ) - def should_include(self, path: str | Path) -> bool: + def should_include(self, path: Path) -> bool: # returns True if file with 'path' has changed, else False - if isinstance(path, Path): - path = path.__fspath__() - - return any( - self.bento_spec.includes( - path, recurse_exclude_spec=self.bento_spec.from_path(dirs) - ) - for dirs in self.watch_dirs - ) + str_path = path.as_posix() + for parent, spec in self._specs: + if fs.path.isparent(parent, str_path): + return spec.includes(fs.path.relativefrom(parent, str_path)) + return False def has_modification(self) -> bool: for changes in self.file_changes: diff --git a/src/bentoml/serving.py b/src/bentoml/serving.py index d28d88ecff9..260e6016391 100644 --- a/src/bentoml/serving.py +++ b/src/bentoml/serving.py @@ -204,7 +204,6 @@ def make_reload_plugin(working_dir: str, bentoml_home: str) -> dict[str, str]: return { "use": "bentoml._internal.utils.circus.watchfilesplugin.ServiceReloaderPlugin", "working_dir": working_dir, - "bentoml_home": bentoml_home, } diff --git a/tests/unit/_internal/utils/circus/test_watchfilesplugin.py b/tests/unit/_internal/utils/circus/test_watchfilesplugin.py index 049b2d86df1..ce4267744cc 100644 --- a/tests/unit/_internal/utils/circus/test_watchfilesplugin.py +++ b/tests/unit/_internal/utils/circus/test_watchfilesplugin.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging -import os import typing as t from pathlib import Path from typing import TYPE_CHECKING @@ -105,7 +104,6 @@ def test_look_after_trigger_restart(self) -> None: call_mock = self.setup_call_mock(watcher_name="reloader") self.setup_watch_mock(watch_return={(Change(1), file)}) plugin = self.make_plugin(ServiceReloaderPlugin, **self.plugin_kwargs) - Path(file).touch() with self.assertLogs("bentoml", level=logging.WARNING) as log: plugin.look_after() @@ -120,9 +118,23 @@ def test_look_after_trigger_restart_on_deletion(self): call_mock = self.setup_call_mock(watcher_name="reloader") self.setup_watch_mock(watch_return={(Change(3), file)}) plugin = self.make_plugin(ServiceReloaderPlugin, **self.plugin_kwargs) - os.remove(file) with self.assertLogs("bentoml", level=logging.WARNING) as log: plugin.look_after() call_mock.assert_called_with("restart", name="*") self.assertIn("DELETED", log.output[0]) + + def test_not_trigger_restart_on_ignored_file(self) -> None: + from watchfiles.main import Change + + file = self.reload_directory.joinpath("myrust.rs").__fspath__() + data_file = self.reload_directory.joinpath("fdir_one/data.temp").__fspath__() + + call_mock = self.setup_call_mock(watcher_name="reloader") + self.setup_watch_mock( + watch_return={(Change.added, file), (Change.added, data_file)} + ) + plugin = self.make_plugin(ServiceReloaderPlugin, **self.plugin_kwargs) + + plugin.look_after() + call_mock.assert_not_called() diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 36fbff0ed58..f324ca9b92f 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -28,9 +28,10 @@ def reload_directory( ./ ├── models/ # mock default bentoml home models directory ├── [fdir, fdir_one, fdir_two]/ + │ ├── .bentoignore │ ├── README.md - ├── subdir/ - │ ├── README.md + │ ├── subdir/ + │ │ ├── README.md │ │ └── app.py │ ├── somerust.rs │ └── app.py @@ -67,17 +68,19 @@ def reload_directory( build_config = BentoBuildConfig( service="service.py:svc", description="A mock service", - exclude=["*.rs"], ).with_defaults() bentofile = root / "bentofile.yaml" bentofile.touch() with bentofile.open("w", encoding="utf-8") as f: yaml.safe_dump(bentoml_cattr.unstructure(build_config), f) + (root / ".bentoignore").write_text("*.rs\n") + custom_library = ["fdir", "fdir_one", "fdir_two"] for app in custom_library: ap = root.joinpath(app) ap.mkdir() + (ap / ".bentoignore").write_text("*.temp\n") dir_files: list[tuple[str, list[t.Any]]] = [ ("README.md", []), ("subdir", ["README.md", "app.py"]),