From 4a4e5009e3ae6e3a1bfad682d54fe8ed99107f81 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 26 Feb 2025 14:42:52 -0800 Subject: [PATCH 01/13] dev: Goodbye, Mypy! MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch from running both Mypy and Pyright to just Pyright. I can't bear to keep accommodating Mypy's limitations compared to Pyright. I only introduced Pyright¹ in addition to (as opposed to replacing) existing Mypy usage because Pyright had functionality Mypy didn't that I wanted to use. Mypy at the time also supported some things Pyright didn't at the time, so I used both instead of switching. Over time though, Pyright became a lot more sophisticated than Mypy, and most of Mypy's complaints became false-positives or overly-conservative because of various limitations/choices. I've made comments (in code, in commit messages, on GitHub issues, etc.) for a while now that yeah, we probably should drop Mypy at this point, but it was always low-priority. And I knew I'd get fed up enough at some point to JFDI, and lo and behold, here we are! Type ignore comments which were only necessary for Mypy are removed, and the remaining ignore comments are converted to Pyright-specific syntax so they can target specific rules. Mypy-specific workarounds with cast() and TYPE_CHECKING are removed. The most recent version of Pyright is required for tests as it contains a fix² to type inference that without which a cast() is still necessary in one spot due to type inference changing after removal of Mypy-specific workarounds. Reverts the following commits wholesale - "runner.docker: Use os.getuid()/getgid() when present instead of conditioning on os.name" (23f618e7cbad13a2fa4cf74359aabaf7fba5990f) - "dev: Work around some mypy limitation when type checking a lambda" (31d708f90d9746c075dc27f9bf15fe8d31d9b954) - "dev: Ignore for mypy's sake some superclass calls" (d2fe958cc0f486850c07a8688c7024c950e75e2a) as part of the changes. Drops from dev dependencies a few types-* packages that were only necessary for Mypy. ¹ In "dev: Add (optional) type checking with Pyright to our tests" (e0faaf52231bf3a388edbaa23332df87e14ec631) ² --- .gitignore | 3 -- CHANGES.md | 11 +++++ doc/development.md | 17 ++----- mypy.ini | 50 ------------------- nextstrain/cli/__main__.py | 4 +- nextstrain/cli/authn/session.py | 2 +- nextstrain/cli/command/build.py | 2 +- nextstrain/cli/command/shell.py | 4 +- nextstrain/cli/command/view.py | 4 +- nextstrain/cli/gzip.py | 2 +- nextstrain/cli/net.py | 2 +- nextstrain/cli/remote/__init__.py | 33 +------------ nextstrain/cli/remote/nextstrain_dot_org.py | 6 +-- nextstrain/cli/remote/s3.py | 2 +- nextstrain/cli/resources/__init__.py | 2 +- nextstrain/cli/rst/__init__.py | 4 +- nextstrain/cli/rst/sphinx.py | 4 +- nextstrain/cli/runner/__init__.py | 54 +++------------------ nextstrain/cli/runner/aws_batch/__init__.py | 2 +- nextstrain/cli/runner/conda.py | 2 +- nextstrain/cli/runner/docker.py | 13 ++--- nextstrain/cli/runner/singularity.py | 5 +- nextstrain/cli/url.py | 2 +- pyrightconfig.json | 2 + setup.py | 13 +++-- tests/mypy.py | 9 ---- tests/pyright.py | 2 +- 27 files changed, 65 insertions(+), 191 deletions(-) delete mode 100644 mypy.ini delete mode 100644 tests/mypy.py diff --git a/.gitignore b/.gitignore index a0b2710e..23113701 100644 --- a/.gitignore +++ b/.gitignore @@ -10,9 +10,6 @@ environment* /dist/ /nextstrain_cli.egg-info/ -# mypy cache -/.mypy_cache/ - # OS generated files # ###################### .DS_Store diff --git a/CHANGES.md b/CHANGES.md index a0017b7c..7706e3c7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -29,6 +29,17 @@ development source code and as such may not be routinely kept up to date. in a browser (if any). ([#417](https://github.com/nextstrain/cli/pull/417)) +## Development + +* Goodbye, Mypy! We now use only Pyright for type checking. Both type + checkers used to contribute to our development in their own way, but over + time Pyright's become more sophisticated and Mypy's required more workarounds + to appease it than issues it caught. So long, and thanks for all the ~fish~ + type checks! + ([#419][]) + +[#419]: https://github.com/nextstrain/cli/pull/419 + # 8.5.4 (1 November 2024) diff --git a/doc/development.md b/doc/development.md index 33a42086..5756c573 100644 --- a/doc/development.md +++ b/doc/development.md @@ -24,8 +24,8 @@ Now test that you can run: The development environment includes our dev tools (described below): - pytest # runs doctests as well as mypy and flake8 - mypy -p nextstrain.cli + pytest # runs doctests as well as pyright and flake8 + pyright flake8 make -C doc livehtml @@ -147,12 +147,7 @@ catch errors earlier and be explicit about the interfaces expected and provided. Annotation pairs well with the functional approach taken by the package. -During development you can run static type checks using [mypy][]: - - $ mypy nextstrain - # No output is good! - -and [pyright][]: +During development you can run static type checks using [pyright][]: $ npx pyright ... @@ -160,8 +155,7 @@ and [pyright][]: 0 errors, 0 warnings, 0 infos Completed in 2sec -There are also many [editor integrations for mypy][], and Pyright is integrated -into VS Code's Python support. +There are also many [editor integrations for Pyright][]. The [`typing_extensions`][] module should be used for features not yet available in the the standard `typings` module of supported Python versions. @@ -210,9 +204,8 @@ which may be helpful during development; see `--help` for details. [source distributions]: https://packaging.python.org/en/latest/glossary/#term-Source-Distribution [built distributions]: https://packaging.python.org/en/latest/glossary/#term-Built-Distribution [type annotations]: https://www.python.org/dev/peps/pep-0484/ -[mypy]: http://mypy-lang.org/ [pyright]: https://github.com/microsoft/pyright -[editor integrations for mypy]: https://github.com/python/mypy#integrations +[editor integrations for Pyright]: https://microsoft.github.io/pyright/#/installation [`typing_extensions`]: https://pypi.org/project/typing-extensions [Flake8]: https://flake8.pycqa.org [post-release version]: https://peps.python.org/pep-0440/#post-releases diff --git a/mypy.ini b/mypy.ini deleted file mode 100644 index d1b4d387..00000000 --- a/mypy.ini +++ /dev/null @@ -1,50 +0,0 @@ -[mypy] -# We currently aim for compat with 3.8. -python_version = 3.8 -namespace_packages = True - -# Check function bodies which don't have a typed signature. This prevents a -# single untyped function from poisoning other typed functions in a call chain. -check_untyped_defs = True - -# Require functions with an annotated return type to be explicit about -# potentially returning None (via Optional[…]). -strict_optional = False - -[mypy-nextstrain.cli.markdown] -ignore_errors = True - -# In the future maybe we can contribute typing stubs for these modules (either -# complete stubs in the python/typeshed repo or partial stubs just in -# this repo), but for now that's more work than we want to invest. These -# sections let us ignore missing stubs for specific modules without hiding all -# missing errors like (--ignore-missing-imports). -[mypy-boto3] -ignore_missing_imports = True - -[mypy-botocore.config] -ignore_missing_imports = True - -[mypy-botocore.exceptions] -ignore_missing_imports = True - -[mypy-importlib.metadata] -ignore_missing_imports = True - -[mypy-importlib.resources] -ignore_missing_imports = True - -[mypy-fasteners] -ignore_missing_imports = True - -[mypy-fsspec] -ignore_missing_imports = True - -[mypy-nextstrain] -ignore_missing_imports = True - -[mypy-wcmatch.glob] -ignore_missing_imports = True - -[mypy-wrapt] -ignore_missing_imports = True diff --git a/nextstrain/cli/__main__.py b/nextstrain/cli/__main__.py index bd634f35..9280b2a0 100644 --- a/nextstrain/cli/__main__.py +++ b/nextstrain/cli/__main__.py @@ -13,8 +13,8 @@ def main(): # rest of the codebase. Avoids needing to instruct folks to set # PYTHONIOENCODING=UTF-8 or use Python's UTF-8 mode (-X utf8 or # PYTHONUTF8=1). - reconfigure_stdio(sys.stdout) # type: ignore[arg-type] - reconfigure_stdio(sys.stderr) # type: ignore[arg-type] + reconfigure_stdio(sys.stdout) # pyright: ignore[reportArgumentType] + reconfigure_stdio(sys.stderr) # pyright: ignore[reportArgumentType] return cli.run( argv[1:] ) diff --git a/nextstrain/cli/authn/session.py b/nextstrain/cli/authn/session.py index d7524ed1..7c60963f 100644 --- a/nextstrain/cli/authn/session.py +++ b/nextstrain/cli/authn/session.py @@ -38,7 +38,7 @@ def __new__(cls, origin: Origin) -> 'Session': cls = CognitoSession else: cls = OpenIDSession - return super().__new__(cls) # type: ignore + return super().__new__(cls) # pyright: ignore[reportArgumentType] def authenticate_with_password(self, username: str, password: str) -> None: raise NotImplementedError diff --git a/nextstrain/cli/command/build.py b/nextstrain/cli/command/build.py index d6603055..f19c46e3 100644 --- a/nextstrain/cli/command/build.py +++ b/nextstrain/cli/command/build.py @@ -395,7 +395,7 @@ def pathogen_volumes(directory: Path) -> Tuple[NamedVolume, NamedVolume]: debug(f"Using {working_volume.src} as working ({working_volume.name}) volume") assert build_volume.src <= working_volume.src - assert docker.mount_point(build_volume) <= docker.mount_point(working_volume) # type: ignore[attr-defined] # for mypy + assert docker.mount_point(build_volume) <= docker.mount_point(working_volume) return build_volume, working_volume diff --git a/nextstrain/cli/command/shell.py b/nextstrain/cli/command/shell.py index baecadfe..eb88cecc 100644 --- a/nextstrain/cli/command/shell.py +++ b/nextstrain/cli/command/shell.py @@ -62,7 +62,7 @@ def run(opts): # will always be small so some special-casing seems ok. # -trs, 5 Jan 2023 (updated from 25 Sept 2018) for volume in opts.volumes: - print(" %s is from %s" % (docker.mount_point(volume), volume.src.resolve(strict = True))) # type: ignore + print(" %s is from %s" % (docker.mount_point(volume), volume.src.resolve(strict = True))) print() @@ -95,7 +95,7 @@ def run(opts): opts.volumes.append(NamedVolume("bashrc", bashrc, dir = False, writable = False)) history_volume = NamedVolume("bash_history", history_file, dir = False) - history_file = docker.mount_point(history_volume) # type: ignore[attr-defined] # for mypy + history_file = docker.mount_point(history_volume) opts.volumes.append(history_volume) extra_env = { diff --git a/nextstrain/cli/command/view.py b/nextstrain/cli/command/view.py index 6f00f789..73829fb4 100644 --- a/nextstrain/cli/command/view.py +++ b/nextstrain/cli/command/view.py @@ -404,8 +404,8 @@ def resolve(host: str, port: str) -> Tuple[str, int]: ip4 = [a for a in addrs if a.family is AF_INET] ip6 = [a for a in addrs if a.family is AF_INET6] - return ((ip4[0].sockaddr[0], ip4[0].sockaddr[1]) if ip4 # type: ignore - else (ip6[0].sockaddr[0], ip6[0].sockaddr[1]) if ip6 # type: ignore + return ((ip4[0].sockaddr[0], ip4[0].sockaddr[1]) if ip4 # pyright: ignore[reportReturnType] + else (ip6[0].sockaddr[0], ip6[0].sockaddr[1]) if ip6 else (str(host), int(port))) diff --git a/nextstrain/cli/gzip.py b/nextstrain/cli/gzip.py index f9263c5d..f41d41f4 100644 --- a/nextstrain/cli/gzip.py +++ b/nextstrain/cli/gzip.py @@ -115,7 +115,7 @@ def __init__(self, stream: BinaryIO): def writable(self): return True - def write(self, data: bytes): # type: ignore[override] + def write(self, data: bytes): # pyright: ignore[reportIncompatibleMethodOverride] assert self.stream and self.__gunzip return self.stream.write(self.__gunzip.decompress(data)) diff --git a/nextstrain/cli/net.py b/nextstrain/cli/net.py index 7ddbb99e..3860708c 100644 --- a/nextstrain/cli/net.py +++ b/nextstrain/cli/net.py @@ -44,7 +44,7 @@ def resolve_host(host: str, family: AddressFamily = AF_UNSPEC) -> Set[Union[IPv4 # ¹ See # and . return { - ip_address(getnameinfo(sockaddr, NI_NUMERICHOST)[0]) # type: ignore + ip_address(getnameinfo(sockaddr, NI_NUMERICHOST)[0]) # pyright: ignore[reportArgumentType] for _, _, _, _, sockaddr in getaddrinfo(host, None, family = family) if isinstance(sockaddr[0], str) diff --git a/nextstrain/cli/remote/__init__.py b/nextstrain/cli/remote/__init__.py index 336a40a1..ad844c3c 100644 --- a/nextstrain/cli/remote/__init__.py +++ b/nextstrain/cli/remote/__init__.py @@ -4,42 +4,13 @@ import os import re -from typing import cast, Tuple, TYPE_CHECKING +from typing import Tuple from ..errors import UserError from ..net import is_loopback from ..rst import doc_url from ..types import RemoteModule from ..url import URL -from . import ( - s3 as __s3, - nextstrain_dot_org as __nextstrain_dot_org, -) - - -# While PEP-0544 allows for modules to serve as implementations of Protocols¹, -# Mypy doesn't currently support it². Pyright does³, however, so we tell Mypy -# to "trust us", but let Pyright actually check our work. Mypy considers the -# MYPY variable to always be True when evaluating the code, regardless of the -# assignment below. -# -# This bit of type checking chicanery is not ideal, but the benefit of having -# our module interfaces actually checked by Pyright is worth it. In the -# future, we should maybe ditch Mypy in favor of Pyright alone, but I didn't -# want to put in the due diligence for a full switchover right now. -# -# -trs, 12 August 2021 -# -# ¹ https://www.python.org/dev/peps/pep-0544/#modules-as-implementations-of-protocols -# ² https://github.com/python/mypy/issues/5018 -# ³ https://github.com/microsoft/pyright/issues/1341 -# -MYPY = False -if TYPE_CHECKING and MYPY: - s3 = cast(RemoteModule, __s3) - nextstrain_dot_org = cast(RemoteModule, __nextstrain_dot_org) -else: - s3 = __s3 - nextstrain_dot_org = __nextstrain_dot_org +from . import s3, nextstrain_dot_org NEXTSTRAIN_DOT_ORG = os.environ.get("NEXTSTRAIN_DOT_ORG") \ diff --git a/nextstrain/cli/remote/nextstrain_dot_org.py b/nextstrain/cli/remote/nextstrain_dot_org.py index 08a2a7e3..1e5afe4c 100644 --- a/nextstrain/cli/remote/nextstrain_dot_org.py +++ b/nextstrain/cli/remote/nextstrain_dot_org.py @@ -240,7 +240,7 @@ def put(endpoint, file, media_type): try: response = http.put( endpoint, - data = data, # type: ignore + data = data, headers = { "Content-Type": media_type, "Content-Encoding": "gzip" }) @@ -803,7 +803,7 @@ def __call__(self, request: requests.PreparedRequest) -> requests.PreparedReques request.headers["Authorization"] = self.user.http_authorization # Used in error handling for more informative error messages - request._user = self.user # type: ignore + request._user = self.user # pyright: ignore[reportAttributeAccessIssue] return request @@ -852,7 +852,7 @@ def raise_for_status(origin: Origin, response: requests.Response) -> None: elif status in {401, 403}: try: - user = response.request._user # type: ignore + user = response.request._user # pyright: ignore[reportAttributeAccessIssue] except AttributeError: user = None diff --git a/nextstrain/cli/remote/s3.py b/nextstrain/cli/remote/s3.py index 583d56ad..3a756665 100644 --- a/nextstrain/cli/remote/s3.py +++ b/nextstrain/cli/remote/s3.py @@ -95,7 +95,7 @@ def upload(url: URL, local_files: List[Path], dry_run: bool = False) -> Iterable meta = { "ContentType": content_type, "ContentEncoding": "gzip" } else: # Already compressed; don't compress it again. - data = local_file.open("rb") # type: ignore + data = local_file.open("rb") meta = { "ContentType": encoding_type } with data: diff --git a/nextstrain/cli/resources/__init__.py b/nextstrain/cli/resources/__init__.py index cbdcac52..4f43389d 100644 --- a/nextstrain/cli/resources/__init__.py +++ b/nextstrain/cli/resources/__init__.py @@ -17,7 +17,7 @@ if sys.version_info >= (3, 11): from importlib.resources import files as _files, as_file as _as_file else: - from importlib_resources import files as _files, as_file as _as_file # type: ignore[import] + from importlib_resources import files as _files, as_file as _as_file from pathlib import Path from typing import ContextManager diff --git a/nextstrain/cli/rst/__init__.py b/nextstrain/cli/rst/__init__.py index cfc7ce02..e66eb32a 100644 --- a/nextstrain/cli/rst/__init__.py +++ b/nextstrain/cli/rst/__init__.py @@ -18,11 +18,11 @@ import docutils.transforms import os import re -from docutils.core import publish_string as convert_rst_to_string, publish_doctree as convert_rst_to_doctree # type: ignore +from docutils.core import publish_string as convert_rst_to_string, publish_doctree as convert_rst_to_doctree from docutils.parsers.rst import Directive from docutils.parsers.rst.directives import register_directive from docutils.parsers.rst.roles import register_local_role -from docutils.utils import unescape # type: ignore +from docutils.utils import unescape from ..__version__ import __version__ as cli_version from ..util import remove_suffix from .sphinx import TextWriter diff --git a/nextstrain/cli/rst/sphinx.py b/nextstrain/cli/rst/sphinx.py index 611c3d4a..b1a813b1 100644 --- a/nextstrain/cli/rst/sphinx.py +++ b/nextstrain/cli/rst/sphinx.py @@ -71,7 +71,7 @@ def dispatch_visit(self, node: Node) -> None: method(node) break else: - super().dispatch_visit(node) # type: ignore + super().dispatch_visit(node) def dispatch_departure(self, node: Node) -> None: """ @@ -88,7 +88,7 @@ def dispatch_departure(self, node: Node) -> None: method(node) break else: - super().dispatch_departure(node) # type: ignore + super().dispatch_departure(node) # Originally from sphinx/writers/text.py (version 4.3.2) diff --git a/nextstrain/cli/runner/__init__.py b/nextstrain/cli/runner/__init__.py index 83671570..a0954d66 100644 --- a/nextstrain/cli/runner/__init__.py +++ b/nextstrain/cli/runner/__init__.py @@ -1,16 +1,10 @@ import argparse import os from argparse import ArgumentParser, ArgumentTypeError -from typing import cast, List, Union, TYPE_CHECKING +from typing import List, Union # TODO: Use typing.TypeAlias once Python 3.10 is the minimum supported version. from typing_extensions import TypeAlias -from . import ( - docker as __docker, - conda as __conda, - singularity as __singularity, - ambient as __ambient, - aws_batch as __aws_batch, -) +from . import docker, conda, singularity, ambient, aws_batch from .. import config, env, hostenv from ..argparse import DirectoryPath, SKIP_AUTO_DEFAULT_IN_HELP from ..errors import UserError @@ -19,38 +13,6 @@ from ..volume import NamedVolume -# While PEP-0544 allows for modules to serve as implementations of Protocols¹, -# Mypy doesn't currently support it². Pyright does³, however, so we tell Mypy -# to "trust us", but let Pyright actually check our work. Mypy considers the -# MYPY variable to always be True when evaluating the code, regardless of the -# assignment below. -# -# This bit of type checking chicanery is not ideal, but the benefit of having -# our module interfaces actually checked by Pyright is worth it. In the -# future, we should maybe ditch Mypy in favor of Pyright alone, but I didn't -# want to put in the due diligence for a full switchover right now. -# -# -trs, 12 August 2021 -# -# ¹ https://www.python.org/dev/peps/pep-0544/#modules-as-implementations-of-protocols -# ² https://github.com/python/mypy/issues/5018 -# ³ https://github.com/microsoft/pyright/issues/1341 -# -MYPY = False -if TYPE_CHECKING and MYPY: - docker = cast(RunnerModule, __docker) - conda = cast(RunnerModule, __conda) - singularity = cast(RunnerModule, __singularity) - ambient = cast(RunnerModule, __ambient) - aws_batch = cast(RunnerModule, __aws_batch) -else: - docker = __docker - conda = __conda - singularity = __singularity - ambient = __ambient - aws_batch = __aws_batch - - all_runners: List[RunnerModule] = [ docker, conda, @@ -189,10 +151,10 @@ def register_arguments(parser: ArgumentParser, runners: List[RunnerModule], exec # Image to use; shared by Docker, AWS Batch, and Singularity runners development.add_argument( "--image", - help = "Container image name to use for the Nextstrain runtime " # type: ignore - f"(default: %(default)s for Docker and AWS Batch, {singularity.DEFAULT_IMAGE} for Singularity)", # type: ignore + help = "Container image name to use for the Nextstrain runtime " + f"(default: %(default)s for Docker and AWS Batch, {singularity.DEFAULT_IMAGE} for Singularity)", metavar = "", - default = docker.DEFAULT_IMAGE) # type: ignore + default = docker.DEFAULT_IMAGE) # Program to execute # @@ -246,7 +208,7 @@ def run(opts: Options, working_volume: NamedVolume = None, extra_env: Env = {}, ) ] - if (opts.image is not docker.DEFAULT_IMAGE # type: ignore + if (opts.image is not docker.DEFAULT_IMAGE and opts.__runner__ in {conda, ambient}): why_runner = "the configured default" if default_runner in {conda, ambient} else f"selected by --{runner_name(opts.__runner__)}" raise UserError(f""" @@ -263,8 +225,8 @@ def run(opts: Options, working_volume: NamedVolume = None, extra_env: Env = {}, # Account for potentially different defaults for --image depending on the # selected runner. - if opts.__runner__ is singularity and opts.image is docker.DEFAULT_IMAGE: # type: ignore - opts.image = singularity.DEFAULT_IMAGE # type: ignore + if opts.__runner__ is singularity and opts.image is docker.DEFAULT_IMAGE: + opts.image = singularity.DEFAULT_IMAGE if envdirs := os.environ.get("NEXTSTRAIN_RUNTIME_ENVDIRS"): try: diff --git a/nextstrain/cli/runner/aws_batch/__init__.py b/nextstrain/cli/runner/aws_batch/__init__.py index b3a0fdee..9d8219ef 100644 --- a/nextstrain/cli/runner/aws_batch/__init__.py +++ b/nextstrain/cli/runner/aws_batch/__init__.py @@ -91,7 +91,7 @@ from ...types import Env, RunnerSetupStatus, RunnerTestResults, RunnerUpdateStatus from ...util import colored, prose_list, warn from ... import config -from .. import docker # type: ignore[no-redef] # for mypy +from .. import docker from . import jobs, s3 diff --git a/nextstrain/cli/runner/conda.py b/nextstrain/cli/runner/conda.py index e938c7c2..90e726d5 100644 --- a/nextstrain/cli/runner/conda.py +++ b/nextstrain/cli/runner/conda.py @@ -217,7 +217,7 @@ def setup_micromamba(dry_run: bool = False, force: bool = False) -> bool: assert content_type == "application/x-tar", \ f"unknown content-type for micromamba dist: {content_type}" - with tarfile.open(fileobj = response.raw, mode = "r|*") as tar: # type: ignore + with tarfile.open(fileobj = response.raw, mode = "r|*") as tar: # pyright: ignore[reportArgumentType, reportCallIssue] # Ignore archive members starting with "/" and or including ".." parts, # as these can be used (maliciously or accidentally) to overwrite # unintended files (e.g. files outside of MICROMAMBA_ROOT). diff --git a/nextstrain/cli/runner/docker.py b/nextstrain/cli/runner/docker.py index 751d2e65..76068ffa 100644 --- a/nextstrain/cli/runner/docker.py +++ b/nextstrain/cli/runner/docker.py @@ -153,15 +153,6 @@ def run(opts, argv, working_volume = None, extra_env: Env = {}, cpus: int = None # point to. (They're probably unchanged, but ya never know.) stdio_isatty = all(os.isatty(fd) for fd in [0, 1, 2]) - # The getuid()/getgid() functions are documented to be only available on - # Unix systems, not, for example, Windows. - # - # We use this weird getattr() construction in order to appease Mypy, which - # otherwise has trouble with platform-specific attributes on the "os" - # module. - uid = getattr(os, "getuid", lambda: None)() - gid = getattr(os, "getgid", lambda: None)() - # If the image supports /nextstrain/env.d, then pass any env vars using it # so values aren't visible in the container's config (e.g. visible via # `docker inspect`). @@ -209,7 +200,9 @@ def run(opts, argv, working_volume = None, extra_env: Env = {}, cpus: int = None # On Unix (POSIX) systems, run the process in the container with the same # UID/GID so that file ownership is correct in the bind mount directories. - *(["--user=%d:%d" % (uid, gid)] if uid and gid else []), + # The getuid()/getgid() functions are documented to be only available on + # Unix systems, not, for example, Windows. + *(["--user=%d:%d" % (os.getuid(), os.getgid())] if os.name == "posix" else []), # Map directories to bind mount into the container. *["--volume=%s:%s:%s" % (v.src.resolve(strict = True), mount_point(v), "rw" if v.writable else "ro") diff --git a/nextstrain/cli/runner/singularity.py b/nextstrain/cli/runner/singularity.py index 14573223..8f8a352d 100644 --- a/nextstrain/cli/runner/singularity.py +++ b/nextstrain/cli/runner/singularity.py @@ -95,7 +95,7 @@ from ..paths import RUNTIMES from ..types import Env, RunnerSetupStatus, RunnerTestResults, RunnerUpdateStatus from ..util import capture_output, colored, exec_or_return, split_image_name, warn -from . import docker # type: ignore[no-redef] # for mypy +from . import docker flatten = itertools.chain.from_iterable @@ -142,8 +142,7 @@ "SINGULARITYENV_PROMPT_COMMAND": "unset PROMPT_COMMAND; #", } -# Not "… = lambda: [" due to mypy. See commit history. -def SINGULARITY_EXEC_ARGS(): return [ +SINGULARITY_EXEC_ARGS = lambda: [ # Increase isolation. # # In the future, we may find we want to use additional related flags to diff --git a/nextstrain/cli/url.py b/nextstrain/cli/url.py index 83d09ddb..5498baaa 100644 --- a/nextstrain/cli/url.py +++ b/nextstrain/cli/url.py @@ -41,7 +41,7 @@ class URL(SplitResult): __slots__ = () def __new__(cls, url: str) -> 'URL': - return super().__new__(cls, *urlsplit(url)) # type: ignore # for mypy + return super().__new__(cls, *urlsplit(url)) # This is for the type checkers, which otherwise consider URL.__init__ to # have a signature based on SplitResult.__new__ instead of our own __new__. diff --git a/pyrightconfig.json b/pyrightconfig.json index 43fc4cb9..7a328ed3 100644 --- a/pyrightconfig.json +++ b/pyrightconfig.json @@ -20,6 +20,8 @@ "reportSelfClsParameterName": true, "reportInvalidStubStatement": true, "reportIncompleteStub": true, + "reportUnnecessaryCast": true, + "reportUnnecessaryTypeIgnoreComment": true, "reportUnsupportedDunderAll": true, "strictParameterNoneValue": false } diff --git a/setup.py b/setup.py index 2f3a3d72..15e0506d 100644 --- a/setup.py +++ b/setup.py @@ -127,12 +127,16 @@ def find_namespaced_packages(namespace): # error described in https://github.com/fsspec/s3fs/issues/790 "fsspec !=2023.9.1", "s3fs[boto3] >=2021.04.0, !=2023.9.1", + + # From 2.0.0 onwards, urllib3 is better typed, but not usable (given + # our dep tree) on 3.8 and 3.9 so we use types-urllib3 there (see + # below). + "urllib3 >=2.0.0; python_version >= '3.10'", ], extras_require = { "dev": [ "flake8 >=4.0.0", - "mypy <1.6", "nextstrain-sphinx-theme>=2022.5", "pytest; python_version != '3.9'", "pytest !=7.0.0; python_version == '3.9'", @@ -144,9 +148,10 @@ def find_namespaced_packages(namespace): "sphinx_rtd_theme", "types-boto3", "types-botocore", - "types-docutils", - "types-setuptools", - "types-requests", + + # Only necessary for urllib3 <2.0.0, which we only have to use on + # Python 3.8 and 3.9. + "types-urllib3; python_version < '3.10'" ], }, ) diff --git a/tests/mypy.py b/tests/mypy.py deleted file mode 100644 index 09117ac7..00000000 --- a/tests/mypy.py +++ /dev/null @@ -1,9 +0,0 @@ -from pathlib import Path -from subprocess import run - -topdir = Path(__file__).resolve().parent.parent - -def pytest_mypy(): - # Check the exit status ourselves for nicer test output on failure - result = run(["mypy", "-p", "nextstrain.cli"], cwd = topdir) - assert result.returncode == 0, "mypy exited with errors" diff --git a/tests/pyright.py b/tests/pyright.py index 425b865f..5b1cea78 100644 --- a/tests/pyright.py +++ b/tests/pyright.py @@ -11,7 +11,7 @@ if pyright: pyright = [pyright] elif npx: - pyright = [npx, "pyright@<1.1.217 || >1.1.218"] + pyright = [npx, "pyright@>=1.1.396"] else: pyright = None From c97da1daeb28c628754092096c96bc4304588969 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Tue, 26 Nov 2024 10:06:08 -0800 Subject: [PATCH 02/13] dev: Set NEXTSTRAIN_HOME to a controlled directory in tests Allows for tests that require config and other app data and also isolates tests from the local user's personal config and data. --- CHANGES.md | 4 ++++ devel/pytest | 2 +- tests/data/home/.gitignore | 1 + tests/data/home/config | 1 + 4 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 tests/data/home/.gitignore create mode 100644 tests/data/home/config diff --git a/CHANGES.md b/CHANGES.md index 7706e3c7..509641ff 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -38,6 +38,10 @@ development source code and as such may not be routinely kept up to date. type checks! ([#419][]) +* `NEXTSTRAIN_HOME` is now set for tests so as to avoid interference with the + local user's personal config and data. + ([#419][]) + [#419]: https://github.com/nextstrain/cli/pull/419 diff --git a/devel/pytest b/devel/pytest index c1e0442f..02cdc44d 100755 --- a/devel/pytest +++ b/devel/pytest @@ -4,4 +4,4 @@ set -euo pipefail cd "$(dirname "$0")/.." set -x -NEXTSTRAIN_RST_STRICT=1 pytest "$@" +NEXTSTRAIN_RST_STRICT=1 NEXTSTRAIN_HOME=tests/data/home/ pytest "$@" diff --git a/tests/data/home/.gitignore b/tests/data/home/.gitignore new file mode 100644 index 00000000..96ea3343 --- /dev/null +++ b/tests/data/home/.gitignore @@ -0,0 +1 @@ +/lock diff --git a/tests/data/home/config b/tests/data/home/config new file mode 100644 index 00000000..a515f06e --- /dev/null +++ b/tests/data/home/config @@ -0,0 +1 @@ +# Nextstrain CLI config file used in tests, via NEXTSTRAIN_HOME=tests/data/home/. From 42a9dd188f1e950d70dff8997a9d8be64eb8ed26 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Fri, 22 Jul 2022 11:40:54 -0700 Subject: [PATCH 03/13] dev: Support for cram tests and example test file --- .cramrc | 3 +++ .gitignore | 3 +++ CHANGES.md | 4 ++++ devel/pytest | 3 ++- doc/development.md | 19 +++++++++++++++++-- setup.py | 1 + tests/cram.py | 14 ++++++++++++++ tests/env | 6 ++++++ tests/version.cram | 9 +++++++++ 9 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 .cramrc create mode 100644 tests/cram.py create mode 100644 tests/env create mode 100644 tests/version.cram diff --git a/.cramrc b/.cramrc new file mode 100644 index 00000000..839eed1d --- /dev/null +++ b/.cramrc @@ -0,0 +1,3 @@ +[cram] +shell = bash +indent = 4 diff --git a/.gitignore b/.gitignore index 23113701..26eed821 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,6 @@ environment* Icon? ehthumbs.db Thumbs.db + +# cram test run outputs +/tests/*.cram.err diff --git a/CHANGES.md b/CHANGES.md index 509641ff..49afeec6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -38,6 +38,10 @@ development source code and as such may not be routinely kept up to date. type checks! ([#419][]) +* [Cram](https://bitheap.org/cram/) test files are now supported, with one + example file for now. + ([#419][]) + * `NEXTSTRAIN_HOME` is now set for tests so as to avoid interference with the local user's personal config and data. ([#419][]) diff --git a/devel/pytest b/devel/pytest index 02cdc44d..4accf873 100755 --- a/devel/pytest +++ b/devel/pytest @@ -4,4 +4,5 @@ set -euo pipefail cd "$(dirname "$0")/.." set -x -NEXTSTRAIN_RST_STRICT=1 NEXTSTRAIN_HOME=tests/data/home/ pytest "$@" +source tests/env +exec pytest "$@" diff --git a/doc/development.md b/doc/development.md index 5756c573..8d78a5b8 100644 --- a/doc/development.md +++ b/doc/development.md @@ -24,7 +24,8 @@ Now test that you can run: The development environment includes our dev tools (described below): - pytest # runs doctests as well as pyright and flake8 + pytest # runs doctests as well as cram, pyright, and flake8 + cram tests/*.cram pyright flake8 make -C doc livehtml @@ -138,7 +139,21 @@ Tests are run with [pytest](https://pytest.org). To run everything, use: ./devel/pytest -This includes the type annotation and static analysis checks described below. +This includes Cram and the type annotation and static analysis checks described +below. + +## Cram + +Tests for the command-line interface use [Cram](https://bitheap.org/cram/) and +are written in `tests/*.cram` files. You can run individual test files, e.g.: + + $ cram -v tests/version.cram + tests/version.cram: passed + # Ran 1 tests, 0 skipped, 0 failed. + +Cram tests should always start by sourcing `tests/env` so that they run +identically regardless of if they're run directly with `cram` or via +`./devel/pytest`. ## Type annotations and static analysis diff --git a/setup.py b/setup.py index 15e0506d..366b62f4 100644 --- a/setup.py +++ b/setup.py @@ -136,6 +136,7 @@ def find_namespaced_packages(namespace): extras_require = { "dev": [ + "cram >=0.7", "flake8 >=4.0.0", "nextstrain-sphinx-theme>=2022.5", "pytest; python_version != '3.9'", diff --git a/tests/cram.py b/tests/cram.py new file mode 100644 index 00000000..e19aaf22 --- /dev/null +++ b/tests/cram.py @@ -0,0 +1,14 @@ +import os +import pytest +from pathlib import Path +from subprocess import run + +testsdir = Path(__file__).resolve().parent +topdir = testsdir.parent + +@pytest.mark.skipif(os.name != "posix", reason = "cram requires a POSIX platform") +@pytest.mark.parametrize("testfile", testsdir.glob("*.cram"), ids = str) +def pytest_cram(testfile): + # Check the exit status ourselves for nicer test output on failure + result = run(["cram", testfile], cwd = topdir) + assert result.returncode == 0, "cram exited with errors" diff --git a/tests/env b/tests/env new file mode 100644 index 00000000..8366491b --- /dev/null +++ b/tests/env @@ -0,0 +1,6 @@ +#!/bin/bash +# Shebang is not strictly necessary—we only source this file—but it lets our +# GitHub Action for ShellCheck to find it properly. + +export NEXTSTRAIN_RST_STRICT=1 +export NEXTSTRAIN_HOME=tests/data/home/ diff --git a/tests/version.cram b/tests/version.cram new file mode 100644 index 00000000..01acc8ea --- /dev/null +++ b/tests/version.cram @@ -0,0 +1,9 @@ +Cram setup. + + $ source "$TESTDIR"/env + +Version. + + $ nextstrain version + nextstrain[.]cli [0-9]+[.][0-9]+[.][0-9]+\S* (re) + From 2fcb8b45c3d213aee33fd9493190be88cb7363ab Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 13 Jan 2025 12:15:37 -0800 Subject: [PATCH 04/13] version: Emit prose name ("Nextstrain CLI") rather than package name ("nextstrain.cli") The prose name is more meaningful to users than the internal package name. --- CHANGES.md | 6 ++++++ nextstrain/cli/command/version.py | 3 +-- tests/version.cram | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 49afeec6..c5d1f960 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -13,6 +13,12 @@ development source code and as such may not be routinely kept up to date. # __NEXT__ +## Improvements + +* `nextstrain version` now reports itself as "Nextstrain CLI" (instead of + "nextstrain.cli"). + ([#419][]) + ## Bug fixes * Fixed a rare but possible error case in `nextstrain view` and `nextstrain diff --git a/nextstrain/cli/command/version.py b/nextstrain/cli/command/version.py index f4623a87..b65d6b2c 100644 --- a/nextstrain/cli/command/version.py +++ b/nextstrain/cli/command/version.py @@ -5,7 +5,6 @@ import sys from textwrap import indent from ..__version__ import __version__ -from .. import __package__ as __top_package__ from ..runner import all_runners, default_runner from ..util import runner_name @@ -21,7 +20,7 @@ def register_parser(subparser): def run(opts): - print(__top_package__, __version__) + print("Nextstrain CLI", __version__) if opts.verbose: print() diff --git a/tests/version.cram b/tests/version.cram index 01acc8ea..09c0aecf 100644 --- a/tests/version.cram +++ b/tests/version.cram @@ -5,5 +5,5 @@ Cram setup. Version. $ nextstrain version - nextstrain[.]cli [0-9]+[.][0-9]+[.][0-9]+\S* (re) + Nextstrain CLI [0-9]+[.][0-9]+[.][0-9]+\S* (re) From 932e705ea946d37e59723cc89ab3d19c54f792b2 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Mar 2025 12:41:46 -0800 Subject: [PATCH 05/13] version: Add prominent indication if this is a standalone version (i.e. bundles Python) This is a very useful bit of information to know, but previously it was only discernible by knowing what to look for (e.g. the Python executable path) in the --verbose output. --- CHANGES.md | 3 ++- nextstrain/cli/command/version.py | 4 ++-- tests/version.cram | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index c5d1f960..56d801da 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -16,7 +16,8 @@ development source code and as such may not be routinely kept up to date. ## Improvements * `nextstrain version` now reports itself as "Nextstrain CLI" (instead of - "nextstrain.cli"). + "nextstrain.cli") and indicates if it is a "standalone" installation + (self-contained, bundles Python) or not. ([#419][]) ## Bug fixes diff --git a/nextstrain/cli/command/version.py b/nextstrain/cli/command/version.py index b65d6b2c..3087787e 100644 --- a/nextstrain/cli/command/version.py +++ b/nextstrain/cli/command/version.py @@ -6,7 +6,7 @@ from textwrap import indent from ..__version__ import __version__ from ..runner import all_runners, default_runner -from ..util import runner_name +from ..util import runner_name, standalone_installation def register_parser(subparser): parser = subparser.add_parser("version", help = "Show version information") @@ -20,7 +20,7 @@ def register_parser(subparser): def run(opts): - print("Nextstrain CLI", __version__) + print("Nextstrain CLI", __version__, "(standalone)" if standalone_installation() else "") if opts.verbose: print() diff --git a/tests/version.cram b/tests/version.cram index 09c0aecf..f31c8940 100644 --- a/tests/version.cram +++ b/tests/version.cram @@ -5,5 +5,7 @@ Cram setup. Version. $ nextstrain version - Nextstrain CLI [0-9]+[.][0-9]+[.][0-9]+\S* (re) + Nextstrain CLI [0-9]+[.][0-9]+[.][0-9]+\S* (re) + $ python3 -Xnextstrain-cli-is-standalone -m nextstrain.cli version + Nextstrain CLI [0-9]+[.][0-9]+[.][0-9]+\S* \(standalone\) (re) From d7cd538536c71fd271c217a00b36d43da0e06894 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Tue, 4 Mar 2025 20:49:15 -0800 Subject: [PATCH 06/13] dev: Fix open_browser() tests to really, truly, actually not pop a browser locally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous approach of late-loading of nextstrain.cli.command.view only worked when running an explicit set of tests/*.py files—pytest's discovery process for doctests and pytest_*() functions means it loads all of nextstrain.cli too early otherwise—and most of the time a new tab or two was popped open because the environment modification was too late. This was annoying. The previous approach also predated our devel/pytest wrapper to reliably set env by insisting on a common entrypoint to tests. With that now present, the easiest and more reliable thing is to move the setting of BROWSER there before pytest is ever exec-ed. --- CHANGES.md | 3 +++ tests/env | 3 +++ tests/open_browser.py | 11 ++--------- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 56d801da..e7b3f9c0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -53,6 +53,9 @@ development source code and as such may not be routinely kept up to date. local user's personal config and data. ([#419][]) +* Running tests no longer opens a browser. \o/ + ([#419][]) + [#419]: https://github.com/nextstrain/cli/pull/419 diff --git a/tests/env b/tests/env index 8366491b..ca9f7fd7 100644 --- a/tests/env +++ b/tests/env @@ -4,3 +4,6 @@ export NEXTSTRAIN_RST_STRICT=1 export NEXTSTRAIN_HOME=tests/data/home/ + +# Must do this early, before nextstrain.cli.command.view is loaded +export BROWSER=echo diff --git a/tests/open_browser.py b/tests/open_browser.py index 8ab302f1..4645839b 100644 --- a/tests/open_browser.py +++ b/tests/open_browser.py @@ -9,6 +9,7 @@ import multiprocessing import os import pytest +from nextstrain.cli.command.view import open_browser if os.name != "posix": pytest.skip("@pytest.mark.forked requires a POSIX platform", allow_module_level = True) @@ -17,18 +18,10 @@ @pytest.mark.forked def pytest_open_browser_fork(): multiprocessing.set_start_method("fork") - _open_browser() + assert open_browser("https://nextstrain.org") @pytest.mark.forked def pytest_open_browser_spawn(): multiprocessing.set_start_method("spawn") - _open_browser() - - -def _open_browser(): - # Must do this early, before nextstrain.cli.command.view is loaded - os.environ["BROWSER"] = "echo" - - from nextstrain.cli.command.view import open_browser assert open_browser("https://nextstrain.org") From 146ce769aca8cbc6292f1240e33f39f069849920 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Tue, 4 Mar 2025 11:36:08 -0800 Subject: [PATCH 07/13] util: Disable colored() under NO_COLOR and when stdout is not a TTY Good manners and standard practice. Almost essential for writing readable Cram tests too. --- CHANGES.md | 6 ++++++ nextstrain/cli/util.py | 16 ++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index e7b3f9c0..a48ab65f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -20,6 +20,12 @@ development source code and as such may not be routinely kept up to date. (self-contained, bundles Python) or not. ([#419][]) +* Colorized and bolded output is disabled when stdout is not attached to a + terminal (e.g. is redirected to a file, piped to another program, etc.) or + when the [`NO_COLOR` environment variable](https://no-color.org) is set to a + non-empty value. + ([#419][]) + ## Bug fixes * Fixed a rare but possible error case in `nextstrain view` and `nextstrain diff --git a/nextstrain/cli/util.py b/nextstrain/cli/util.py index eb834dcb..f7b7769a 100644 --- a/nextstrain/cli/util.py +++ b/nextstrain/cli/util.py @@ -19,6 +19,20 @@ from .types import RunnerModule, RunnerTestResults, RunnerTestResultStatus +NO_COLOR = bool( + # + os.environ.get("NO_COLOR") + + or + + # XXX TODO: This assumes our return value is being printed to stdout. + # That's true for all callers currently but may not always be. Rather than + # fix it properly, move away from our colored() function to the rich + # library instead. + # -trs, 4 March 2025 + not sys.stdout.isatty()) + + def warn(*args): print(*args, file = sys.stderr) @@ -27,6 +41,8 @@ def colored(color, text): """ Returns a string of text suitable for colored output on a terminal. """ + if NO_COLOR: + return text # These magic numbers are standard ANSI terminal escape codes for # formatting text. From 4b29a9856f0db093ec36b6a5b67c3b850ae04d7c Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Thu, 19 Dec 2024 12:31:52 -0800 Subject: [PATCH 08/13] errors: Describe intention of UserError in docstring Recently when working on this codebase I've though it's not clear enough that "UserError" doesn't mean an error the user's committed. It means an error intended for the user to see. Describe it as such for the benefit of other developers. --- nextstrain/cli/errors.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nextstrain/cli/errors.py b/nextstrain/cli/errors.py index 93f7ec34..92c7a05a 100644 --- a/nextstrain/cli/errors.py +++ b/nextstrain/cli/errors.py @@ -12,6 +12,10 @@ class InternalError(NextstrainCliError): pass class UserError(NextstrainCliError): + """ + Error intended for display to the user, e.g. an error aiming to be clear, + friendly, and, if possible, actionable. + """ def __init__(self, message, *args, **kwargs): # Remove leading newlines, trailing whitespace, and then indentation # to better support nicely-formatted """multi-line strings""". From 74589d7c62813594ba5e28589abcfd524d06d79f Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Mar 2025 12:49:40 -0800 Subject: [PATCH 09/13] argparse: Emit blank lines between arguments in --help output Improves readability, it seems to me, having spent a bit more time going thru --help recently. --- CHANGES.md | 4 ++++ nextstrain/cli/argparse.py | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index a48ab65f..b6805e13 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -26,6 +26,10 @@ development source code and as such may not be routinely kept up to date. non-empty value. ([#419][]) +* The readability of `--help` output is improved by the addition of blank lines + between argument/option descriptions. + ([#419][]) + ## Bug fixes * Fixed a rare but possible error case in `nextstrain view` and `nextstrain diff --git a/nextstrain/cli/argparse.py b/nextstrain/cli/argparse.py index 2bcd6c8a..92d384f6 100644 --- a/nextstrain/cli/argparse.py +++ b/nextstrain/cli/argparse.py @@ -57,6 +57,18 @@ def _split_lines(self, text, width): # Render to rST here so rST gets control over wrapping/line breaks. return rst_to_text(text, width).splitlines() + # Emit blank lines between arguments for better readability. It might seem + # simpler to override _join_parts() instead, but that's called from two + # places and we only want to join on newlines for one of those places. + def add_arguments(self, actions): + for i, action in enumerate(actions): + if i != 0: + # Use " \n" to avoid a completely empty line (e.g. "\n") for + # the sake ShowBriefHelp.truncate_help()'s heuristic. + self._add_item(str, [" \n"]) + + self.add_argument(action) + def register_default_command(parser): """ From 9067893e10354afc1d6fc15efc09d5eb669409f3 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Mar 2025 12:51:57 -0800 Subject: [PATCH 10/13] authn: Correctly handle OpenID authorization endpoints with a query part The query parts of such endpoints were not being correctly joined with additional authentication parameters. Noticed in passing. No impacted users that I know of. --- CHANGES.md | 7 +++++++ nextstrain/cli/authn/session.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index b6805e13..b96d9c67 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -46,6 +46,13 @@ development source code and as such may not be routinely kept up to date. in a browser (if any). ([#417](https://github.com/nextstrain/cli/pull/417)) +* `nextstrain login` now correctly handles OAuth 2.0 authorization endpoint + URLs (which are obtained automatically from OpenID Connect 1.0 metadata + discovery) with existing query parameters in them. This bug likely effected + approximately no one; logging into nextstrain.org was unaffected and all + known users of non-nextstrain.org remotes were also unaffected. + ([#419][]) + ## Development * Goodbye, Mypy! We now use only Pyright for type checking. Both type diff --git a/nextstrain/cli/authn/session.py b/nextstrain/cli/authn/session.py index 7c60963f..87e93d0d 100644 --- a/nextstrain/cli/authn/session.py +++ b/nextstrain/cli/authn/session.py @@ -392,7 +392,7 @@ def __enter__(self): # The endpoint URI MAY include an "application/x-www-form-urlencoded" # formatted […] query component […], which MUST be retained when # adding additional query parameters. - auth_url = auth_endpoint._replace(query = auth_endpoint.query + query(auth_params)) + auth_url = auth_endpoint._replace(query = auth_endpoint.query + (auth_endpoint.query and "&") + query(auth_params)) assert auth_url.scheme == "https" From f8798ce6cf71b9b0647e888403fbcef8169c5f44 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Mar 2025 13:01:43 -0800 Subject: [PATCH 11/13] runner.{docker,singularity}: Use mount_point() in more places that hardcoded /nextstrain Brings this older code up to consistency with newer code and further centralizes mount_point() as the logic for where named volumes end up. --- nextstrain/cli/runner/docker.py | 2 +- nextstrain/cli/runner/singularity.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nextstrain/cli/runner/docker.py b/nextstrain/cli/runner/docker.py index 76068ffa..aecd30e4 100644 --- a/nextstrain/cli/runner/docker.py +++ b/nextstrain/cli/runner/docker.py @@ -210,7 +210,7 @@ def run(opts, argv, working_volume = None, extra_env: Env = {}, cpus: int = None if v.src is not None], # Change the default working directory if requested - *(["--workdir=/nextstrain/%s" % working_volume.name] if working_volume else []), + *(["--workdir=%s" % mount_point(working_volume)] if working_volume else []), # Pass thru any extra environment variables provided by us (not via an env.d) *["--env=%s" % name for name, value in extra_env.items() if value is not None], diff --git a/nextstrain/cli/runner/singularity.py b/nextstrain/cli/runner/singularity.py index 8f8a352d..ab67753c 100644 --- a/nextstrain/cli/runner/singularity.py +++ b/nextstrain/cli/runner/singularity.py @@ -266,7 +266,7 @@ def run(opts, argv, working_volume = None, extra_env: Env = {}, cpus: int = None if v.src is not None), # Change the default working directory if requested - *(("--pwd", "/nextstrain/%s" % working_volume.name) if working_volume else ()), + *(("--pwd", str(docker.mount_point(working_volume))) if working_volume else ()), str(image_path(image)), *argv, From 44df1d4d4d0e561dfaacd7cda054e1a54bdacc56 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Mar 2025 20:14:07 -0800 Subject: [PATCH 12/13] =?UTF-8?q?runner.aws=5Fbatch:=20Restore=20times=20o?= =?UTF-8?q?n=20actually-extracted=20path=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …not our presumed extraction path. This is a nuanced change: normally the two paths are identical, but zipfile.extract() does sometimes munge extraction paths for security or platform compatibility and thus make the two paths different. --- CHANGES.md | 6 ++++++ nextstrain/cli/runner/aws_batch/s3.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index b96d9c67..e1e17277 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -53,6 +53,12 @@ development source code and as such may not be routinely kept up to date. known users of non-nextstrain.org remotes were also unaffected. ([#419][]) +* File timestamps for results files downloaded from an AWS Batch build are now + correctly restored even if the expected extraction path differs from the + actual extraction path due to ZIP security precautions. This bug likely + affected approximately no one. + ([#419][]) + ## Development * Goodbye, Mypy! We now use only Pyright for type checking. Both type diff --git a/nextstrain/cli/runner/aws_batch/s3.py b/nextstrain/cli/runner/aws_batch/s3.py index 7d07fb9f..663576e4 100644 --- a/nextstrain/cli/runner/aws_batch/s3.py +++ b/nextstrain/cli/runner/aws_batch/s3.py @@ -152,13 +152,13 @@ def download_workdir(remote_workdir: S3Object, workdir: Path, patterns: List[str if member.CRC != crc32(workdir / path): print("unzipping:", workdir / path) - zipfile.extract(member, str(workdir)) + extracted = zipfile.extract(member, str(workdir)) # Update atime and mtime from the zip member; it's a # bit boggling that .extract() doesn't handle this, # even optionally. mtime = zipinfo_mtime(member) - utime(str(workdir / path), (mtime, mtime)) + utime(extracted, (mtime, mtime)) def walk(path: Path, excluded: PathMatcher = lambda x: False) -> Generator[Path, None, None]: From b3338423a3acf9f5d1b2708fc3d7e6e1b44c2600 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Mar 2025 20:13:37 -0800 Subject: [PATCH 13/13] runner.aws_batch: Support overlay volumes (e.g. --augur) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The latest image, nextstrain/base:build-20250304T041009Z, provides a mechanism in the entrypoint to support bundling of overlays in the workdir ZIP archive by way of upwards-traversing archive member paths.¹ For example, an Augur overlay is bundled into the workdir ZIP archive with member paths starting with ../augur/ and ends up overwriting files in the image's /nextstrain/augur/ since the AWS Batch workdir is always /nextstrain/build/. Extending overlay support to AWS Batch has been very low priority and something I thought was unlikely to ever happen. However, in the course of working on AWS Batch support for `nextstrain run`, it turned out to be easiest/most straightforward/most minimal changes to bundle the pathogen source directory with the working analysis directory in the workdir ZIP archive, i.e. as a "pathogen" overlay. This naturally led to supporting overlays more generally, which I've done here. One caveat compared to overlays in runtimes with the concept of volume mounts (Docker, Singularity) is that any files in the image that do not exist in the overlaid files will remain present since nothing removes them. This is potentially problematic and will be annoying if run into but most of the time should be a non-issue. It is also solvable if we care to exert the effort and extra code to do so. I don't right now. ¹ --- CHANGES.md | 10 ++++ nextstrain/cli/command/build.py | 29 ++++++++++-- nextstrain/cli/runner/aws_batch/__init__.py | 12 ++++- nextstrain/cli/runner/aws_batch/s3.py | 52 ++++++++++++++++----- nextstrain/cli/runner/docker.py | 4 ++ nextstrain/cli/util.py | 8 ++-- 6 files changed, 94 insertions(+), 21 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index e1e17277..2cf50758 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -30,6 +30,16 @@ development source code and as such may not be routinely kept up to date. between argument/option descriptions. ([#419][]) +* AWS Batch builds now support development overlays such as [`--augur`][] and + [`--auspice`][]. To use this functionality, you'll need at least + `nextstrain/base:build-20250304T041009Z` or newer of the Nextstrain Docker + runtime image. Compatibility of the runtime image is checked automatically + when overlays are used with AWS Batch. + ([#419][]) + +[`--augur`]: https://docs.nextstrain.org/projects/cli/en/__NEXT__/commands/build/#cmdoption-nextstrain-build-augur +[`--auspice`]: https://docs.nextstrain.org/projects/cli/en/__NEXT__/commands/build/#cmdoption-nextstrain-build-auspice + ## Bug fixes * Fixed a rare but possible error case in `nextstrain view` and `nextstrain diff --git a/nextstrain/cli/command/build.py b/nextstrain/cli/command/build.py index f19c46e3..83d5d231 100644 --- a/nextstrain/cli/command/build.py +++ b/nextstrain/cli/command/build.py @@ -24,8 +24,8 @@ from ..argparse import add_extended_help_flags, AppendOverwriteDefault, SKIP_AUTO_DEFAULT_IN_HELP from ..debug import debug from ..errors import UsageError, UserError -from ..runner import docker, singularity -from ..util import byte_quantity, runner_name, warn +from ..runner import docker, singularity, aws_batch +from ..util import byte_quantity, runner_name, split_image_name, warn from ..volume import NamedVolume @@ -306,10 +306,31 @@ def assert_overlay_volumes_support(opts): """ overlay_volumes = opts.volumes - if overlay_volumes and opts.__runner__ not in {docker, singularity}: + if not overlay_volumes: + return + + if opts.__runner__ not in {docker, singularity, aws_batch}: raise UserError(f""" The {runner_name(opts.__runner__)} runtime does not support overlays (e.g. of {overlay_volumes[0].name}). - Use the Docker or Singularity runtimes (via --docker or --singularity) if overlays are necessary. + Use the Docker, Singularity, or AWS Batch runtimes (via --docker, + --singularity, or --aws-batch) if overlays are necessary. + """) + + if opts.__runner__ is aws_batch and not docker.image_supports(docker.IMAGE_FEATURE.aws_batch_overlays, opts.image): + raise UserError(f""" + The Nextstrain runtime image version in use + + {opts.image} + + is too old to support overlays (e.g. of {overlay_volumes[0].name}) with AWS Batch. + + If overlays are necessary, please update the runtime image to at + least version + + {split_image_name(opts.image)[0]}:{docker.IMAGE_FEATURE.aws_batch_overlays.value} + + using `nextstrain update docker`. Alternatively, use the Docker or + Singularity runtime (via --docker or --singularity) instead. """) diff --git a/nextstrain/cli/runner/aws_batch/__init__.py b/nextstrain/cli/runner/aws_batch/__init__.py index 9d8219ef..9629743b 100644 --- a/nextstrain/cli/runner/aws_batch/__init__.py +++ b/nextstrain/cli/runner/aws_batch/__init__.py @@ -164,8 +164,15 @@ def register_arguments(parser) -> None: def run(opts, argv, working_volume = None, extra_env: Env = {}, cpus: int = None, memory: int = None) -> int: + docker.assert_volumes_exist(opts.volumes) + + # "build" is a special-cased volume for AWS Batch, as /nextstrain/build is + # the fixed initial working directory and what we'll populate by extracting + # a ZIP file. build_volume = next((v for v in opts.volumes if v and v.name == "build"), None) + opts.volumes = [v for v in opts.volumes if v is not build_volume] + # Unlike other runners, the AWS Batch runner currently *requires* a working # dir in most usages. This is ok as we only provide the AWS Batch runner # for commands which also require a working dir (e.g. build), whereas other @@ -213,8 +220,11 @@ def run(opts, argv, working_volume = None, extra_env: Env = {}, cpus: int = None # Upload workdir to S3 so it can be fetched at the start of the Batch job. print_stage("Uploading %s to S3" % local_workdir) + for volume in opts.volumes: + print(" and %s as %s" % (volume.src.resolve(strict = True), volume.name)) + bucket = s3.bucket(opts.s3_bucket) - remote_workdir = s3.upload_workdir(local_workdir, bucket, run_id, opts.exclude_from_upload) + remote_workdir = s3.upload_workdir(local_workdir, bucket, run_id, opts.exclude_from_upload, opts.volumes) print("uploaded:", s3.object_url(remote_workdir)) diff --git a/nextstrain/cli/runner/aws_batch/s3.py b/nextstrain/cli/runner/aws_batch/s3.py index 663576e4..6f7c54b4 100644 --- a/nextstrain/cli/runner/aws_batch/s3.py +++ b/nextstrain/cli/runner/aws_batch/s3.py @@ -5,21 +5,24 @@ import binascii import boto3 import fsspec +import os.path from botocore.config import Config from botocore.exceptions import ClientError from calendar import timegm from os import utime -from pathlib import Path +from pathlib import Path, PurePath from time import struct_time -from typing import Callable, Generator, Iterable, List, Optional, Any +from typing import Callable, Generator, Iterable, List, Optional, Any, Union from urllib.parse import urlparse from zipfile import ZipFile, ZipInfo from ... import env +from ...debug import DEBUGGING from ...types import Env, S3Bucket, S3Object from ...util import glob_matcher +from ...volume import NamedVolume -PathMatcher = Callable[[Path], bool] +PathMatcher = Callable[[Union[Path, PurePath]], bool] def object_url(object: S3Object) -> str: @@ -38,10 +41,10 @@ def object_from_url(s3url: str) -> S3Object: return bucket(url.netloc).Object(key) -def upload_workdir(workdir: Path, bucket: S3Bucket, run_id: str, patterns: List[str] = None) -> S3Object: +def upload_workdir(workdir: Path, bucket: S3Bucket, run_id: str, patterns: List[str] = None, volumes: List[NamedVolume] = []) -> S3Object: """ - Upload a ZIP archive of the local *workdir* to the remote S3 *bucket* for - the given *run_id*. + Upload a ZIP archive of the local *workdir* (and optional *volumes*) to the + remote S3 *bucket* for the given *run_id*. An optional list of *patterns* (shell-style advanced globs) can be passed to selectively exclude part of the local *workdir* from being uploaded. @@ -80,8 +83,23 @@ def upload_workdir(workdir: Path, bucket: S3Bucket, run_id: str, patterns: List[ with fsspec.open(object_url(remote_workdir), "wb", auto_mkdir = False) as remote_file: with ZipFile(remote_file, "w") as zipfile: for path in walk(workdir, excluded): - print("zipping:", path) - zipfile.write(str(path), str(path.relative_to(workdir))) + dst = path.relative_to(workdir) + print(f"zipping: {path}" + (f" (as {dst})" if DEBUGGING else "")) + zipfile.write(str(path), dst) + + for volume in volumes: + # XXX TODO: Use the "walk_up" argument to Path.relative_to() + # once we require Python 3.12. + # -trs, 10 Feb 2025 + try: + prefix = PurePath(volume.name).relative_to("build") + except ValueError: + prefix = PurePath("..", volume.name) + + for path in walk(volume.src, always_excluded): + dst = prefix / path.relative_to(volume.src) + print(f"zipping: {path}" + (f" (as {dst})" if DEBUGGING else "")) + zipfile.write(str(path), dst) return remote_workdir @@ -138,9 +156,19 @@ def download_workdir(remote_workdir: S3Object, workdir: Path, patterns: List[str # …and extract its contents to the workdir. with ZipFile(remote_file) as zipfile: - for member in zipfile.infolist(): - path = Path(member.filename) - + # Completely ignore archive members with unsafe paths (absolute or + # upwards-traversing) instead of relying on zipfile.extract()'s + # default of munging them to be "safe". Munging seems more + # confusing than skipping, and skipping is essential in the case of + # additional volumes being uploaded in the workdir initially. + safe_members = [ + (filename, member) + for filename, member + in ((PurePath(m.filename), m) for m in zipfile.infolist()) + if not filename.is_absolute() + and os.path.pardir not in filename.parts ] + + for path, member in safe_members: # Inclusions negate exclusions but aren't an exhaustive # list of what is included. if selected(path) and (included(path) or not excluded(path)): @@ -179,7 +207,7 @@ def path_matcher(patterns: Iterable[str]) -> PathMatcher: Generate a function which matches a Path object against the list of glob *patterns*. """ - def matches(path: Path) -> bool: + def matches(path: Union[Path, PurePath]) -> bool: return any(map(path.match, patterns)) return matches diff --git a/nextstrain/cli/runner/docker.py b/nextstrain/cli/runner/docker.py index aecd30e4..08a851fe 100644 --- a/nextstrain/cli/runner/docker.py +++ b/nextstrain/cli/runner/docker.py @@ -112,6 +112,10 @@ class IMAGE_FEATURE(Enum): # /nextstrain/env.d support first present. envd = "build-20230613T204512Z" + # AWS Batch: support for volume overlays (i.e. ../ in archive members and + # file overwriting) in ZIP extraction. + aws_batch_overlays = "build-20250304T041009Z" + def register_arguments(parser) -> None: # Docker development options diff --git a/nextstrain/cli/util.py b/nextstrain/cli/util.py index f7b7769a..3161dd02 100644 --- a/nextstrain/cli/util.py +++ b/nextstrain/cli/util.py @@ -9,7 +9,7 @@ from importlib.metadata import distribution as distribution_info, PackageNotFoundError from typing import Any, Callable, Iterable, Literal, Mapping, List, Optional, Sequence, Tuple, Union, overload from packaging.version import parse as parse_version -from pathlib import Path +from pathlib import Path, PurePath from shlex import quote as shquote from shutil import which from textwrap import dedent, indent @@ -553,7 +553,7 @@ def split_image_name(name: str, implicit_latest: bool = True) -> Tuple[str, Opti return (repository, tag) -def glob_matcher(patterns: Sequence[str], *, root: Path = None) -> Callable[[Union[str, Path]], bool]: +def glob_matcher(patterns: Sequence[str], *, root: Path = None) -> Callable[[Union[str, Path, PurePath]], bool]: """ Generate a function which matches a string or path-like object against the list of Bash-like glob *patterns*. @@ -563,13 +563,13 @@ def glob_matcher(patterns: Sequence[str], *, root: Path = None) -> Callable[[Uni See :func:`glob_match` for supported pattern features. """ - def matcher(path: Union[str, Path]) -> bool: + def matcher(path: Union[str, Path, PurePath]) -> bool: return glob_match(path, patterns, root = root) return matcher -def glob_match(path: Union[str, Path], patterns: Union[str, Sequence[str]], *, root: Path = None) -> bool: +def glob_match(path: Union[str, Path, PurePath], patterns: Union[str, Sequence[str]], *, root: Path = None) -> bool: """ Test if *path* matches any of the glob *patterns*.