Skip to content

Commit

Permalink
feat: provide explicit feedback for new MacOS rsync/openrsync issue
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Jan 13, 2025
1 parent 3de0a5c commit d757961
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 2 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ Reading a sequence repository requires several Python packages, all of which are
available from pypi. Installation should be as simple as `pip install
biocommons.seqrepo`.

Acquiring SeqRepo snapshots using the CLI requires an [rsync](https://github.com/RsyncProject/rsync) binary to be available on the system $PATH. Note that [openrsync](https://www.openrsync.org/), which now ships with new MacOS installs, does not support all required functions. Mac users should install rsync from [HomeBrew](https://formulae.brew.sh/formula/rsync) and ensure that it's available on the $PATH.

*Writing* sequence files also requires `bgzip`, which provided in the
[htslib](https://github.com/samtools/htslib) repo. Ubuntu users should install
the `tabix` package with `sudo apt install tabix`.
Expand Down
35 changes: 33 additions & 2 deletions src/biocommons/seqrepo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,28 @@
_logger = logging.getLogger(__name__)


class RsyncExeError(Exception):
"""Raise for issues relating to rsync executable."""


def _raise_for_rsync_binary_error(e: subprocess.CalledProcessError, opts: argparse.Namespace):
"""Check if error appears to be rooted in a known problem with recent (as of 2024)
MacOS distributions, which ship with an incompatible rsync clone.
Raise a custom error type if so. Do nothing otherwise. Calling contexts should
presumably raise the original error in that case.
:param e: originating subprocess error
:param opts: CLI args given
:raise RsyncExeError: if issue appears to be related to openrsync
"""
if e.returncode == 1:
result = subprocess.check_output([opts.rsync_exe, "--version"])
if result is not None and ("openrsync" in result.decode()):
msg = f"Binary located at {opts.rsync_exe} appears to be an `openrsync` instance, but the SeqRepo CLI requires `rsync` (NOT `openrsync`). Please install `rsync` and either make it available on the $PATH variable or manually provide its location with the `--rsync-exe` option. See README for more information." # noqa: E501
raise RsyncExeError(msg)


def _get_remote_instances(opts: argparse.Namespace) -> list[str]:
line_re = re.compile(r"d[-rwx]{9}\s+[\d,]+ \d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2} (.+)")
rsync_cmd = [
Expand All @@ -58,7 +80,12 @@ def _get_remote_instances(opts: argparse.Namespace) -> list[str]:
opts.remote_host + "::seqrepo",
]
_logger.debug("Executing `" + " ".join(rsync_cmd) + "`")
lines = subprocess.check_output(rsync_cmd).decode().splitlines()[1:]
try:
result = subprocess.check_output(rsync_cmd)
except subprocess.CalledProcessError as e:
_raise_for_rsync_binary_error(e, opts)
raise
lines = result.decode().splitlines()[1:]
dirs = (m.group(1) for m in (line_re.match(line) for line in lines) if m)
return sorted(list(filter(instance_name_new_re.match, dirs)))

Expand Down Expand Up @@ -571,7 +598,11 @@ def pull(opts: argparse.Namespace) -> None:

_logger.debug("Executing: " + " ".join(cmd))
if not opts.dry_run:
subprocess.check_call(cmd)
try:
subprocess.check_call(cmd)
except subprocess.CalledProcessError as e:
_raise_for_rsync_binary_error(e, opts)
raise
dst_dir = os.path.join(opts.root_directory, instance_name)
os.rename(tmp_dir, dst_dir)
_logger.info("{}: successfully updated ({})".format(instance_name, dst_dir))
Expand Down

0 comments on commit d757961

Please sign in to comment.