diff --git a/python/hdfs_native/cli.py b/python/hdfs_native/cli.py
new file mode 100644
index 0000000..5c9a6d8
--- /dev/null
+++ b/python/hdfs_native/cli.py
@@ -0,0 +1,130 @@
+"""Command line interface for hdfs-native."""
+
+import posixpath
+from argparse import ArgumentParser, Namespace
+from typing import Optional, Sequence
+from urllib.parse import urlparse
+
+from hdfs_native import Client
+
+
+def _client_for_url(url: str) -> Client:
+    """Build a client for the name service that ``url`` points at.
+
+    A URL with a scheme connects to ``scheme://host[:port]``. A URL with no
+    scheme and no host calls ``Client()`` with no arguments.
+
+    Raises:
+        ValueError: If a host or port is given without a scheme.
+    """
+    parsed = urlparse(url)
+
+    if parsed.scheme:
+        # NOTE(review): "hdfs:///path" has no hostname, so "None" is formatted
+        # into the connection URL here - confirm how Client should treat that.
+        connection_url = f"{parsed.scheme}://{parsed.hostname}"
+        if parsed.port:
+            connection_url += f":{parsed.port}"
+        return Client(connection_url)
+    elif parsed.hostname or parsed.port:
+        raise ValueError(
+            f"Cannot provide host or port without scheme: {parsed.hostname}"
+        )
+    else:
+        return Client()
+
+
+def _verify_nameservices_match(url: str, *urls: str) -> None:
+    """Check that every URL in ``urls`` has the same scheme and host as ``url``.
+
+    Ports are not compared.
+
+    Raises:
+        ValueError: If any URL differs from ``url`` in scheme or hostname.
+    """
+    first = urlparse(url)
+
+    # Use a distinct loop name so the ``url`` parameter is not shadowed.
+    for other_url in urls:
+        parsed = urlparse(other_url)
+        if first.scheme != parsed.scheme or first.hostname != parsed.hostname:
+            raise ValueError(
+                f"Protocol and host must match: {first.scheme}://{first.hostname} != {parsed.scheme}://{parsed.hostname}"
+            )
+
+
+def _path_for_url(url: str) -> str:
+    """Return only the path component of ``url``."""
+    return urlparse(url).path
+
+
+def mv(args: Namespace):
+    """Move each source in ``args.src`` to ``args.dst`` on one name service.
+
+    If ``args.dst`` is an existing directory, every source is moved inside it
+    under its own base name; otherwise the single source is renamed to
+    ``args.dst``.
+
+    Raises:
+        ValueError: If the URLs differ in scheme or host, or if several sources
+            are given and the destination is not an existing directory.
+    """
+    _verify_nameservices_match(args.dst, *args.src)
+
+    client = _client_for_url(args.dst)
+    dst_path = _path_for_url(args.dst)
+
+    # A missing destination is fine: it is then the new name of the source.
+    dst_isdir = False
+    try:
+        dst_isdir = client.get_file_info(dst_path).isdir
+    except FileNotFoundError:
+        pass
+
+    if len(args.src) > 1 and not dst_isdir:
+        raise ValueError(
+            "destination must be a directory if multiple sources are provided"
+        )
+
+    for src in args.src:
+        src_path = _path_for_url(src)
+        if dst_isdir:
+            # HDFS paths always use "/", so use posixpath rather than os.path.
+            # Drop trailing slashes first so "/a/b/" still yields "b".
+            src_name = posixpath.basename(src_path.rstrip("/"))
+            target_path = posixpath.join(dst_path, src_name)
+        else:
+            target_path = dst_path
+
+        client.rename(src_path, target_path)
+
+
+def main(in_args: Optional[Sequence[str]] = None):
+    """Parse ``in_args`` and run the selected subcommand.
+
+    ``in_args`` is passed to ``ArgumentParser.parse_args``; with ``None``,
+    argparse falls back to ``sys.argv``.
+    """
+    parser = ArgumentParser(
+        description="""Command line utility for interacting with HDFS using hdfs-native.
+        Globs are not currently supported, all file paths are treated as exact paths."""
+    )
+
+    subparsers = parser.add_subparsers(title="Subcommands", required=True)
+
+    mv_parser = subparsers.add_parser(
+        "mv",
+        help="Move files or directories",
+        description="""Move a file or directory from <src> to <dst>. Must be part of the same name service.
+        If multiple src are provided, dst must be a directory""",
+    )
+    mv_parser.add_argument("src", nargs="+", help="Files or directories to move")
+    mv_parser.add_argument("dst", help="Target destination of file or directory")
+    mv_parser.set_defaults(func=mv)
+
+    args = parser.parse_args(in_args)
+    args.func(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/python/hdfs_native/py.typed b/python/hdfs_native/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/python/pyproject.toml b/python/pyproject.toml
index b00e4c4..2bb5aba 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -29,6 +29,9 @@ devel = [
 [project.urls]
 repository = "https://github.com/Kimahriman/hdfs-native"
 
+[project.scripts]
+hdfsn = "hdfs_native.cli:main"
+
 [project.entry-points."fsspec.specs"]
 hdfs = "hdfs_native.fsspec.HdfsFileSystem"
 viewfs = "hdfs_native.fsspec.HdfsFileSystem"
diff --git a/python/tests/test_cli.py b/python/tests/test_cli.py
new file mode 100644
index 0000000..ee5c5fb
--- /dev/null
+++ b/python/tests/test_cli.py
@@ -0,0 +1,42 @@
+import pytest
+
+from hdfs_native import Client
+from hdfs_native.cli import main as cli_main
+
+
+def test_cli(minidfs: str):
+    client = Client(minidfs)
+
+    def qualify(path: str) -> str:
+        return f"{minidfs}{path}"
+
+    # mv
+    client.create("/testfile").close()
+    client.mkdirs("/testdir")
+
+    cli_main(["mv", qualify("/testfile"), qualify("/testfile2")])
+
+    client.get_file_info("/testfile2")
+
+    with pytest.raises(ValueError):
+        cli_main(["mv", qualify("/testfile2"), "hdfs://badnameservice/testfile"])
+
+    with pytest.raises(RuntimeError):
+        cli_main(["mv", qualify("/testfile2"), qualify("/nonexistent/testfile")])
+
+    cli_main(["mv", qualify("/testfile2"), qualify("/testdir")])
+
+    client.get_file_info("/testdir/testfile2")
+
+    client.rename("/testdir/testfile2", "/testfile1")
+    client.create("/testfile2").close()
+
+    with pytest.raises(ValueError):
+        cli_main(
+            ["mv", qualify("/testfile1"), qualify("/testfile2"), qualify("/testfile3")]
+        )
+
+    cli_main(["mv", qualify("/testfile1"), qualify("/testfile2"), qualify("/testdir/")])
+
+    client.get_file_info("/testdir/testfile1")
+    client.get_file_info("/testdir/testfile2")