diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index d83500d..4139cd0 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -56,8 +56,8 @@ jobs: run: | source .venv/bin/activate mypy - isort . --check - black . --check + ruff check . + ruff format . --check --diff - name: Run tests run: | diff --git a/python/conftest.py b/python/conftest.py index 03d4783..c4d150c 100644 --- a/python/conftest.py +++ b/python/conftest.py @@ -5,6 +5,7 @@ import fsspec import pytest +from hdfs_native import Client from hdfs_native.fsspec import HdfsFileSystem @@ -35,10 +36,15 @@ def minidfs(): try: child.communicate(input="\n", timeout=30) - except: + except: # noqa: E722 child.kill() +@pytest.fixture(scope="module") +def client(minidfs: str) -> Client: + return Client(minidfs) + + @pytest.fixture(scope="module") def fs(minidfs: str) -> HdfsFileSystem: url = urllib.parse.urlparse(minidfs) diff --git a/python/hdfs_native/__init__.py b/python/hdfs_native/__init__.py index 90b3cfc..d64d40e 100644 --- a/python/hdfs_native/__init__.py +++ b/python/hdfs_native/__init__.py @@ -1,15 +1,17 @@ import io import os -from typing import Dict, Iterator, Optional +from typing import TYPE_CHECKING, Dict, Iterator, Optional # For some reason mypy doesn't think this exists from typing_extensions import Buffer # type: ignore -from ._internal import * +from ._internal import RawClient, WriteOptions +if TYPE_CHECKING: + from ._internal import ContentSummary, FileStatus, RawFileReader, RawFileWriter -class FileReader(io.RawIOBase): +class FileReader(io.RawIOBase): def __init__(self, inner: "RawFileReader"): self.inner = inner @@ -64,7 +66,6 @@ def close(self) -> None: class FileWriter(io.RawIOBase): - def __init__(self, inner: "RawFileWriter"): self.inner = inner @@ -87,7 +88,6 @@ def __exit__(self, *_args): class Client: - def __init__( self, url: Optional[str] = None, diff --git a/python/hdfs_native/_internal.pyi b/python/hdfs_native/_internal.pyi index 6147da7..d013466 100644 --- a/python/hdfs_native/_internal.pyi +++ b/python/hdfs_native/_internal.pyi @@ -30,6 +30,15 @@ class WriteOptions: overwrite: bool create_parent: bool + def __init__( + self, + block_size: Optional[int] = None, + replication: Optional[int] = None, + permission: Optional[int] = None, + overwrite: Optional[bool] = None, + create_parent: Optional[bool] = None, + ): ... + class RawFileReader: def file_length(self) -> int: """Returns the size of the file""" diff --git a/python/pyproject.toml b/python/pyproject.toml index de78ccc..450b62e 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -20,11 +20,9 @@ dependencies = [ [project.optional-dependencies] devel = [ - "mypy~=1.8.0", - "ruff~=0.4.8", - "pytest~=7.4", - "black~=24.4", - "isort~=5.13" + "mypy~=1.13.0", + "ruff~=0.7.2", + "pytest~=8.3", ] [project.urls] @@ -53,3 +51,6 @@ testpaths = [ "tests", "hdfs_native", ] + +[tool.ruff.lint] +extend-select = ["I"] diff --git a/python/src/lib.rs b/python/src/lib.rs index e71c4a8..eb8d779 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -50,6 +50,25 @@ impl From for PyFileStatus { } } +#[pymethods] +impl PyFileStatus { + /// Return a dataclass-esque format for the repr + fn __repr__(&self) -> String { + format!("FileStatus(path='{}', length={}, isdir={}, permission={}, owner={}, group={}, modification_time={}, access_time={}, replication={}, blocksize={})", + self.path, + self.length, + self.isdir, + self.permission, + self.owner, + self.group, + self.modification_time, + self.access_time, + self.replication.map(|r| r.to_string()).unwrap_or("None".to_string()), + self.blocksize.map(|r| r.to_string()).unwrap_or("None".to_string()) + ) + } +} + #[pyclass(name = "FileStatusIter")] struct PyFileStatusIter { inner: ListStatusIterator, @@ -96,6 +115,21 @@ impl From for PyContentSummary { } } +#[pymethods] +impl PyContentSummary { + /// Return a dataclass-esque format for the repr + fn __repr__(&self) -> String { + format!("ContentSummary(length={}, file_count={}, directory_count={}, quota={}, space_consumed={}, space_quota={})", + self.length, + self.file_count, + self.directory_count, + self.quota, + self.space_consumed, + self.space_quota, + ) + } +} + #[pyclass] struct RawFileReader { inner: FileReader, @@ -173,9 +207,42 @@ impl From for PyWriteOptions { #[pymethods] impl PyWriteOptions { #[new] - #[pyo3(signature = ())] - pub fn new() -> Self { - Self::from(WriteOptions::default()) + pub fn new( + block_size: Option, + replication: Option, + permission: Option, + overwrite: Option, + create_parent: Option, + ) -> Self { + let mut write_options = WriteOptions::default(); + if let Some(block_size) = block_size { + write_options = write_options.block_size(block_size); + } + if let Some(replication) = replication { + write_options = write_options.replication(replication); + } + if let Some(permission) = permission { + write_options = write_options.permission(permission); + } + if let Some(overwrite) = overwrite { + write_options = write_options.overwrite(overwrite); + } + if let Some(create_parent) = create_parent { + write_options = write_options.create_parent(create_parent); + } + + PyWriteOptions::from(write_options) + } + + /// Return a dataclass-esque format for the repr + fn __repr__(&self) -> String { + format!("WriteOptions(block_size={}, replication={}, permission={}, overwrite={}, create_parent={})", + self.block_size.map(|x| x.to_string()).unwrap_or("None".to_string()), + self.replication.map(|x| x.to_string()).unwrap_or("None".to_string()), + self.permission, + self.overwrite, + self.create_parent + ) } } diff --git a/python/tests/test_fsspec.py b/python/tests/test_fsspec.py index 724ca7f..f1ec501 100644 --- a/python/tests/test_fsspec.py +++ b/python/tests/test_fsspec.py @@ -10,22 +10,16 @@ def test_dirs(fs: HdfsFileSystem): fs.mkdir("/testdir") assert fs.info("/testdir")["type"] == "directory" - try: + with pytest.raises(FileExistsError): fs.makedirs("/testdir", exist_ok=False) - assert False, '"/testdir" already exists, should fail' - except: - pass fs.makedirs("/testdir", exist_ok=True) fs.mkdir("/testdir/nested/dir") assert fs.info("/testdir/nested/dir")["type"] == "directory" - try: + with pytest.raises(RuntimeError): fs.mkdir("/testdir/nested2/dir", create_parents=False) - assert False, "Should fail to make dir because parent doesn't exist" - except: - pass with pytest.raises(RuntimeError): fs.rm("/testdir", recursive=False) diff --git a/python/tests/test_integration.py b/python/tests/test_integration.py index 99b0c17..38af4a0 100644 --- a/python/tests/test_integration.py +++ b/python/tests/test_integration.py @@ -3,9 +3,8 @@ from hdfs_native import Client, WriteOptions -def test_integration(minidfs: str): - client = Client(minidfs) - client.create("/testfile", WriteOptions()).close() +def test_integration(client: Client): + client.create("/testfile").close() file_info = client.get_file_info("/testfile") assert file_info.path == "/testfile" @@ -25,7 +24,7 @@ def test_integration(minidfs: str): file_list = list(client.list_status("/", False)) assert len(file_list) == 0 - with client.create("/testfile", WriteOptions()) as file: + with client.create("/testfile") as file: data = io.BytesIO() for i in range(0, 32 * 1024 * 1024): @@ -101,3 +100,18 @@ def test_integration(minidfs: str): assert content_summary.length == 33 * 1024 * 1024 * 4 client.delete("/testfile", False) + + +def test_write_options(client: Client): + with client.create("/testfile") as file: + file.write(b"abcd") + + client.create( + "/testfile", + WriteOptions(overwrite=True, permission=0o700, block_size=1024 * 1024), + ).close() + + file_info = client.get_file_info("/testfile") + assert file_info.length == 0 + assert file_info.permission == 0o700 + assert file_info.blocksize == 1024 * 1024