From c2ac57b48e54a5fc0dc8478838ac07d63f5654b8 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Thu, 28 Nov 2024 07:33:49 +1100 Subject: [PATCH 01/45] chore(deps): bump zarrs to 0.18.0 No relevant API changes since 0.18.0-beta.0 --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5b08288..081bce8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1313,9 +1313,9 @@ dependencies = [ [[package]] name = "zarrs" -version = "0.18.0-beta.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf2268e57ea21ebb30abee0f5707a73d59b3ee822d8cd92dffd4230286ea6af" +checksum = "a2957fbdc365a192c71fe61ab2e759d4e77fbad598e032102b87b4188c32369d" dependencies = [ "blosc-src", "bytemuck", diff --git a/Cargo.toml b/Cargo.toml index 517874f..eb01440 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ crate-type = ["cdylib", "rlib"] [dependencies] pyo3 = { version = "0.22.6", features = ["abi3-py311"] } -zarrs = "0.18.0-beta.0" +zarrs = "0.18.0" rayon_iter_concurrent_limit = "0.2.0" rayon = "1.10.0" # fix for https://stackoverflow.com/questions/76593417/package-openssl-was-not-found-in-the-pkg-config-search-path From 4074328d7c0bc8866a8edeeadcc90ed860931ac9 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Thu, 28 Nov 2024 09:51:09 +1100 Subject: [PATCH 02/45] feat: add HTTP store support --- Cargo.lock | 1291 +++++++++++++++++++++++++++++- Cargo.toml | 3 + python/zarrs/utils.py | 16 +- src/chunk_item.rs | 12 +- src/codec_pipeline_store_http.rs | 38 + src/lib.rs | 20 +- src/runtime.rs | 18 + tests/test_zarrs_http.py | 28 + 8 files changed, 1411 insertions(+), 15 deletions(-) create mode 100644 src/codec_pipeline_store_http.rs create mode 100644 src/runtime.rs create mode 100644 tests/test_zarrs_http.py diff --git a/Cargo.lock b/Cargo.lock index 081bce8..086a52e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + [[package]] name = "adler2" version = "2.0.0" @@ -14,24 +23,91 @@ version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anyhow" version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" +[[package]] +name = "async-trait" +version = "0.1.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "autocfg" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "backon" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5289ec98f68f28dd809fd601059e6aa908bb8f6108620930828283d4ee23d7" +dependencies = [ + "fastrand", + "gloo-timers", + "tokio", +] + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "blosc-src" version = "0.3.4" @@ -71,6 +147,12 @@ dependencies = [ "syn", ] +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.8.0" @@ -94,6 +176,30 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-targets", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + [[package]] name = "crc32c" version = "0.6.8" @@ -152,6 +258,16 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "derive_more" version = "1.0.0" @@ -173,6 +289,27 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "either" version = "1.13.0" @@ -185,6 +322,18 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "fastrand" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" + +[[package]] +name = "flagset" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3ea1ec5f8307826a5b71094dd91fc04d4ae75d5709b20ad351c7fb4815c86ec" + [[package]] name = "flate2" version = "1.0.35" @@ -195,6 +344,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foldhash" version = "0.1.3" @@ -216,6 +371,114 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -223,8 +486,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "gloo-timers" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", ] [[package]] @@ -255,6 +538,270 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" + +[[package]] +name = "hyper" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97818827ef4f364230e16705d4706e2897df2bb60617d6ca15d598025a3c481f" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "httparse", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" +dependencies = [ + "futures-util", + "http", + "hyper", + "hyper-util", + "rustls", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", + "webpki-roots", +] + +[[package]] +name = "hyper-util" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "indexmap" version = "2.6.0" @@ -277,6 +824,12 @@ version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f958d3d68f4167080a18141e10381e7634563984a537f2a49a30fd8e53ac5767" +[[package]] +name = "ipnet" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" + [[package]] name = "itertools" version = "0.13.0" @@ -337,6 +890,12 @@ dependencies = [ "cc", ] +[[package]] +name = "litemap" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + [[package]] name = "lock_api" version = "0.4.12" @@ -388,6 +947,16 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + [[package]] name = "memchr" version = "2.7.4" @@ -403,6 +972,12 @@ dependencies = [ "autocfg", ] +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + [[package]] name = "miniz_oxide" version = "0.8.0" @@ -412,6 +987,18 @@ dependencies = [ "adler2", ] +[[package]] +name = "mio" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +dependencies = [ + "hermit-abi", + "libc", + "wasi", + "windows-sys 0.52.0", +] + [[package]] name = "moka" version = "0.12.8" @@ -554,7 +1141,16 @@ dependencies = [ "num-integer", "num-traits", "pyo3", - "rustc-hash", + "rustc-hash 1.1.0", +] + +[[package]] +name = "object" +version = "0.36.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +dependencies = [ + "memchr", ] [[package]] @@ -563,6 +1159,34 @@ version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "opendal" +version = "0.50.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb28bb6c64e116ceaf8dd4e87099d3cfea4a58e85e62b104fef74c91afba0f44" +dependencies = [ + "anyhow", + "async-trait", + "backon", + "base64", + "bytes", + "chrono", + "flagset", + "futures", + "getrandom", + "http", + "log", + "md-5", + "once_cell", + "percent-encoding", + "quick-xml", + "reqwest", + "serde", + "serde_json", + "tokio", + "uuid", +] + [[package]] name = "openssl" version = "0.10.68" @@ -650,6 +1274,24 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d61c5ce1153ab5b689d0c074c4e7fc613e942dfb7dd9eea5ab202d2ad91fe361" +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pin-project-lite" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pkg-config" version = "0.3.31" @@ -671,6 +1313,15 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + [[package]] name = "proc-macro2" version = "1.0.89" @@ -786,6 +1437,68 @@ dependencies = [ "winapi", ] +[[package]] +name = "quick-xml" +version = "0.36.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7649a7b4df05aed9ea7ec6f628c67c9953a43869b8bc50929569b2999d443fe" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "quinn" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" +dependencies = [ + "bytes", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash 2.0.0", + "rustls", + "socket2", + "thiserror 2.0.3", + "tokio", + "tracing", +] + +[[package]] +name = "quinn-proto" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" +dependencies = [ + "bytes", + "getrandom", + "rand", + "ring", + "rustc-hash 2.0.0", + "rustls", + "rustls-pki-types", + "slab", + "thiserror 2.0.3", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5a626c6807713b15cac82a6acaccd6043c9a5408c24baae07611fec3f243da" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.59.0", +] + [[package]] name = "quote" version = "1.0.37" @@ -795,6 +1508,36 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + [[package]] name = "raw-cpuid" version = "11.2.0" @@ -848,12 +1591,83 @@ dependencies = [ "bitflags", ] +[[package]] +name = "reqwest" +version = "0.12.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" +dependencies = [ + "base64", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-pemfile", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "webpki-roots", + "windows-registry", +] + +[[package]] +name = "ring" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +dependencies = [ + "cc", + "cfg-if", + "getrandom", + "libc", + "spin", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + [[package]] name = "rustc-hash" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" + [[package]] name = "rustc_version" version = "0.4.1" @@ -863,6 +1677,49 @@ dependencies = [ "semver", ] +[[package]] +name = "rustls" +version = "0.23.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" +dependencies = [ + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +dependencies = [ + "web-time", +] + +[[package]] +name = "rustls-webpki" +version = "0.102.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "ryu" version = "1.0.18" @@ -943,12 +1800,33 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + [[package]] name = "smallvec" version = "1.13.2" @@ -965,6 +1843,34 @@ dependencies = [ "link-cplusplus", ] +[[package]] +name = "socket2" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + [[package]] name = "syn" version = "2.0.87" @@ -976,6 +1882,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tagptr" version = "0.2.0" @@ -1038,6 +1964,70 @@ dependencies = [ "once_cell", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinyvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.41.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "pin-project-lite", + "socket2", + "windows-sys 0.52.0", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +dependencies = [ + "rustls", + "rustls-pki-types", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml" version = "0.8.19" @@ -1072,12 +2062,49 @@ dependencies = [ "winnow", ] +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +dependencies = [ + "once_cell", +] + [[package]] name = "triomphe" version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "859eb650cfee7434994602c3a68b25d77ad9e68c8a6cd491616ef86661382eb3" +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + [[package]] name = "unicode-ident" version = "1.0.13" @@ -1102,6 +2129,35 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbbe93dca7f1a429c2bf4164923dce6921bfba92aa2bbffc5e16f79eab19bcd6" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "uuid" version = "1.11.0" @@ -1109,6 +2165,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" dependencies = [ "getrandom", + "serde", ] [[package]] @@ -1117,6 +2174,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "walkdir" version = "2.5.0" @@ -1127,6 +2190,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -1159,6 +2231,18 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.95" @@ -1188,6 +2272,19 @@ version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "web-sys" version = "0.3.72" @@ -1198,6 +2295,25 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "0.26.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d642ff16b7e79272ae451b7322067cdc17cadf68c23264be9d94a32319efe7e" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "winapi" version = "0.3.9" @@ -1220,7 +2336,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -1229,6 +2345,54 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.59.0" @@ -1311,6 +2475,42 @@ dependencies = [ "memchr", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "zarrs" version = "0.18.0" @@ -1349,14 +2549,17 @@ name = "zarrs-python" version = "0.1.0" dependencies = [ "numpy", + "opendal", "openssl", "pyo3", "pyo3-stub-gen", "rayon", "rayon_iter_concurrent_limit", "serde_json", + "tokio", "unsafe_cell_slice", "zarrs", + "zarrs_opendal", ] [[package]] @@ -1393,20 +2596,104 @@ dependencies = [ "thiserror 2.0.3", ] +[[package]] +name = "zarrs_opendal" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0e5151abecb70514fd2f6c86cd5280f65bd741c110181a9c08d16720a3cd539" +dependencies = [ + "async-trait", + "futures", + "opendal", + "zarrs_storage", +] + [[package]] name = "zarrs_storage" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d23665d868dd8843989bb74239956eb4600f0bd713f1f81489b4c1877503b607" dependencies = [ + "async-trait", "bytes", "derive_more", + "futures", "itertools", "parking_lot", "thiserror 2.0.3", "unsafe_cell_slice", ] +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zstd" version = "0.13.2" diff --git a/Cargo.toml b/Cargo.toml index eb01440..aa1f939 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,9 @@ numpy = "0.22.1" unsafe_cell_slice = "0.2.0" serde_json = "1.0.128" pyo3-stub-gen = { version = "0.6.1", git = "https://github.com/flying-sheep/pyo3-stub-gen.git", branch = "py-untyped-array" } +opendal = { version = "0.50.2", features = ["services-http"] } +tokio = { version = "1.41.1", features = ["rt-multi-thread"] } +zarrs_opendal = "0.4.0" [profile.release] lto = true diff --git a/python/zarrs/utils.py b/python/zarrs/utils.py index 43e8b73..f139db3 100644 --- a/python/zarrs/utils.py +++ b/python/zarrs/utils.py @@ -7,6 +7,8 @@ import numpy as np from zarr.core.indexing import SelectorTuple, is_integer +from zarr.storage.local import LocalStore +from zarr.storage.remote import RemoteStore if TYPE_CHECKING: from collections.abc import Iterable @@ -64,9 +66,19 @@ def selector_tuple_to_slice_selection(selector_tuple: SelectorTuple) -> list[sli def convert_chunk_to_primitive( byte_getter: ByteGetter | ByteSetter, chunk_spec: ArraySpec -) -> tuple[str, ChunkCoords, str, Any]: +) -> tuple[(str, str), ChunkCoords, str, Any]: + if isinstance(byte_getter.store, RemoteStore): + # TODO: Prefer passing enum to Rust for RemoteStore, LocalStore, etc? + root = str(byte_getter.store.path) + path = str(byte_getter.path) + elif isinstance(byte_getter.store, LocalStore): + root = "" + path = str(byte_getter) + else: + # TODO: Check what other store types exist + raise ValueError(f"Unsupported store type: {type(byte_getter.store)}") return ( - str(byte_getter), + (root, path), chunk_spec.shape, str(chunk_spec.dtype), chunk_spec.fill_value.tobytes(), diff --git a/src/chunk_item.rs b/src/chunk_item.rs index c381f9b..ddf4018 100644 --- a/src/chunk_item.rs +++ b/src/chunk_item.rs @@ -12,11 +12,11 @@ use zarrs::{ storage::{MaybeBytes, ReadableWritableListableStorageTraits, StorageError, StoreKey}, }; -use crate::utils::PyErrExt; +use crate::{utils::PyErrExt, StorePath}; pub(crate) type Raw<'a> = ( - // store path - String, + // store root and path + StorePath, // shape Vec, // data type @@ -34,7 +34,7 @@ pub(crate) type RawWithIndices<'a> = ( ); pub(crate) trait IntoItem: std::marker::Sized { - fn store_path(&self) -> &str; + fn store_path(&self) -> &StorePath; fn into_item( self, store: Arc, @@ -90,7 +90,7 @@ impl ChunksItem for WithSubset { } impl<'a> IntoItem for Raw<'a> { - fn store_path(&self) -> &str { + fn store_path(&self) -> &StorePath { &self.0 } fn into_item( @@ -110,7 +110,7 @@ impl<'a> IntoItem for Raw<'a> { } impl IntoItem for RawWithIndices<'_> { - fn store_path(&self) -> &str { + fn store_path(&self) -> &StorePath { &self.0 .0 } fn into_item( diff --git a/src/codec_pipeline_store_http.rs b/src/codec_pipeline_store_http.rs new file mode 100644 index 0000000..bf98e37 --- /dev/null +++ b/src/codec_pipeline_store_http.rs @@ -0,0 +1,38 @@ +use std::sync::Arc; + +use pyo3::{exceptions::PyValueError, PyResult}; +use zarrs::storage::storage_adapter::async_to_sync::AsyncToSyncStorageAdapter; +use zarrs::storage::ReadableWritableListableStorageTraits; +use zarrs_opendal::AsyncOpendalStore; + +use crate::{ + runtime::{tokio_block_on, TokioBlockOn}, + utils::PyErrExt, + CodecPipelineStore, +}; + +pub struct CodecPipelineStoreHTTP { + store: Arc>, +} + +impl CodecPipelineStoreHTTP { + pub fn new(url_root: &str) -> PyResult { + let builder = opendal::services::Http::default().endpoint(url_root); + let operator = opendal::Operator::new(builder) + .map_py_err::()? + .finish(); + let store = Arc::new(zarrs_opendal::AsyncOpendalStore::new(operator)); + let store = Arc::new(AsyncToSyncStorageAdapter::new(store, tokio_block_on())); + Ok(Self { store }) + } +} + +impl CodecPipelineStore for CodecPipelineStoreHTTP { + fn store(&self) -> Arc { + self.store.clone() + } + + fn chunk_path(&self, store_path: &str) -> PyResult { + Ok(store_path.to_string()) + } +} diff --git a/src/lib.rs b/src/lib.rs index 9c43ece..33dab15 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,12 +25,15 @@ use zarrs::storage::{ReadableWritableListableStorageTraits, StorageHandle, Store mod chunk_item; mod codec_pipeline_store_filesystem; +mod codec_pipeline_store_http; mod concurrency; +mod runtime; #[cfg(test)] mod tests; mod utils; use codec_pipeline_store_filesystem::CodecPipelineStoreFilesystem; +use codec_pipeline_store_http::CodecPipelineStoreHTTP; use utils::{PyErrExt, PyUntypedArrayExt}; trait CodecPipelineStore: Send + Sync { @@ -50,10 +53,14 @@ pub struct CodecPipelineImpl { pub(crate) num_threads: usize, } +/// A store root and path pair. +// TODO: Prefer a struct with named fields, but I couldn't be bothered to figure out how to do that with stubgen etc +type StorePath = (String, String); + impl CodecPipelineImpl { fn get_store_and_path( &self, - store_path: &str, + store_path: &StorePath, ) -> PyResult<(Arc, String)> { let mut gstore = self.store.lock().map_err(|_| { PyErr::new::("failed to lock the store mutex".to_string()) @@ -61,17 +68,20 @@ impl CodecPipelineImpl { #[allow(clippy::collapsible_if)] if gstore.is_none() { - if store_path.starts_with("file://") { + if store_path.0.is_empty() && store_path.1.starts_with("file://") { *gstore = Some(Arc::new(CodecPipelineStoreFilesystem::new()?)); + } else if store_path.0.starts_with("http://") || store_path.0.starts_with("https://") { + *gstore = Some(Arc::new(CodecPipelineStoreHTTP::new(&store_path.0)?)); } // TODO: Add support for more stores } if let Some(gstore) = gstore.as_ref() { - Ok((gstore.store(), gstore.chunk_path(store_path)?)) + Ok((gstore.store(), gstore.chunk_path(&store_path.1)?)) } else { - Err(PyErr::new::(format!( - "unsupported store for {store_path}" + Err(PyErr::new::(format!( + "unsupported store for root:{} path:{}", + store_path.0, store_path.1 ))) } } diff --git a/src/runtime.rs b/src/runtime.rs new file mode 100644 index 0000000..161db08 --- /dev/null +++ b/src/runtime.rs @@ -0,0 +1,18 @@ +use std::sync::OnceLock; +use tokio::runtime::Runtime; +use zarrs::storage::storage_adapter::async_to_sync::AsyncToSyncBlockOn; + +static RUNTIME: OnceLock = OnceLock::new(); + +pub struct TokioBlockOn(tokio::runtime::Handle); + +impl AsyncToSyncBlockOn for TokioBlockOn { + fn block_on(&self, future: F) -> F::Output { + self.0.block_on(future) + } +} + +pub fn tokio_block_on() -> TokioBlockOn { + let runtime = RUNTIME.get_or_init(|| Runtime::new().expect("Failed to create Tokio runtime")); + TokioBlockOn(runtime.handle().clone()) +} diff --git a/tests/test_zarrs_http.py b/tests/test_zarrs_http.py new file mode 100644 index 0000000..00d6096 --- /dev/null +++ b/tests/test_zarrs_http.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 + +import numpy as np +import zarr + +import zarrs # noqa: F401 + +arr_ref = np.array( + [ + [np.nan, np.nan, np.nan, np.nan, 0.1, 0.1, -0.6, 0.1], + [np.nan, np.nan, np.nan, np.nan, 0.1, 0.1, -1.6, 0.1], + [np.nan, np.nan, np.nan, np.nan, 0.1, 0.1, -2.6, 0.1], + [np.nan, np.nan, np.nan, np.nan, -3.4, -3.5, -3.6, 0.1], + [1.0, 1.0, 1.0, -4.3, -4.4, -4.5, -4.6, 1.1], + [1.0, 1.0, 1.0, -5.3, -5.4, -5.5, -5.6, 1.1], + [1.0, 1.0, 1.0, 1.0, 1.1, 1.1, -6.6, 1.1], + [1.0, 1.0, 1.0, 1.0, -7.4, -7.5, -7.6, -7.7], + ] +) + + +def test_zarrs_http(): + zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"}) + arr = zarr.open( + "https://raw.githubusercontent.com/LDeakin/zarrs/main/zarrs/tests/data/array_write_read.zarr/group/array" + ) + assert arr.shape == (8, 8) + assert np.allclose(arr[:], arr_ref, equal_nan=True) From 2e8599c3c59e8c2a07a6b8849a7b0ac17dc9737d Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Thu, 28 Nov 2024 10:01:18 +1100 Subject: [PATCH 03/45] update stubs --- python/zarrs/_internal.pyi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/zarrs/_internal.pyi b/python/zarrs/_internal.pyi index 6c8ce36..86e2fd9 100644 --- a/python/zarrs/_internal.pyi +++ b/python/zarrs/_internal.pyi @@ -21,7 +21,7 @@ class CodecPipelineImpl: self, chunk_descriptions: typing.Sequence[ tuple[ - tuple[str, typing.Sequence[int], str, typing.Sequence[int]], + tuple[tuple[str, str], typing.Sequence[int], str, typing.Sequence[int]], typing.Sequence[slice], typing.Sequence[slice], ] @@ -31,14 +31,14 @@ class CodecPipelineImpl: def retrieve_chunks( self, chunk_descriptions: typing.Sequence[ - tuple[str, typing.Sequence[int], str, typing.Sequence[int]] + tuple[tuple[str, str], typing.Sequence[int], str, typing.Sequence[int]] ], ) -> list[numpy.typing.NDArray[numpy.uint8]]: ... def store_chunks_with_indices( self, chunk_descriptions: typing.Sequence[ tuple[ - tuple[str, typing.Sequence[int], str, typing.Sequence[int]], + tuple[tuple[str, str], typing.Sequence[int], str, typing.Sequence[int]], typing.Sequence[slice], typing.Sequence[slice], ] From f61183ee67ec54ee943f9ffff1e46fa35e1d3dc6 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Thu, 28 Nov 2024 13:17:10 +1100 Subject: [PATCH 04/45] fix: disallow storage_options for HTTP store --- python/zarrs/utils.py | 8 ++++++-- tests/test_zarrs_http.py | 22 ++++++++++++++++++---- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/python/zarrs/utils.py b/python/zarrs/utils.py index f139db3..d184584 100644 --- a/python/zarrs/utils.py +++ b/python/zarrs/utils.py @@ -67,8 +67,12 @@ def selector_tuple_to_slice_selection(selector_tuple: SelectorTuple) -> list[sli def convert_chunk_to_primitive( byte_getter: ByteGetter | ByteSetter, chunk_spec: ArraySpec ) -> tuple[(str, str), ChunkCoords, str, Any]: + # TODO: Request upstream change to get store on codec pipeline initialisation, do not want to do all of this here if isinstance(byte_getter.store, RemoteStore): - # TODO: Prefer passing enum to Rust for RemoteStore, LocalStore, etc? + # TODO: Handle supported storage_options per store type (HTTP, S3, etc) and put in an enum (per store) for Rust + storage_options = byte_getter.store.fs.storage_options + if set(storage_options) > {"asynchronous"}: + raise NotImplementedError(f"Unsupported storage options: {storage_options}") root = str(byte_getter.store.path) path = str(byte_getter.path) elif isinstance(byte_getter.store, LocalStore): @@ -76,7 +80,7 @@ def convert_chunk_to_primitive( path = str(byte_getter) else: # TODO: Check what other store types exist - raise ValueError(f"Unsupported store type: {type(byte_getter.store)}") + raise NotImplementedError(f"Unsupported store type: {type(byte_getter.store)}") return ( (root, path), chunk_spec.shape, diff --git a/tests/test_zarrs_http.py b/tests/test_zarrs_http.py index 00d6096..a7ea726 100644 --- a/tests/test_zarrs_http.py +++ b/tests/test_zarrs_http.py @@ -1,11 +1,14 @@ #!/usr/bin/env python3 +import aiohttp import numpy as np +import pytest import zarr +from zarr.storage.remote import RemoteStore import zarrs # noqa: F401 -arr_ref = np.array( +ARR_REF = np.array( [ [np.nan, np.nan, np.nan, np.nan, 0.1, 0.1, -0.6, 0.1], [np.nan, np.nan, np.nan, np.nan, 0.1, 0.1, -1.6, 0.1], @@ -18,11 +21,22 @@ ] ) +URL = "https://raw.githubusercontent.com/LDeakin/zarrs/main/zarrs/tests/data/array_write_read.zarr/group/array" + def test_zarrs_http(): zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"}) - arr = zarr.open( - "https://raw.githubusercontent.com/LDeakin/zarrs/main/zarrs/tests/data/array_write_read.zarr/group/array" + arr = zarr.open(URL) + assert arr.shape == (8, 8) + assert np.allclose(arr[:], ARR_REF, equal_nan=True) + + +@pytest.mark.xfail(reason="Storage options are not supported for HTTP store") +def test_zarrs_http_kwargs(): + zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"}) + store = RemoteStore.from_url( + URL, storage_options={"auth": aiohttp.BasicAuth("user", "pass")} ) + arr = zarr.open(store) assert arr.shape == (8, 8) - assert np.allclose(arr[:], arr_ref, equal_nan=True) + assert np.allclose(arr[:], ARR_REF, equal_nan=True) From 3f2d07711f5c0928c6889a24c11db431a9205756 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Thu, 28 Nov 2024 13:31:11 +1100 Subject: [PATCH 05/45] add another todo for store info on codec init --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index 33dab15..3966bc2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -66,6 +66,7 @@ impl CodecPipelineImpl { PyErr::new::("failed to lock the store mutex".to_string()) })?; + // TODO: Request upstream change to get store on codec pipeline initialisation, do not want to do all of this here #[allow(clippy::collapsible_if)] if gstore.is_none() { if store_path.0.is_empty() && store_path.1.starts_with("file://") { From bf4e3659616f1a1b8093b6508e7e6f9c1b49a02d Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sat, 30 Nov 2024 08:10:13 +1100 Subject: [PATCH 06/45] Merge remote-tracking branch 'origin/main' into ld/http_store --- Cargo.lock | 51 +++++++++++++++++++-------------------------------- Cargo.toml | 4 ++-- src/lib.rs | 2 +- 3 files changed, 22 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 086a52e..970923e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -538,12 +538,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - [[package]] name = "http" version = "1.1.0" @@ -989,11 +983,10 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ - "hermit-abi", "libc", "wasi", "windows-sys 0.52.0", @@ -1131,9 +1124,9 @@ dependencies = [ [[package]] name = "numpy" -version = "0.22.1" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edb929bc0da91a4d85ed6c0a84deaa53d411abfb387fc271124f91bf6b89f14e" +checksum = "b94caae805f998a07d33af06e6a3891e38556051b8045c615470a71590e13e78" dependencies = [ "libc", "ndarray", @@ -1141,7 +1134,7 @@ dependencies = [ "num-integer", "num-traits", "pyo3", - "rustc-hash 1.1.0", + "rustc-hash", ] [[package]] @@ -1333,9 +1326,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.22.6" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +checksum = "f54b3d09cbdd1f8c20650b28e7b09e338881482f4aa908a5f61a00c98fba2690" dependencies = [ "cfg-if", "indoc", @@ -1351,9 +1344,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.6" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +checksum = "3015cf985888fe66cfb63ce0e321c603706cd541b7aec7ddd35c281390af45d8" dependencies = [ "once_cell", "target-lexicon", @@ -1361,9 +1354,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.6" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +checksum = "6fca7cd8fd809b5ac4eefb89c1f98f7a7651d3739dfb341ca6980090f554c270" dependencies = [ "libc", "pyo3-build-config", @@ -1371,9 +1364,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.6" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +checksum = "34e657fa5379a79151b6ff5328d9216a84f55dc93b17b08e7c3609a969b73aa0" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -1383,9 +1376,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.22.6" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +checksum = "295548d5ffd95fd1981d2d3cf4458831b21d60af046b729b6fd143b0ba7aee2f" dependencies = [ "heck", "proc-macro2", @@ -1457,7 +1450,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.0.0", + "rustc-hash", "rustls", "socket2", "thiserror 2.0.3", @@ -1475,7 +1468,7 @@ dependencies = [ "getrandom", "rand", "ring", - "rustc-hash 2.0.0", + "rustc-hash", "rustls", "rustls-pki-types", "slab", @@ -1656,12 +1649,6 @@ version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "rustc-hash" version = "2.0.0" @@ -1845,9 +1832,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" dependencies = [ "libc", "windows-sys 0.52.0", diff --git a/Cargo.toml b/Cargo.toml index aa1f939..9d269d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,13 +9,13 @@ name = "zarrs_python" crate-type = ["cdylib", "rlib"] [dependencies] -pyo3 = { version = "0.22.6", features = ["abi3-py311"] } +pyo3 = { version = "0.23.2", features = ["abi3-py311"] } zarrs = "0.18.0" rayon_iter_concurrent_limit = "0.2.0" rayon = "1.10.0" # fix for https://stackoverflow.com/questions/76593417/package-openssl-was-not-found-in-the-pkg-config-search-path openssl = { version = "0.10", features = ["vendored"] } -numpy = "0.22.1" +numpy = "0.23.0" unsafe_cell_slice = "0.2.0" serde_json = "1.0.128" pyo3-stub-gen = { version = "0.6.1", git = "https://github.com/flying-sheep/pyo3-stub-gen.git", branch = "py-untyped-array" } diff --git a/src/lib.rs b/src/lib.rs index 3966bc2..7fc4b8e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -451,7 +451,7 @@ impl CodecPipelineImpl { })?; Ok(chunk_bytes .into_iter() - .map(|x| x.into_pyarray_bound(py)) + .map(|x| x.into_pyarray(py)) .collect()) } From 3244beece02140b26da36084fbda7fc278da9ec1 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sat, 7 Dec 2024 09:54:49 +1100 Subject: [PATCH 07/45] refactor store initialisation --- python/zarrs/_internal.pyi | 18 +++++- python/zarrs/utils.py | 29 ++------- src/chunk_item.rs | 29 ++++++--- src/codec_pipeline_store_filesystem.rs | 46 ++++++-------- src/codec_pipeline_store_http.rs | 44 ++++++++++--- src/lib.rs | 88 ++++++++++++++++++-------- 6 files changed, 158 insertions(+), 96 deletions(-) diff --git a/python/zarrs/_internal.pyi b/python/zarrs/_internal.pyi index 86e2fd9..852392b 100644 --- a/python/zarrs/_internal.pyi +++ b/python/zarrs/_internal.pyi @@ -2,6 +2,7 @@ # ruff: noqa: E501, F401 import typing +from enum import Enum, auto import numpy import numpy.typing @@ -21,7 +22,9 @@ class CodecPipelineImpl: self, chunk_descriptions: typing.Sequence[ tuple[ - tuple[tuple[str, str], typing.Sequence[int], str, typing.Sequence[int]], + tuple[ + StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int] + ], typing.Sequence[slice], typing.Sequence[slice], ] @@ -31,17 +34,26 @@ class CodecPipelineImpl: def retrieve_chunks( self, chunk_descriptions: typing.Sequence[ - tuple[tuple[str, str], typing.Sequence[int], str, typing.Sequence[int]] + tuple[StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int]] ], ) -> list[numpy.typing.NDArray[numpy.uint8]]: ... def store_chunks_with_indices( self, chunk_descriptions: typing.Sequence[ tuple[ - tuple[tuple[str, str], typing.Sequence[int], str, typing.Sequence[int]], + tuple[ + StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int] + ], typing.Sequence[slice], typing.Sequence[slice], ] ], value: numpy.NDArray[typing.Any], ) -> None: ... + +class FilesystemStoreConfig: ... +class HTTPStoreConfig: ... + +class StoreConfig(Enum): + Filesystem = auto() + HTTP = auto() diff --git a/python/zarrs/utils.py b/python/zarrs/utils.py index d184584..5c5a2af 100644 --- a/python/zarrs/utils.py +++ b/python/zarrs/utils.py @@ -7,14 +7,12 @@ import numpy as np from zarr.core.indexing import SelectorTuple, is_integer -from zarr.storage.local import LocalStore -from zarr.storage.remote import RemoteStore if TYPE_CHECKING: from collections.abc import Iterable from types import EllipsisType - from zarr.abc.store import ByteGetter, ByteSetter + from zarr.abc.store import ByteGetter, ByteSetter, Store from zarr.core.array_spec import ArraySpec from zarr.core.common import ChunkCoords @@ -65,24 +63,11 @@ def selector_tuple_to_slice_selection(selector_tuple: SelectorTuple) -> list[sli def convert_chunk_to_primitive( - byte_getter: ByteGetter | ByteSetter, chunk_spec: ArraySpec -) -> tuple[(str, str), ChunkCoords, str, Any]: - # TODO: Request upstream change to get store on codec pipeline initialisation, do not want to do all of this here - if isinstance(byte_getter.store, RemoteStore): - # TODO: Handle supported storage_options per store type (HTTP, S3, etc) and put in an enum (per store) for Rust - storage_options = byte_getter.store.fs.storage_options - if set(storage_options) > {"asynchronous"}: - raise NotImplementedError(f"Unsupported storage options: {storage_options}") - root = str(byte_getter.store.path) - path = str(byte_getter.path) - elif isinstance(byte_getter.store, LocalStore): - root = "" - path = str(byte_getter) - else: - # TODO: Check what other store types exist - raise NotImplementedError(f"Unsupported store type: {type(byte_getter.store)}") + byte_interface: ByteGetter | ByteSetter, chunk_spec: ArraySpec +) -> tuple[(Store, str), ChunkCoords, str, Any]: return ( - (root, path), + byte_interface.store, + byte_interface.path, chunk_spec.shape, str(chunk_spec.dtype), chunk_spec.fill_value.tobytes(), @@ -165,7 +150,7 @@ def make_chunk_info_for_rust_with_indices( tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple] ], drop_axes: tuple[int, ...], -) -> list[tuple[tuple[str, ChunkCoords, str, Any], list[slice], list[slice]]]: +) -> list[tuple[tuple[(Store, str), ChunkCoords, str, Any], list[slice], list[slice]]]: chunk_info_with_indices = [] for byte_getter, chunk_spec, chunk_selection, out_selection in batch_info: chunk_info = convert_chunk_to_primitive(byte_getter, chunk_spec) @@ -194,7 +179,7 @@ def make_chunk_info_for_rust( batch_info: Iterable[ tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple] ], -) -> list[tuple[str, ChunkCoords, str, Any]]: +) -> list[tuple[(Store, str), ChunkCoords, str, Any]]: return list( convert_chunk_to_primitive(byte_getter, chunk_spec) for (byte_getter, chunk_spec, _, _) in batch_info diff --git a/src/chunk_item.rs b/src/chunk_item.rs index ddf4018..a573e86 100644 --- a/src/chunk_item.rs +++ b/src/chunk_item.rs @@ -12,11 +12,13 @@ use zarrs::{ storage::{MaybeBytes, ReadableWritableListableStorageTraits, StorageError, StoreKey}, }; -use crate::{utils::PyErrExt, StorePath}; +use crate::{utils::PyErrExt, StoreConfig}; pub(crate) type Raw<'a> = ( - // store root and path - StorePath, + // store + StoreConfig, + // path + String, // shape Vec, // data type @@ -34,7 +36,8 @@ pub(crate) type RawWithIndices<'a> = ( ); pub(crate) trait IntoItem: std::marker::Sized { - fn store_path(&self) -> &StorePath; + fn store_config(&self) -> &StoreConfig; + fn path(&self) -> &str; fn into_item( self, store: Arc, @@ -90,16 +93,21 @@ impl ChunksItem for WithSubset { } impl<'a> IntoItem for Raw<'a> { - fn store_path(&self) -> &StorePath { + fn store_config(&self) -> &StoreConfig { &self.0 } + + fn path(&self) -> &str { + &self.1 + } + fn into_item( self, store: Arc, key: StoreKey, (): (), ) -> PyResult { - let (_, chunk_shape, dtype, fill_value) = self; + let (_, _, chunk_shape, dtype, fill_value) = self; let representation = get_chunk_representation(chunk_shape, &dtype, fill_value)?; Ok(Basic { store, @@ -110,9 +118,14 @@ impl<'a> IntoItem for Raw<'a> { } impl IntoItem for RawWithIndices<'_> { - fn store_path(&self) -> &StorePath { + fn store_config(&self) -> &StoreConfig { &self.0 .0 } + + fn path(&self) -> &str { + &self.0 .1 + } + fn into_item( self, store: Arc, @@ -120,7 +133,7 @@ impl IntoItem for RawWithIndices<'_> { shape: &[u64], ) -> PyResult { let (raw, selection, chunk_selection) = self; - let chunk_shape = raw.1.clone(); + let chunk_shape = raw.2.clone(); let item = raw.into_item(store.clone(), key, ())?; Ok(WithSubset { item, diff --git a/src/codec_pipeline_store_filesystem.rs b/src/codec_pipeline_store_filesystem.rs index ea573ee..588070b 100644 --- a/src/codec_pipeline_store_filesystem.rs +++ b/src/codec_pipeline_store_filesystem.rs @@ -1,28 +1,32 @@ use std::sync::Arc; -use pyo3::{ - exceptions::{PyRuntimeError, PyValueError}, - PyErr, PyResult, -}; +use pyo3::{exceptions::PyRuntimeError, pyclass, PyResult}; +use pyo3_stub_gen::derive::gen_stub_pyclass; use zarrs::{filesystem::FilesystemStore, storage::ReadableWritableListableStorageTraits}; use crate::{utils::PyErrExt, CodecPipelineStore}; pub struct CodecPipelineStoreFilesystem { store: Arc, - cwd: String, } -impl CodecPipelineStoreFilesystem { - pub fn new() -> PyResult { - let store = Arc::new(FilesystemStore::new("/").map_py_err::()?); - let cwd = std::env::current_dir()? - .to_string_lossy() - .replace('\\', "/"); // TODO: Check zarr-python path handling on windows +#[gen_stub_pyclass] +#[pyclass] +pub struct FilesystemStoreConfig { + root: String, +} + +impl FilesystemStoreConfig { + pub fn new(root: String) -> Self { + Self { root } + } +} - // Remove the leading / from the cwd if preset, so cwd is a valid Zarr store path - let cwd = cwd.strip_prefix("/").unwrap_or(&cwd).to_string(); - Ok(Self { store, cwd }) +impl CodecPipelineStoreFilesystem { + pub fn new(config: &FilesystemStoreConfig) -> PyResult { + let store = + Arc::new(FilesystemStore::new(config.root.clone()).map_py_err::()?); + Ok(Self { store }) } } @@ -30,18 +34,4 @@ impl CodecPipelineStore for CodecPipelineStoreFilesystem { fn store(&self) -> Arc { self.store.clone() } - - fn chunk_path(&self, store_path: &str) -> PyResult { - if let Some(chunk_path) = store_path.strip_prefix("file://") { - if let Some(chunk_path) = chunk_path.strip_prefix("/") { - Ok(chunk_path.to_string()) - } else { - Ok(format!("{}/{}", self.cwd, chunk_path)) - } - } else { - Err(PyErr::new::(format!( - "a filesystem store was initialised, but received a store path without a file:// prefix: {store_path}" - ))) - } - } } diff --git a/src/codec_pipeline_store_http.rs b/src/codec_pipeline_store_http.rs index bf98e37..43434c2 100644 --- a/src/codec_pipeline_store_http.rs +++ b/src/codec_pipeline_store_http.rs @@ -1,6 +1,7 @@ -use std::sync::Arc; +use std::{collections::HashMap, sync::Arc}; -use pyo3::{exceptions::PyValueError, PyResult}; +use pyo3::{exceptions::PyValueError, pyclass, Bound, PyAny, PyResult}; +use pyo3_stub_gen::derive::gen_stub_pyclass; use zarrs::storage::storage_adapter::async_to_sync::AsyncToSyncStorageAdapter; use zarrs::storage::ReadableWritableListableStorageTraits; use zarrs_opendal::AsyncOpendalStore; @@ -15,9 +16,40 @@ pub struct CodecPipelineStoreHTTP { store: Arc>, } +#[gen_stub_pyclass] +#[pyclass] +pub struct HTTPStoreConfig { + pub root: String, +} + +impl HTTPStoreConfig { + pub fn new<'py>( + path: &str, + storage_options: &HashMap>, + ) -> PyResult { + if !storage_options.is_empty() { + for storage_option in storage_options.keys() { + match storage_option.as_str() { + // TODO: Add support for other storage options + "asynchronous" => {} + _ => { + return Err(PyValueError::new_err(format!( + "Unsupported storage option for HTTPFileSystem: {storage_option}" + ))); + } + } + } + } + + Ok(Self { + root: path.to_string(), + }) + } +} + impl CodecPipelineStoreHTTP { - pub fn new(url_root: &str) -> PyResult { - let builder = opendal::services::Http::default().endpoint(url_root); + pub fn new(config: &HTTPStoreConfig) -> PyResult { + let builder = opendal::services::Http::default().endpoint(&config.root); let operator = opendal::Operator::new(builder) .map_py_err::()? .finish(); @@ -31,8 +63,4 @@ impl CodecPipelineStore for CodecPipelineStoreHTTP { fn store(&self) -> Arc { self.store.clone() } - - fn chunk_path(&self, store_path: &str) -> PyResult { - Ok(store_path.to_string()) - } } diff --git a/src/lib.rs b/src/lib.rs index 7fc4b8e..40e3bd0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,10 +7,11 @@ use numpy::{IntoPyArray, PyArray1, PyUntypedArray, PyUntypedArrayMethods}; use pyo3::exceptions::{PyRuntimeError, PyTypeError, PyValueError}; use pyo3::prelude::*; use pyo3_stub_gen::define_stub_info_gatherer; -use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods}; +use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pyclass_enum, gen_stub_pymethods}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rayon_iter_concurrent_limit::iter_concurrent_limit; use std::borrow::Cow; +use std::collections::HashMap; use std::sync::{Arc, Mutex}; use unsafe_cell_slice::UnsafeCellSlice; use zarrs::array::codec::{ @@ -32,13 +33,12 @@ mod runtime; mod tests; mod utils; -use codec_pipeline_store_filesystem::CodecPipelineStoreFilesystem; -use codec_pipeline_store_http::CodecPipelineStoreHTTP; +use codec_pipeline_store_filesystem::{CodecPipelineStoreFilesystem, FilesystemStoreConfig}; +use codec_pipeline_store_http::{CodecPipelineStoreHTTP, HTTPStoreConfig}; use utils::{PyErrExt, PyUntypedArrayExt}; trait CodecPipelineStore: Send + Sync { fn store(&self) -> Arc; - fn chunk_path(&self, store_path: &str) -> PyResult; } // TODO: Use a OnceLock for store with get_or_try_init when stabilised? @@ -53,37 +53,69 @@ pub struct CodecPipelineImpl { pub(crate) num_threads: usize, } -/// A store root and path pair. -// TODO: Prefer a struct with named fields, but I couldn't be bothered to figure out how to do that with stubgen etc -type StorePath = (String, String); +#[gen_stub_pyclass_enum] +enum StoreConfig { + Filesystem(FilesystemStoreConfig), + HTTP(HTTPStoreConfig), + // TODO: Add support for more stores +} + +impl<'py> FromPyObject<'py> for StoreConfig { + fn extract_bound(store: &Bound<'py, PyAny>) -> PyResult { + if store.get_type().name()? == "LocalStore" { + let root: String = store + .getattr("root")? + .call_method("as_posix", (), None)? + .extract()?; + Ok(StoreConfig::Filesystem(FilesystemStoreConfig::new(root))) + } else if store.get_type().name()? == "RemoteStore" { + let fs = store.getattr("fs")?; + let name = fs.get_type().name()?; + let path: String = store.getattr("path")?.extract()?; + let storage_options: HashMap> = + fs.getattr("storage_options")?.extract()?; + if name == "HTTPFileSystem" { + Ok(StoreConfig::HTTP(HTTPStoreConfig::new( + &path, + &storage_options, + )?)) + } else { + return Err(PyErr::new::( + "zarrs-python only supports a HTTPFileSystem RemoteStore".to_string(), + )); + } + } else { + Err(PyErr::new::( + "zarrs-python only supports LocalStore and RemoteStore".to_string(), + )) + } + } +} impl CodecPipelineImpl { - fn get_store_and_path( + fn get_store( &self, - store_path: &StorePath, - ) -> PyResult<(Arc, String)> { + store: &StoreConfig, + ) -> PyResult> { let mut gstore = self.store.lock().map_err(|_| { PyErr::new::("failed to lock the store mutex".to_string()) })?; // TODO: Request upstream change to get store on codec pipeline initialisation, do not want to do all of this here - #[allow(clippy::collapsible_if)] - if gstore.is_none() { - if store_path.0.is_empty() && store_path.1.starts_with("file://") { - *gstore = Some(Arc::new(CodecPipelineStoreFilesystem::new()?)); - } else if store_path.0.starts_with("http://") || store_path.0.starts_with("https://") { - *gstore = Some(Arc::new(CodecPipelineStoreHTTP::new(&store_path.0)?)); + match gstore.as_ref() { + Some(gstore) => Ok(gstore.store()), + None => { + match store { + StoreConfig::Filesystem(config) => { + *gstore = Some(Arc::new(CodecPipelineStoreFilesystem::new(config)?)); + } + StoreConfig::HTTP(config) => { + *gstore = Some(Arc::new(CodecPipelineStoreHTTP::new(config)?)); + } + } + let gstore = gstore.as_ref().expect("store was just initialised"); + Ok(gstore.store()) } - // TODO: Add support for more stores - } - - if let Some(gstore) = gstore.as_ref() { - Ok((gstore.store(), gstore.chunk_path(&store_path.1)?)) - } else { - Err(PyErr::new::(format!( - "unsupported store for root:{} path:{}", - store_path.0, store_path.1 - ))) } } @@ -95,7 +127,9 @@ impl CodecPipelineImpl { chunk_descriptions .into_iter() .map(|raw| { - let (store, path) = self.get_store_and_path(raw.store_path())?; + // TODO: Prefer to get the store once, and assume it is the same for all chunks + let store = self.get_store(raw.store_config())?; + let path = raw.path(); let key = StoreKey::new(path).map_py_err::()?; raw.into_item(store, key, shape) }) From 95ba17b01522f9fbb1aec41d5360cb7feeab2a4c Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sat, 7 Dec 2024 10:28:29 +1100 Subject: [PATCH 08/45] try build with latest ring --- Cargo.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 9d269d6..72d340f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,3 +25,6 @@ zarrs_opendal = "0.4.0" [profile.release] lto = true + +[patch.crates-io] +ring = { git = "https://github.com/briansmith/ring.git", rev = "45ff8561744987fdae22dc826e441092eb411327" } From fa4fca527de6e8c798d4ec13e2df60a35792fbde Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sat, 7 Dec 2024 10:46:03 +1100 Subject: [PATCH 09/45] try add nasm --- .github/workflows/cd.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index e5ce819..67cadc3 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -42,6 +42,7 @@ jobs: python-version: '3.13' architecture: ${{ matrix.python-architecture || 'x64' }} - run: pip install twine + - uses: ilammy/setup-nasm@v1 - uses: PyO3/maturin-action@v1 with: target: ${{ matrix.target }} From 38636fdf5d9ae5ebdf86882cf3be85078136d53c Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sat, 7 Dec 2024 10:57:22 +1100 Subject: [PATCH 10/45] Revert "try add nasm" This reverts commit fa4fca527de6e8c798d4ec13e2df60a35792fbde. --- .github/workflows/cd.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 67cadc3..e5ce819 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -42,7 +42,6 @@ jobs: python-version: '3.13' architecture: ${{ matrix.python-architecture || 'x64' }} - run: pip install twine - - uses: ilammy/setup-nasm@v1 - uses: PyO3/maturin-action@v1 with: target: ${{ matrix.target }} From 3bf21dadd49f4c1f0a0bdd8f2f69f31aa9dccebc Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sat, 7 Dec 2024 10:57:39 +1100 Subject: [PATCH 11/45] Revert "try build with latest ring" This reverts commit 95ba17b01522f9fbb1aec41d5360cb7feeab2a4c. --- Cargo.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 72d340f..9d269d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,3 @@ zarrs_opendal = "0.4.0" [profile.release] lto = true - -[patch.crates-io] -ring = { git = "https://github.com/briansmith/ring.git", rev = "45ff8561744987fdae22dc826e441092eb411327" } From 1bae158e866c240261fcce0c6653d1328140d914 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sat, 7 Dec 2024 14:35:21 +1100 Subject: [PATCH 12/45] CD: bump maturin to 1.7.8 --- .github/workflows/cd.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index e5ce819..1ca861b 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -50,7 +50,7 @@ jobs: rust-toolchain: stable docker-options: -e CI # https://github.com/PyO3/maturin/issues/2336#issuecomment-2507418846 - maturin-version: 1.7.4 + maturin-version: 1.7.8 before-script-linux: | # If we're running on rhel centos, install needed packages. if command -v yum &> /dev/null; then @@ -75,7 +75,7 @@ jobs: - uses: PyO3/maturin-action@v1 with: # https://github.com/PyO3/maturin/issues/2336#issuecomment-2507418846 - maturin-version: 1.7.4 + maturin-version: 1.7.8 command: sdist args: --out dist rust-toolchain: stable From 11f4c87b65e6a5c1479650480a2971fdaa518477 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sat, 7 Dec 2024 14:54:58 +1100 Subject: [PATCH 13/45] set python architecture for windows aarch64 --- .github/workflows/cd.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 1ca861b..ab97e1f 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -34,6 +34,7 @@ jobs: - { os: linux, manylinux: musllinux_1_2, target: armv7 } # windows - { os: windows, target: i686, python-architecture: x86 } + - { os: windows, target: aarch64, python-architecture: arm64 } runs-on: ${{ (matrix.os == 'linux' && 'ubuntu') || matrix.os }}-latest steps: - uses: actions/checkout@v4 From 4020a7cdc95fa246640e1971585db8a6fb966f25 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sat, 7 Dec 2024 15:03:33 +1100 Subject: [PATCH 14/45] Revert "set python architecture for windows aarch64" This reverts commit 11f4c87b65e6a5c1479650480a2971fdaa518477. --- .github/workflows/cd.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index ab97e1f..1ca861b 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -34,7 +34,6 @@ jobs: - { os: linux, manylinux: musllinux_1_2, target: armv7 } # windows - { os: windows, target: i686, python-architecture: x86 } - - { os: windows, target: aarch64, python-architecture: arm64 } runs-on: ${{ (matrix.os == 'linux' && 'ubuntu') || matrix.os }}-latest steps: - uses: actions/checkout@v4 From 08476b3b23a7e1b86f078bf46787cbec9b3c042e Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sat, 7 Dec 2024 15:11:36 +1100 Subject: [PATCH 15/45] Set MATURIN_USE_XWIN=1 on windows aarch64 --- .github/workflows/cd.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 1ca861b..9f408b4 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -35,6 +35,8 @@ jobs: # windows - { os: windows, target: i686, python-architecture: x86 } runs-on: ${{ (matrix.os == 'linux' && 'ubuntu') || matrix.os }}-latest + env: + MATURIN_USE_XWIN: ${{ matrix.os == 'windows' && matrix.target == 'aarch64' && '1' || '0' }} steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 From 56d69c6be52a6d784146a632dc216056be8e1898 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sat, 7 Dec 2024 15:31:50 +1100 Subject: [PATCH 16/45] Exclude windows aarch64 --- .github/workflows/cd.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 9f408b4..e35b760 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -34,6 +34,10 @@ jobs: - { os: linux, manylinux: musllinux_1_2, target: armv7 } # windows - { os: windows, target: i686, python-architecture: x86 } + exclude: + # https://github.com/rust-cross/cargo-xwin/issues/76 + - os: windows + target: aarch64 runs-on: ${{ (matrix.os == 'linux' && 'ubuntu') || matrix.os }}-latest env: MATURIN_USE_XWIN: ${{ matrix.os == 'windows' && matrix.target == 'aarch64' && '1' || '0' }} From 96c1a24c3b084579d4e7b2779ae5313b45d5ce86 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Tue, 10 Dec 2024 06:11:55 +1100 Subject: [PATCH 17/45] fix: store/str type hints --- python/zarrs/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/zarrs/utils.py b/python/zarrs/utils.py index 5c5a2af..9e2a7b6 100644 --- a/python/zarrs/utils.py +++ b/python/zarrs/utils.py @@ -64,7 +64,7 @@ def selector_tuple_to_slice_selection(selector_tuple: SelectorTuple) -> list[sli def convert_chunk_to_primitive( byte_interface: ByteGetter | ByteSetter, chunk_spec: ArraySpec -) -> tuple[(Store, str), ChunkCoords, str, Any]: +) -> tuple[Store, str, ChunkCoords, str, Any]: return ( byte_interface.store, byte_interface.path, @@ -150,7 +150,7 @@ def make_chunk_info_for_rust_with_indices( tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple] ], drop_axes: tuple[int, ...], -) -> list[tuple[tuple[(Store, str), ChunkCoords, str, Any], list[slice], list[slice]]]: +) -> list[tuple[tuple[Store, str, ChunkCoords, str, Any], list[slice], list[slice]]]: chunk_info_with_indices = [] for byte_getter, chunk_spec, chunk_selection, out_selection in batch_info: chunk_info = convert_chunk_to_primitive(byte_getter, chunk_spec) @@ -179,7 +179,7 @@ def make_chunk_info_for_rust( batch_info: Iterable[ tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple] ], -) -> list[tuple[(Store, str), ChunkCoords, str, Any]]: +) -> list[tuple[Store, str, ChunkCoords, str, Any]]: return list( convert_chunk_to_primitive(byte_getter, chunk_spec) for (byte_getter, chunk_spec, _, _) in batch_info From d57e0379a834b8ad0b44a058c093ae42ebe8d634 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Tue, 10 Dec 2024 06:15:02 +1100 Subject: [PATCH 18/45] fix: get_store to get_store_from_config --- src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 40e3bd0..214ea61 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -93,9 +93,9 @@ impl<'py> FromPyObject<'py> for StoreConfig { } impl CodecPipelineImpl { - fn get_store( + fn get_store_from_config( &self, - store: &StoreConfig, + config: &StoreConfig, ) -> PyResult> { let mut gstore = self.store.lock().map_err(|_| { PyErr::new::("failed to lock the store mutex".to_string()) @@ -105,7 +105,7 @@ impl CodecPipelineImpl { match gstore.as_ref() { Some(gstore) => Ok(gstore.store()), None => { - match store { + match config { StoreConfig::Filesystem(config) => { *gstore = Some(Arc::new(CodecPipelineStoreFilesystem::new(config)?)); } @@ -128,7 +128,7 @@ impl CodecPipelineImpl { .into_iter() .map(|raw| { // TODO: Prefer to get the store once, and assume it is the same for all chunks - let store = self.get_store(raw.store_config())?; + let store = self.get_store_from_config(raw.store_config())?; let path = raw.path(); let key = StoreKey::new(path).map_py_err::()?; raw.into_item(store, key, shape) From 86883c588ad96e012134950f7ef706f18a1c532a Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Tue, 10 Dec 2024 06:18:57 +1100 Subject: [PATCH 19/45] fix: use match in StoreConfig::extract_bound --- src/lib.rs | 52 ++++++++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 214ea61..908f83e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -62,32 +62,36 @@ enum StoreConfig { impl<'py> FromPyObject<'py> for StoreConfig { fn extract_bound(store: &Bound<'py, PyAny>) -> PyResult { - if store.get_type().name()? == "LocalStore" { - let root: String = store - .getattr("root")? - .call_method("as_posix", (), None)? - .extract()?; - Ok(StoreConfig::Filesystem(FilesystemStoreConfig::new(root))) - } else if store.get_type().name()? == "RemoteStore" { - let fs = store.getattr("fs")?; - let name = fs.get_type().name()?; - let path: String = store.getattr("path")?.extract()?; - let storage_options: HashMap> = - fs.getattr("storage_options")?.extract()?; - if name == "HTTPFileSystem" { - Ok(StoreConfig::HTTP(HTTPStoreConfig::new( - &path, - &storage_options, - )?)) - } else { - return Err(PyErr::new::( - "zarrs-python only supports a HTTPFileSystem RemoteStore".to_string(), - )); + let name = store.get_type().name()?; + let name = name.to_str()?; + match name { + "LocalStore" => { + let root: String = store + .getattr("root")? + .call_method("as_posix", (), None)? + .extract()?; + Ok(StoreConfig::Filesystem(FilesystemStoreConfig::new(root))) } - } else { - Err(PyErr::new::( + "RemoteStore" => { + let fs = store.getattr("fs")?; + let name = fs.get_type().name()?; + let path: String = store.getattr("path")?.extract()?; + let storage_options: HashMap> = + fs.getattr("storage_options")?.extract()?; + if name == "HTTPFileSystem" { + Ok(StoreConfig::HTTP(HTTPStoreConfig::new( + &path, + &storage_options, + )?)) + } else { + return Err(PyErr::new::( + "zarrs-python only supports a HTTPFileSystem RemoteStore".to_string(), + )); + } + } + _ => Err(PyErr::new::( "zarrs-python only supports LocalStore and RemoteStore".to_string(), - )) + )), } } } From 5e80adf8358f553b0cba8119cf2d89983de68b8a Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Tue, 10 Dec 2024 06:20:08 +1100 Subject: [PATCH 20/45] fix: elide 'py lifetimes in HTTPStoreConfig --- src/codec_pipeline_store_http.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/codec_pipeline_store_http.rs b/src/codec_pipeline_store_http.rs index 43434c2..4b03605 100644 --- a/src/codec_pipeline_store_http.rs +++ b/src/codec_pipeline_store_http.rs @@ -23,10 +23,7 @@ pub struct HTTPStoreConfig { } impl HTTPStoreConfig { - pub fn new<'py>( - path: &str, - storage_options: &HashMap>, - ) -> PyResult { + pub fn new(path: &str, storage_options: &HashMap>) -> PyResult { if !storage_options.is_empty() { for storage_option in storage_options.keys() { match storage_option.as_str() { From 31e9101320a1da49d5b09dec7ff60303a5f23104 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Tue, 10 Dec 2024 06:20:37 +1100 Subject: [PATCH 21/45] fix: remove unneeded return statement --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 908f83e..5ca59b9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -84,9 +84,9 @@ impl<'py> FromPyObject<'py> for StoreConfig { &storage_options, )?)) } else { - return Err(PyErr::new::( + Err(PyErr::new::( "zarrs-python only supports a HTTPFileSystem RemoteStore".to_string(), - )); + )) } } _ => Err(PyErr::new::( From 60e867b374f6dd927ee314eac32367787fa7988e Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Tue, 10 Dec 2024 06:23:40 +1100 Subject: [PATCH 22/45] fix: address clippy::upper_case_acronyms --- python/zarrs/_internal.pyi | 4 ++-- src/codec_pipeline_store_http.rs | 6 +++--- src/lib.rs | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/python/zarrs/_internal.pyi b/python/zarrs/_internal.pyi index 852392b..581f3a5 100644 --- a/python/zarrs/_internal.pyi +++ b/python/zarrs/_internal.pyi @@ -52,8 +52,8 @@ class CodecPipelineImpl: ) -> None: ... class FilesystemStoreConfig: ... -class HTTPStoreConfig: ... +class HttpStoreConfig: ... class StoreConfig(Enum): Filesystem = auto() - HTTP = auto() + Http = auto() diff --git a/src/codec_pipeline_store_http.rs b/src/codec_pipeline_store_http.rs index 4b03605..ba16f02 100644 --- a/src/codec_pipeline_store_http.rs +++ b/src/codec_pipeline_store_http.rs @@ -18,11 +18,11 @@ pub struct CodecPipelineStoreHTTP { #[gen_stub_pyclass] #[pyclass] -pub struct HTTPStoreConfig { +pub struct HttpStoreConfig { pub root: String, } -impl HTTPStoreConfig { +impl HttpStoreConfig { pub fn new(path: &str, storage_options: &HashMap>) -> PyResult { if !storage_options.is_empty() { for storage_option in storage_options.keys() { @@ -45,7 +45,7 @@ impl HTTPStoreConfig { } impl CodecPipelineStoreHTTP { - pub fn new(config: &HTTPStoreConfig) -> PyResult { + pub fn new(config: &HttpStoreConfig) -> PyResult { let builder = opendal::services::Http::default().endpoint(&config.root); let operator = opendal::Operator::new(builder) .map_py_err::()? diff --git a/src/lib.rs b/src/lib.rs index 5ca59b9..eb554e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,7 +34,7 @@ mod tests; mod utils; use codec_pipeline_store_filesystem::{CodecPipelineStoreFilesystem, FilesystemStoreConfig}; -use codec_pipeline_store_http::{CodecPipelineStoreHTTP, HTTPStoreConfig}; +use codec_pipeline_store_http::{CodecPipelineStoreHTTP, HttpStoreConfig}; use utils::{PyErrExt, PyUntypedArrayExt}; trait CodecPipelineStore: Send + Sync { @@ -56,7 +56,7 @@ pub struct CodecPipelineImpl { #[gen_stub_pyclass_enum] enum StoreConfig { Filesystem(FilesystemStoreConfig), - HTTP(HTTPStoreConfig), + Http(HttpStoreConfig), // TODO: Add support for more stores } @@ -79,7 +79,7 @@ impl<'py> FromPyObject<'py> for StoreConfig { let storage_options: HashMap> = fs.getattr("storage_options")?.extract()?; if name == "HTTPFileSystem" { - Ok(StoreConfig::HTTP(HTTPStoreConfig::new( + Ok(StoreConfig::Http(HttpStoreConfig::new( &path, &storage_options, )?)) @@ -113,7 +113,7 @@ impl CodecPipelineImpl { StoreConfig::Filesystem(config) => { *gstore = Some(Arc::new(CodecPipelineStoreFilesystem::new(config)?)); } - StoreConfig::HTTP(config) => { + StoreConfig::Http(config) => { *gstore = Some(Arc::new(CodecPipelineStoreHTTP::new(config)?)); } } From 63fa08122b3d3b67e81c7e59387c0785cec1289a Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Tue, 10 Dec 2024 06:24:38 +1100 Subject: [PATCH 23/45] fix: address clippy::single_match_else --- src/lib.rs | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index eb554e7..a2889a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -106,20 +106,19 @@ impl CodecPipelineImpl { })?; // TODO: Request upstream change to get store on codec pipeline initialisation, do not want to do all of this here - match gstore.as_ref() { - Some(gstore) => Ok(gstore.store()), - None => { - match config { - StoreConfig::Filesystem(config) => { - *gstore = Some(Arc::new(CodecPipelineStoreFilesystem::new(config)?)); - } - StoreConfig::Http(config) => { - *gstore = Some(Arc::new(CodecPipelineStoreHTTP::new(config)?)); - } + if let Some(gstore) = gstore.as_ref() { + Ok(gstore.store()) + } else { + match config { + StoreConfig::Filesystem(config) => { + *gstore = Some(Arc::new(CodecPipelineStoreFilesystem::new(config)?)); + } + StoreConfig::Http(config) => { + *gstore = Some(Arc::new(CodecPipelineStoreHTTP::new(config)?)); } - let gstore = gstore.as_ref().expect("store was just initialised"); - Ok(gstore.store()) } + let gstore = gstore.as_ref().expect("store was just initialised"); + Ok(gstore.store()) } } From 301f6d58cb08045af6d4ae5bcf4b928bf0e5d1ee Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Tue, 10 Dec 2024 06:25:13 +1100 Subject: [PATCH 24/45] Revert "CD: bump maturin to 1.7.8" This reverts commit 1bae158e866c240261fcce0c6653d1328140d914. --- .github/workflows/cd.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index e35b760..562fd59 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -56,7 +56,7 @@ jobs: rust-toolchain: stable docker-options: -e CI # https://github.com/PyO3/maturin/issues/2336#issuecomment-2507418846 - maturin-version: 1.7.8 + maturin-version: 1.7.4 before-script-linux: | # If we're running on rhel centos, install needed packages. if command -v yum &> /dev/null; then @@ -81,7 +81,7 @@ jobs: - uses: PyO3/maturin-action@v1 with: # https://github.com/PyO3/maturin/issues/2336#issuecomment-2507418846 - maturin-version: 1.7.8 + maturin-version: 1.7.4 command: sdist args: --out dist rust-toolchain: stable From 190becd8f013e6e442eaa98b029b6eefd0369c3c Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Tue, 10 Dec 2024 06:29:59 +1100 Subject: [PATCH 25/45] fix: match on remote store name in StoreConfig::extract_bound --- src/lib.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a2889a0..db63eaa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -74,19 +74,19 @@ impl<'py> FromPyObject<'py> for StoreConfig { } "RemoteStore" => { let fs = store.getattr("fs")?; - let name = fs.get_type().name()?; + let fs_name = fs.get_type().name()?; + let fs_name = fs_name.to_str()?; let path: String = store.getattr("path")?.extract()?; let storage_options: HashMap> = fs.getattr("storage_options")?.extract()?; - if name == "HTTPFileSystem" { - Ok(StoreConfig::Http(HttpStoreConfig::new( + match fs_name { + "HTTPFileSystem" => Ok(StoreConfig::Http(HttpStoreConfig::new( &path, &storage_options, - )?)) - } else { - Err(PyErr::new::( + )?)), + _ => Err(PyErr::new::( "zarrs-python only supports a HTTPFileSystem RemoteStore".to_string(), - )) + )), } } _ => Err(PyErr::new::( From be244e1f15f6c5e37dcf36b0ff9e60d2660dd863 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Tue, 10 Dec 2024 06:32:59 +1100 Subject: [PATCH 26/45] fix: remove MATURIN_USE_XWIN This might need to come back next time we bump maturin. 1.7.8 was not correctly using xwin on windows aarch64, but it was broken anyway due to ring: https://github.com/rust-cross/cargo-xwin/issues/76 --- .github/workflows/cd.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 562fd59..e49f458 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -39,8 +39,6 @@ jobs: - os: windows target: aarch64 runs-on: ${{ (matrix.os == 'linux' && 'ubuntu') || matrix.os }}-latest - env: - MATURIN_USE_XWIN: ${{ matrix.os == 'windows' && matrix.target == 'aarch64' && '1' || '0' }} steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 From d1e3405f957454a2a132ddad4da9f64d11356612 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Tue, 10 Dec 2024 06:36:20 +1100 Subject: [PATCH 27/45] fix: remove zarrs config setup in http tests --- tests/test_zarrs_http.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_zarrs_http.py b/tests/test_zarrs_http.py index a7ea726..7b23db1 100644 --- a/tests/test_zarrs_http.py +++ b/tests/test_zarrs_http.py @@ -6,8 +6,6 @@ import zarr from zarr.storage.remote import RemoteStore -import zarrs # noqa: F401 - ARR_REF = np.array( [ [np.nan, np.nan, np.nan, np.nan, 0.1, 0.1, -0.6, 0.1], @@ -25,7 +23,6 @@ def test_zarrs_http(): - zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"}) arr = zarr.open(URL) assert arr.shape == (8, 8) assert np.allclose(arr[:], ARR_REF, equal_nan=True) @@ -33,7 +30,6 @@ def test_zarrs_http(): @pytest.mark.xfail(reason="Storage options are not supported for HTTP store") def test_zarrs_http_kwargs(): - zarr.config.set({"codec_pipeline.path": "zarrs.ZarrsCodecPipeline"}) store = RemoteStore.from_url( URL, storage_options={"auth": aiohttp.BasicAuth("user", "pass")} ) From 71f1698e83548711cd9c263c89465e656971df28 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Wed, 11 Dec 2024 06:19:24 +1100 Subject: [PATCH 28/45] fix: TryFrom<&StoreConfig> for store --- src/lib.rs | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index db63eaa..33d8f4b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -96,6 +96,19 @@ impl<'py> FromPyObject<'py> for StoreConfig { } } +impl TryFrom<&StoreConfig> for Arc { + type Error = PyErr; + + fn try_from(value: &StoreConfig) -> Result { + match value { + StoreConfig::Filesystem(config) => { + Ok(Arc::new(CodecPipelineStoreFilesystem::new(config)?)) + } + StoreConfig::Http(config) => Ok(Arc::new(CodecPipelineStoreHTTP::new(config)?)), + } + } +} + impl CodecPipelineImpl { fn get_store_from_config( &self, @@ -109,14 +122,7 @@ impl CodecPipelineImpl { if let Some(gstore) = gstore.as_ref() { Ok(gstore.store()) } else { - match config { - StoreConfig::Filesystem(config) => { - *gstore = Some(Arc::new(CodecPipelineStoreFilesystem::new(config)?)); - } - StoreConfig::Http(config) => { - *gstore = Some(Arc::new(CodecPipelineStoreHTTP::new(config)?)); - } - } + *gstore = Some(config.try_into()?); let gstore = gstore.as_ref().expect("store was just initialised"); Ok(gstore.store()) } From 0643735b48cab42e2497e328e55871102f2e3f03 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Wed, 11 Dec 2024 06:37:51 +1100 Subject: [PATCH 29/45] fix: add StoreConfig super class Does not actually work, not picked up by stub gen --- python/zarrs/_internal.pyi | 17 +++++++++++++---- src/chunk_item.rs | 10 +++++----- src/codec_pipeline_store_filesystem.rs | 4 ++-- src/codec_pipeline_store_http.rs | 4 ++-- src/lib.rs | 24 +++++++++++++++--------- 5 files changed, 37 insertions(+), 22 deletions(-) diff --git a/python/zarrs/_internal.pyi b/python/zarrs/_internal.pyi index 581f3a5..28ae9ab 100644 --- a/python/zarrs/_internal.pyi +++ b/python/zarrs/_internal.pyi @@ -23,7 +23,11 @@ class CodecPipelineImpl: chunk_descriptions: typing.Sequence[ tuple[ tuple[ - StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int] + StoreConfigType, + str, + typing.Sequence[int], + str, + typing.Sequence[int], ], typing.Sequence[slice], typing.Sequence[slice], @@ -34,7 +38,7 @@ class CodecPipelineImpl: def retrieve_chunks( self, chunk_descriptions: typing.Sequence[ - tuple[StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int]] + tuple[StoreConfigType, str, typing.Sequence[int], str, typing.Sequence[int]] ], ) -> list[numpy.typing.NDArray[numpy.uint8]]: ... def store_chunks_with_indices( @@ -42,7 +46,11 @@ class CodecPipelineImpl: chunk_descriptions: typing.Sequence[ tuple[ tuple[ - StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int] + StoreConfigType, + str, + typing.Sequence[int], + str, + typing.Sequence[int], ], typing.Sequence[slice], typing.Sequence[slice], @@ -53,7 +61,8 @@ class CodecPipelineImpl: class FilesystemStoreConfig: ... class HttpStoreConfig: ... +class StoreConfig: ... -class StoreConfig(Enum): +class StoreConfigType(Enum): Filesystem = auto() Http = auto() diff --git a/src/chunk_item.rs b/src/chunk_item.rs index a573e86..4c3122b 100644 --- a/src/chunk_item.rs +++ b/src/chunk_item.rs @@ -12,11 +12,11 @@ use zarrs::{ storage::{MaybeBytes, ReadableWritableListableStorageTraits, StorageError, StoreKey}, }; -use crate::{utils::PyErrExt, StoreConfig}; +use crate::{utils::PyErrExt, StoreConfigType}; pub(crate) type Raw<'a> = ( // store - StoreConfig, + StoreConfigType, // path String, // shape @@ -36,7 +36,7 @@ pub(crate) type RawWithIndices<'a> = ( ); pub(crate) trait IntoItem: std::marker::Sized { - fn store_config(&self) -> &StoreConfig; + fn store_config(&self) -> &StoreConfigType; fn path(&self) -> &str; fn into_item( self, @@ -93,7 +93,7 @@ impl ChunksItem for WithSubset { } impl<'a> IntoItem for Raw<'a> { - fn store_config(&self) -> &StoreConfig { + fn store_config(&self) -> &StoreConfigType { &self.0 } @@ -118,7 +118,7 @@ impl<'a> IntoItem for Raw<'a> { } impl IntoItem for RawWithIndices<'_> { - fn store_config(&self) -> &StoreConfig { + fn store_config(&self) -> &StoreConfigType { &self.0 .0 } diff --git a/src/codec_pipeline_store_filesystem.rs b/src/codec_pipeline_store_filesystem.rs index 588070b..05a5ed4 100644 --- a/src/codec_pipeline_store_filesystem.rs +++ b/src/codec_pipeline_store_filesystem.rs @@ -4,14 +4,14 @@ use pyo3::{exceptions::PyRuntimeError, pyclass, PyResult}; use pyo3_stub_gen::derive::gen_stub_pyclass; use zarrs::{filesystem::FilesystemStore, storage::ReadableWritableListableStorageTraits}; -use crate::{utils::PyErrExt, CodecPipelineStore}; +use crate::{utils::PyErrExt, CodecPipelineStore, StoreConfig}; pub struct CodecPipelineStoreFilesystem { store: Arc, } #[gen_stub_pyclass] -#[pyclass] +#[pyclass(extends=StoreConfig)] pub struct FilesystemStoreConfig { root: String, } diff --git a/src/codec_pipeline_store_http.rs b/src/codec_pipeline_store_http.rs index ba16f02..d6cec90 100644 --- a/src/codec_pipeline_store_http.rs +++ b/src/codec_pipeline_store_http.rs @@ -9,7 +9,7 @@ use zarrs_opendal::AsyncOpendalStore; use crate::{ runtime::{tokio_block_on, TokioBlockOn}, utils::PyErrExt, - CodecPipelineStore, + CodecPipelineStore, StoreConfig, }; pub struct CodecPipelineStoreHTTP { @@ -17,7 +17,7 @@ pub struct CodecPipelineStoreHTTP { } #[gen_stub_pyclass] -#[pyclass] +#[pyclass(extends=StoreConfig)] pub struct HttpStoreConfig { pub root: String, } diff --git a/src/lib.rs b/src/lib.rs index 33d8f4b..0e7cdb7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -53,14 +53,18 @@ pub struct CodecPipelineImpl { pub(crate) num_threads: usize, } +#[gen_stub_pyclass] +#[pyclass(subclass)] +pub struct StoreConfig; + #[gen_stub_pyclass_enum] -enum StoreConfig { +enum StoreConfigType { Filesystem(FilesystemStoreConfig), Http(HttpStoreConfig), // TODO: Add support for more stores } -impl<'py> FromPyObject<'py> for StoreConfig { +impl<'py> FromPyObject<'py> for StoreConfigType { fn extract_bound(store: &Bound<'py, PyAny>) -> PyResult { let name = store.get_type().name()?; let name = name.to_str()?; @@ -70,7 +74,9 @@ impl<'py> FromPyObject<'py> for StoreConfig { .getattr("root")? .call_method("as_posix", (), None)? .extract()?; - Ok(StoreConfig::Filesystem(FilesystemStoreConfig::new(root))) + Ok(StoreConfigType::Filesystem(FilesystemStoreConfig::new( + root, + ))) } "RemoteStore" => { let fs = store.getattr("fs")?; @@ -80,7 +86,7 @@ impl<'py> FromPyObject<'py> for StoreConfig { let storage_options: HashMap> = fs.getattr("storage_options")?.extract()?; match fs_name { - "HTTPFileSystem" => Ok(StoreConfig::Http(HttpStoreConfig::new( + "HTTPFileSystem" => Ok(StoreConfigType::Http(HttpStoreConfig::new( &path, &storage_options, )?)), @@ -96,15 +102,15 @@ impl<'py> FromPyObject<'py> for StoreConfig { } } -impl TryFrom<&StoreConfig> for Arc { +impl TryFrom<&StoreConfigType> for Arc { type Error = PyErr; - fn try_from(value: &StoreConfig) -> Result { + fn try_from(value: &StoreConfigType) -> Result { match value { - StoreConfig::Filesystem(config) => { + StoreConfigType::Filesystem(config) => { Ok(Arc::new(CodecPipelineStoreFilesystem::new(config)?)) } - StoreConfig::Http(config) => Ok(Arc::new(CodecPipelineStoreHTTP::new(config)?)), + StoreConfigType::Http(config) => Ok(Arc::new(CodecPipelineStoreHTTP::new(config)?)), } } } @@ -112,7 +118,7 @@ impl TryFrom<&StoreConfig> for Arc { impl CodecPipelineImpl { fn get_store_from_config( &self, - config: &StoreConfig, + config: &StoreConfigType, ) -> PyResult> { let mut gstore = self.store.lock().map_err(|_| { PyErr::new::("failed to lock the store mutex".to_string()) From 94806cf06fcfd6948faf53a34ac10cfdbaf4b6e1 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Wed, 11 Dec 2024 07:03:58 +1100 Subject: [PATCH 30/45] fix: convert local store root to string --- src/lib.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0e7cdb7..bc9802b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -70,10 +70,7 @@ impl<'py> FromPyObject<'py> for StoreConfigType { let name = name.to_str()?; match name { "LocalStore" => { - let root: String = store - .getattr("root")? - .call_method("as_posix", (), None)? - .extract()?; + let root: String = store.getattr("root")?.call_method0("__str__")?.extract()?; Ok(StoreConfigType::Filesystem(FilesystemStoreConfig::new( root, ))) From 9b70af19b6d549ab1e0d38b4e94907ccb0a8c908 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Wed, 11 Dec 2024 07:16:39 +1100 Subject: [PATCH 31/45] fix: change unsupported store to NotImplementedError and generalise error messages --- src/lib.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index bc9802b..44fd554 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,7 @@ use chunk_item::{ChunksItem, IntoItem}; use concurrency::ChunkConcurrentLimitAndCodecOptions; use numpy::npyffi::PyArrayObject; use numpy::{IntoPyArray, PyArray1, PyUntypedArray, PyUntypedArrayMethods}; -use pyo3::exceptions::{PyRuntimeError, PyTypeError, PyValueError}; +use pyo3::exceptions::{PyNotImplementedError, PyRuntimeError, PyTypeError, PyValueError}; use pyo3::prelude::*; use pyo3_stub_gen::define_stub_info_gatherer; use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pyclass_enum, gen_stub_pymethods}; @@ -87,14 +87,14 @@ impl<'py> FromPyObject<'py> for StoreConfigType { &path, &storage_options, )?)), - _ => Err(PyErr::new::( - "zarrs-python only supports a HTTPFileSystem RemoteStore".to_string(), - )), + _ => Err(PyErr::new::(format!( + "zarrs-python does not support {fs_name} (RemoteStore) stores" + ))), } } - _ => Err(PyErr::new::( - "zarrs-python only supports LocalStore and RemoteStore".to_string(), - )), + _ => Err(PyErr::new::(format!( + "zarrs-python does not support {name} stores" + ))), } } } From 0a3bf0fa3cb327cb67325bf8bffe2c251dd4d992 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Wed, 11 Dec 2024 07:17:38 +1100 Subject: [PATCH 32/45] fix: add docs for HTTP store --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d37742f..c70d2a3 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,14 @@ You can then use your `zarr` as normal (with some caveats)! We export a `ZarrsCodecPipeline` class so that `zarr-python` can use the class but it is not meant to be instantiated and we do not guarantee the stability of its API beyond what is required so that `zarr-python` can use it. Therefore, it is not documented here. We also export two errors, `DiscontiguousArrayError` and `CollapsedDimensionError` that can be thrown in the process of converting to indexers that `zarrs` can understand (see below for more details). -At the moment, we only support local filesystems but intend to support more in the future: https://github.com/ilan-gold/zarrs-python/issues/44 +At the moment, we only support a subset of the `zarr-python` stores: + +- [x] [LocalStore](https://zarr.readthedocs.io/en/main/_autoapi/zarr/storage/local/index.html) (FileSystem) +- [RemoteStore](https://zarr.readthedocs.io/en/main/_autoapi/zarr/storage/remote/index.html) + - [x] [HTTPFileSystem](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem) + +A `NotImplemented` error will be raised if a store is not supported. +We intend to support more stores in the future: https://github.com/ilan-gold/zarrs-python/issues/44. ### Configuration From 6e4279905ad8266c501f0dd99513d4717129cdd0 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Wed, 11 Dec 2024 08:02:42 +1100 Subject: [PATCH 33/45] refactor: move store logic into store module --- src/lib.rs | 74 +---------------- src/store.rs | 79 +++++++++++++++++++ .../filesystem.rs} | 4 +- .../http.rs} | 3 +- 4 files changed, 88 insertions(+), 72 deletions(-) create mode 100644 src/store.rs rename src/{codec_pipeline_store_filesystem.rs => store/filesystem.rs} (92%) rename src/{codec_pipeline_store_http.rs => store/http.rs} (97%) diff --git a/src/lib.rs b/src/lib.rs index 44fd554..99362f1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,15 +4,15 @@ use chunk_item::{ChunksItem, IntoItem}; use concurrency::ChunkConcurrentLimitAndCodecOptions; use numpy::npyffi::PyArrayObject; use numpy::{IntoPyArray, PyArray1, PyUntypedArray, PyUntypedArrayMethods}; -use pyo3::exceptions::{PyNotImplementedError, PyRuntimeError, PyTypeError, PyValueError}; +use pyo3::exceptions::{PyRuntimeError, PyTypeError, PyValueError}; use pyo3::prelude::*; use pyo3_stub_gen::define_stub_info_gatherer; -use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pyclass_enum, gen_stub_pymethods}; +use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rayon_iter_concurrent_limit::iter_concurrent_limit; use std::borrow::Cow; -use std::collections::HashMap; use std::sync::{Arc, Mutex}; +use store::{CodecPipelineStore, StoreConfigType}; use unsafe_cell_slice::UnsafeCellSlice; use zarrs::array::codec::{ ArrayToBytesCodecTraits, CodecOptions, CodecOptionsBuilder, StoragePartialDecoder, @@ -25,22 +25,15 @@ use zarrs::metadata::v3::MetadataV3; use zarrs::storage::{ReadableWritableListableStorageTraits, StorageHandle, StoreKey}; mod chunk_item; -mod codec_pipeline_store_filesystem; -mod codec_pipeline_store_http; mod concurrency; mod runtime; +mod store; #[cfg(test)] mod tests; mod utils; -use codec_pipeline_store_filesystem::{CodecPipelineStoreFilesystem, FilesystemStoreConfig}; -use codec_pipeline_store_http::{CodecPipelineStoreHTTP, HttpStoreConfig}; use utils::{PyErrExt, PyUntypedArrayExt}; -trait CodecPipelineStore: Send + Sync { - fn store(&self) -> Arc; -} - // TODO: Use a OnceLock for store with get_or_try_init when stabilised? #[gen_stub_pyclass] #[pyclass] @@ -53,65 +46,6 @@ pub struct CodecPipelineImpl { pub(crate) num_threads: usize, } -#[gen_stub_pyclass] -#[pyclass(subclass)] -pub struct StoreConfig; - -#[gen_stub_pyclass_enum] -enum StoreConfigType { - Filesystem(FilesystemStoreConfig), - Http(HttpStoreConfig), - // TODO: Add support for more stores -} - -impl<'py> FromPyObject<'py> for StoreConfigType { - fn extract_bound(store: &Bound<'py, PyAny>) -> PyResult { - let name = store.get_type().name()?; - let name = name.to_str()?; - match name { - "LocalStore" => { - let root: String = store.getattr("root")?.call_method0("__str__")?.extract()?; - Ok(StoreConfigType::Filesystem(FilesystemStoreConfig::new( - root, - ))) - } - "RemoteStore" => { - let fs = store.getattr("fs")?; - let fs_name = fs.get_type().name()?; - let fs_name = fs_name.to_str()?; - let path: String = store.getattr("path")?.extract()?; - let storage_options: HashMap> = - fs.getattr("storage_options")?.extract()?; - match fs_name { - "HTTPFileSystem" => Ok(StoreConfigType::Http(HttpStoreConfig::new( - &path, - &storage_options, - )?)), - _ => Err(PyErr::new::(format!( - "zarrs-python does not support {fs_name} (RemoteStore) stores" - ))), - } - } - _ => Err(PyErr::new::(format!( - "zarrs-python does not support {name} stores" - ))), - } - } -} - -impl TryFrom<&StoreConfigType> for Arc { - type Error = PyErr; - - fn try_from(value: &StoreConfigType) -> Result { - match value { - StoreConfigType::Filesystem(config) => { - Ok(Arc::new(CodecPipelineStoreFilesystem::new(config)?)) - } - StoreConfigType::Http(config) => Ok(Arc::new(CodecPipelineStoreHTTP::new(config)?)), - } - } -} - impl CodecPipelineImpl { fn get_store_from_config( &self, diff --git a/src/store.rs b/src/store.rs new file mode 100644 index 0000000..5d3ea88 --- /dev/null +++ b/src/store.rs @@ -0,0 +1,79 @@ +use std::{collections::HashMap, sync::Arc}; + +use pyo3::{ + exceptions::PyNotImplementedError, + pyclass, + types::{PyAnyMethods, PyStringMethods, PyTypeMethods}, + Bound, FromPyObject, PyAny, PyErr, PyResult, +}; +use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pyclass_enum}; + +pub use filesystem::{CodecPipelineStoreFilesystem, FilesystemStoreConfig}; +pub use http::{CodecPipelineStoreHTTP, HttpStoreConfig}; +use zarrs::storage::ReadableWritableListableStorageTraits; + +mod filesystem; +mod http; + +pub trait CodecPipelineStore: Send + Sync { + fn store(&self) -> Arc; +} + +#[gen_stub_pyclass] +#[pyclass(subclass)] +pub struct StoreConfig; + +#[gen_stub_pyclass_enum] +pub enum StoreConfigType { + Filesystem(FilesystemStoreConfig), + Http(HttpStoreConfig), + // TODO: Add support for more stores +} + +impl<'py> FromPyObject<'py> for StoreConfigType { + fn extract_bound(store: &Bound<'py, PyAny>) -> PyResult { + let name = store.get_type().name()?; + let name = name.to_str()?; + match name { + "LocalStore" => { + let root: String = store.getattr("root")?.call_method0("__str__")?.extract()?; + Ok(StoreConfigType::Filesystem(FilesystemStoreConfig::new( + root, + ))) + } + "RemoteStore" => { + let fs = store.getattr("fs")?; + let fs_name = fs.get_type().name()?; + let fs_name = fs_name.to_str()?; + let path: String = store.getattr("path")?.extract()?; + let storage_options: HashMap> = + fs.getattr("storage_options")?.extract()?; + match fs_name { + "HTTPFileSystem" => Ok(StoreConfigType::Http(HttpStoreConfig::new( + &path, + &storage_options, + )?)), + _ => Err(PyErr::new::(format!( + "zarrs-python does not support {fs_name} (RemoteStore) stores" + ))), + } + } + _ => Err(PyErr::new::(format!( + "zarrs-python does not support {name} stores" + ))), + } + } +} + +impl TryFrom<&StoreConfigType> for Arc { + type Error = PyErr; + + fn try_from(value: &StoreConfigType) -> Result { + match value { + StoreConfigType::Filesystem(config) => { + Ok(Arc::new(CodecPipelineStoreFilesystem::new(config)?)) + } + StoreConfigType::Http(config) => Ok(Arc::new(CodecPipelineStoreHTTP::new(config)?)), + } + } +} diff --git a/src/codec_pipeline_store_filesystem.rs b/src/store/filesystem.rs similarity index 92% rename from src/codec_pipeline_store_filesystem.rs rename to src/store/filesystem.rs index 05a5ed4..aeae809 100644 --- a/src/codec_pipeline_store_filesystem.rs +++ b/src/store/filesystem.rs @@ -4,7 +4,9 @@ use pyo3::{exceptions::PyRuntimeError, pyclass, PyResult}; use pyo3_stub_gen::derive::gen_stub_pyclass; use zarrs::{filesystem::FilesystemStore, storage::ReadableWritableListableStorageTraits}; -use crate::{utils::PyErrExt, CodecPipelineStore, StoreConfig}; +use crate::utils::PyErrExt; + +use super::{CodecPipelineStore, StoreConfig}; pub struct CodecPipelineStoreFilesystem { store: Arc, diff --git a/src/codec_pipeline_store_http.rs b/src/store/http.rs similarity index 97% rename from src/codec_pipeline_store_http.rs rename to src/store/http.rs index d6cec90..7b5342e 100644 --- a/src/codec_pipeline_store_http.rs +++ b/src/store/http.rs @@ -9,9 +9,10 @@ use zarrs_opendal::AsyncOpendalStore; use crate::{ runtime::{tokio_block_on, TokioBlockOn}, utils::PyErrExt, - CodecPipelineStore, StoreConfig, }; +use super::{CodecPipelineStore, StoreConfig}; + pub struct CodecPipelineStoreHTTP { store: Arc>, } From db7c62236d0cb082ce7358babc593fb3bb33800c Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Wed, 11 Dec 2024 08:18:29 +1100 Subject: [PATCH 34/45] refactor: remove CodecPipelineStore --- src/lib.rs | 9 +++++---- src/store.rs | 41 ++++++++++++++++++++++++++++------------- src/store/filesystem.rs | 24 ++++++++---------------- src/store/http.rs | 36 +++++++++--------------------------- 4 files changed, 50 insertions(+), 60 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 99362f1..0aa6ca9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ #![warn(clippy::pedantic)] +#![allow(clippy::module_name_repetitions)] use chunk_item::{ChunksItem, IntoItem}; use concurrency::ChunkConcurrentLimitAndCodecOptions; @@ -12,7 +13,7 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rayon_iter_concurrent_limit::iter_concurrent_limit; use std::borrow::Cow; use std::sync::{Arc, Mutex}; -use store::{CodecPipelineStore, StoreConfigType}; +use store::StoreConfigType; use unsafe_cell_slice::UnsafeCellSlice; use zarrs::array::codec::{ ArrayToBytesCodecTraits, CodecOptions, CodecOptionsBuilder, StoragePartialDecoder, @@ -39,7 +40,7 @@ use utils::{PyErrExt, PyUntypedArrayExt}; #[pyclass] pub struct CodecPipelineImpl { pub(crate) codec_chain: Arc, - pub(crate) store: Mutex>>, + pub(crate) store: Mutex>>, pub(crate) codec_options: CodecOptions, pub(crate) chunk_concurrent_minimum: usize, pub(crate) chunk_concurrent_maximum: usize, @@ -57,11 +58,11 @@ impl CodecPipelineImpl { // TODO: Request upstream change to get store on codec pipeline initialisation, do not want to do all of this here if let Some(gstore) = gstore.as_ref() { - Ok(gstore.store()) + Ok(gstore.clone()) } else { *gstore = Some(config.try_into()?); let gstore = gstore.as_ref().expect("store was just initialised"); - Ok(gstore.store()) + Ok(gstore.clone()) } } diff --git a/src/store.rs b/src/store.rs index 5d3ea88..7664d4f 100644 --- a/src/store.rs +++ b/src/store.rs @@ -1,24 +1,30 @@ use std::{collections::HashMap, sync::Arc}; +use opendal::Builder; use pyo3::{ - exceptions::PyNotImplementedError, + exceptions::{PyNotImplementedError, PyValueError}, pyclass, types::{PyAnyMethods, PyStringMethods, PyTypeMethods}, Bound, FromPyObject, PyAny, PyErr, PyResult, }; use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pyclass_enum}; -pub use filesystem::{CodecPipelineStoreFilesystem, FilesystemStoreConfig}; -pub use http::{CodecPipelineStoreHTTP, HttpStoreConfig}; -use zarrs::storage::ReadableWritableListableStorageTraits; +pub use filesystem::FilesystemStoreConfig; +pub use http::HttpStoreConfig; +use zarrs::storage::{ + storage_adapter::async_to_sync::AsyncToSyncStorageAdapter, + ReadableWritableListableStorageTraits, +}; +use zarrs_opendal::AsyncOpendalStore; + +use crate::{ + runtime::{tokio_block_on, TokioBlockOn}, + utils::PyErrExt, +}; mod filesystem; mod http; -pub trait CodecPipelineStore: Send + Sync { - fn store(&self) -> Arc; -} - #[gen_stub_pyclass] #[pyclass(subclass)] pub struct StoreConfig; @@ -65,15 +71,24 @@ impl<'py> FromPyObject<'py> for StoreConfigType { } } -impl TryFrom<&StoreConfigType> for Arc { +impl TryFrom<&StoreConfigType> for Arc { type Error = PyErr; fn try_from(value: &StoreConfigType) -> Result { match value { - StoreConfigType::Filesystem(config) => { - Ok(Arc::new(CodecPipelineStoreFilesystem::new(config)?)) - } - StoreConfigType::Http(config) => Ok(Arc::new(CodecPipelineStoreHTTP::new(config)?)), + StoreConfigType::Filesystem(config) => config.try_into(), + StoreConfigType::Http(config) => config.try_into(), } } } + +type OpendalStoreSync = Arc>; + +fn opendal_builder_to_sync_store(builder: B) -> PyResult { + let operator = opendal::Operator::new(builder) + .map_py_err::()? + .finish(); + let store = Arc::new(zarrs_opendal::AsyncOpendalStore::new(operator)); + let store = Arc::new(AsyncToSyncStorageAdapter::new(store, tokio_block_on())); + Ok(store) +} diff --git a/src/store/filesystem.rs b/src/store/filesystem.rs index aeae809..9cfe392 100644 --- a/src/store/filesystem.rs +++ b/src/store/filesystem.rs @@ -1,16 +1,12 @@ use std::sync::Arc; -use pyo3::{exceptions::PyRuntimeError, pyclass, PyResult}; +use pyo3::{exceptions::PyRuntimeError, pyclass, PyErr}; use pyo3_stub_gen::derive::gen_stub_pyclass; use zarrs::{filesystem::FilesystemStore, storage::ReadableWritableListableStorageTraits}; use crate::utils::PyErrExt; -use super::{CodecPipelineStore, StoreConfig}; - -pub struct CodecPipelineStoreFilesystem { - store: Arc, -} +use super::StoreConfig; #[gen_stub_pyclass] #[pyclass(extends=StoreConfig)] @@ -24,16 +20,12 @@ impl FilesystemStoreConfig { } } -impl CodecPipelineStoreFilesystem { - pub fn new(config: &FilesystemStoreConfig) -> PyResult { - let store = - Arc::new(FilesystemStore::new(config.root.clone()).map_py_err::()?); - Ok(Self { store }) - } -} +impl TryInto> for &FilesystemStoreConfig { + type Error = PyErr; -impl CodecPipelineStore for CodecPipelineStoreFilesystem { - fn store(&self) -> Arc { - self.store.clone() + fn try_into(self) -> Result, Self::Error> { + let store: Arc = + Arc::new(FilesystemStore::new(self.root.clone()).map_py_err::()?); + Ok(store) } } diff --git a/src/store/http.rs b/src/store/http.rs index 7b5342e..110c0d3 100644 --- a/src/store/http.rs +++ b/src/store/http.rs @@ -1,21 +1,10 @@ use std::{collections::HashMap, sync::Arc}; -use pyo3::{exceptions::PyValueError, pyclass, Bound, PyAny, PyResult}; +use pyo3::{exceptions::PyValueError, pyclass, Bound, PyAny, PyErr, PyResult}; use pyo3_stub_gen::derive::gen_stub_pyclass; -use zarrs::storage::storage_adapter::async_to_sync::AsyncToSyncStorageAdapter; use zarrs::storage::ReadableWritableListableStorageTraits; -use zarrs_opendal::AsyncOpendalStore; -use crate::{ - runtime::{tokio_block_on, TokioBlockOn}, - utils::PyErrExt, -}; - -use super::{CodecPipelineStore, StoreConfig}; - -pub struct CodecPipelineStoreHTTP { - store: Arc>, -} +use super::{opendal_builder_to_sync_store, StoreConfig}; #[gen_stub_pyclass] #[pyclass(extends=StoreConfig)] @@ -45,20 +34,13 @@ impl HttpStoreConfig { } } -impl CodecPipelineStoreHTTP { - pub fn new(config: &HttpStoreConfig) -> PyResult { - let builder = opendal::services::Http::default().endpoint(&config.root); - let operator = opendal::Operator::new(builder) - .map_py_err::()? - .finish(); - let store = Arc::new(zarrs_opendal::AsyncOpendalStore::new(operator)); - let store = Arc::new(AsyncToSyncStorageAdapter::new(store, tokio_block_on())); - Ok(Self { store }) - } -} +impl TryInto> for &HttpStoreConfig { + type Error = PyErr; -impl CodecPipelineStore for CodecPipelineStoreHTTP { - fn store(&self) -> Arc { - self.store.clone() + fn try_into(self) -> Result, Self::Error> { + let builder = opendal::services::Http::default().endpoint(&self.root); + let store: Arc = + opendal_builder_to_sync_store(builder)?; + Ok(store) } } From c288bdae2137221f3fa67c6e24a92e1c197979aa Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Wed, 11 Dec 2024 08:28:06 +1100 Subject: [PATCH 35/45] fix: return store directly from opendal_builder_to_sync_store --- src/store.rs | 12 ++++-------- src/store/filesystem.rs | 2 +- src/store/http.rs | 4 +--- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/store.rs b/src/store.rs index 7664d4f..1c28cd3 100644 --- a/src/store.rs +++ b/src/store.rs @@ -15,12 +15,8 @@ use zarrs::storage::{ storage_adapter::async_to_sync::AsyncToSyncStorageAdapter, ReadableWritableListableStorageTraits, }; -use zarrs_opendal::AsyncOpendalStore; -use crate::{ - runtime::{tokio_block_on, TokioBlockOn}, - utils::PyErrExt, -}; +use crate::{runtime::tokio_block_on, utils::PyErrExt}; mod filesystem; mod http; @@ -82,9 +78,9 @@ impl TryFrom<&StoreConfigType> for Arc>; - -fn opendal_builder_to_sync_store(builder: B) -> PyResult { +fn opendal_builder_to_sync_store( + builder: B, +) -> PyResult> { let operator = opendal::Operator::new(builder) .map_py_err::()? .finish(); diff --git a/src/store/filesystem.rs b/src/store/filesystem.rs index 9cfe392..11da371 100644 --- a/src/store/filesystem.rs +++ b/src/store/filesystem.rs @@ -24,7 +24,7 @@ impl TryInto> for &FilesystemStor type Error = PyErr; fn try_into(self) -> Result, Self::Error> { - let store: Arc = + let store = Arc::new(FilesystemStore::new(self.root.clone()).map_py_err::()?); Ok(store) } diff --git a/src/store/http.rs b/src/store/http.rs index 110c0d3..e22d4c2 100644 --- a/src/store/http.rs +++ b/src/store/http.rs @@ -39,8 +39,6 @@ impl TryInto> for &HttpStoreConfi fn try_into(self) -> Result, Self::Error> { let builder = opendal::services::Http::default().endpoint(&self.root); - let store: Arc = - opendal_builder_to_sync_store(builder)?; - Ok(store) + opendal_builder_to_sync_store(builder) } } From 428710a94475cf01d2dbc077f51e17b4b0ce48f7 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Wed, 11 Dec 2024 08:36:24 +1100 Subject: [PATCH 36/45] fix: use zarrs::ReadableWritableListableStorage --- src/chunk_item.rs | 18 +++++++++--------- src/lib.rs | 12 ++++++------ src/store.rs | 7 +++---- src/store/filesystem.rs | 6 +++--- src/store/http.rs | 8 ++++---- 5 files changed, 25 insertions(+), 26 deletions(-) diff --git a/src/chunk_item.rs b/src/chunk_item.rs index 4c3122b..d4d3c8c 100644 --- a/src/chunk_item.rs +++ b/src/chunk_item.rs @@ -1,4 +1,4 @@ -use std::{num::NonZeroU64, sync::Arc}; +use std::num::NonZeroU64; use pyo3::{ exceptions::{PyRuntimeError, PyValueError}, @@ -9,7 +9,7 @@ use zarrs::{ array::{ChunkRepresentation, DataType, FillValue}, array_subset::ArraySubset, metadata::v3::{array::data_type::DataTypeMetadataV3, MetadataV3}, - storage::{MaybeBytes, ReadableWritableListableStorageTraits, StorageError, StoreKey}, + storage::{MaybeBytes, ReadableWritableListableStorage, StorageError, StoreKey}, }; use crate::{utils::PyErrExt, StoreConfigType}; @@ -40,14 +40,14 @@ pub(crate) trait IntoItem: std::marker::Sized { fn path(&self) -> &str; fn into_item( self, - store: Arc, + store: ReadableWritableListableStorage, key: StoreKey, shape: S, ) -> PyResult; } pub(crate) trait ChunksItem { - fn store(&self) -> Arc; + fn store(&self) -> ReadableWritableListableStorage; fn key(&self) -> &StoreKey; fn representation(&self) -> &ChunkRepresentation; @@ -57,7 +57,7 @@ pub(crate) trait ChunksItem { } pub(crate) struct Basic { - store: Arc, + store: ReadableWritableListableStorage, key: StoreKey, representation: ChunkRepresentation, } @@ -69,7 +69,7 @@ pub(crate) struct WithSubset { } impl ChunksItem for Basic { - fn store(&self) -> Arc { + fn store(&self) -> ReadableWritableListableStorage { self.store.clone() } fn key(&self) -> &StoreKey { @@ -81,7 +81,7 @@ impl ChunksItem for Basic { } impl ChunksItem for WithSubset { - fn store(&self) -> Arc { + fn store(&self) -> ReadableWritableListableStorage { self.item.store.clone() } fn key(&self) -> &StoreKey { @@ -103,7 +103,7 @@ impl<'a> IntoItem for Raw<'a> { fn into_item( self, - store: Arc, + store: ReadableWritableListableStorage, key: StoreKey, (): (), ) -> PyResult { @@ -128,7 +128,7 @@ impl IntoItem for RawWithIndices<'_> { fn into_item( self, - store: Arc, + store: ReadableWritableListableStorage, key: StoreKey, shape: &[u64], ) -> PyResult { diff --git a/src/lib.rs b/src/lib.rs index 0aa6ca9..131a67b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,7 +23,7 @@ use zarrs::array::{ }; use zarrs::array_subset::ArraySubset; use zarrs::metadata::v3::MetadataV3; -use zarrs::storage::{ReadableWritableListableStorageTraits, StorageHandle, StoreKey}; +use zarrs::storage::{ReadableWritableListableStorage, StorageHandle, StoreKey}; mod chunk_item; mod concurrency; @@ -40,7 +40,7 @@ use utils::{PyErrExt, PyUntypedArrayExt}; #[pyclass] pub struct CodecPipelineImpl { pub(crate) codec_chain: Arc, - pub(crate) store: Mutex>>, + pub(crate) store: Mutex>, pub(crate) codec_options: CodecOptions, pub(crate) chunk_concurrent_minimum: usize, pub(crate) chunk_concurrent_maximum: usize, @@ -51,7 +51,7 @@ impl CodecPipelineImpl { fn get_store_from_config( &self, config: &StoreConfigType, - ) -> PyResult> { + ) -> PyResult { let mut gstore = self.store.lock().map_err(|_| { PyErr::new::("failed to lock the store mutex".to_string()) })?; @@ -60,9 +60,9 @@ impl CodecPipelineImpl { if let Some(gstore) = gstore.as_ref() { Ok(gstore.clone()) } else { - *gstore = Some(config.try_into()?); - let gstore = gstore.as_ref().expect("store was just initialised"); - Ok(gstore.clone()) + let store: ReadableWritableListableStorage = config.try_into()?; + *gstore = Some(store.clone()); + Ok(store) } } diff --git a/src/store.rs b/src/store.rs index 1c28cd3..e4a407e 100644 --- a/src/store.rs +++ b/src/store.rs @@ -12,8 +12,7 @@ use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pyclass_enum}; pub use filesystem::FilesystemStoreConfig; pub use http::HttpStoreConfig; use zarrs::storage::{ - storage_adapter::async_to_sync::AsyncToSyncStorageAdapter, - ReadableWritableListableStorageTraits, + storage_adapter::async_to_sync::AsyncToSyncStorageAdapter, ReadableWritableListableStorage, }; use crate::{runtime::tokio_block_on, utils::PyErrExt}; @@ -67,7 +66,7 @@ impl<'py> FromPyObject<'py> for StoreConfigType { } } -impl TryFrom<&StoreConfigType> for Arc { +impl TryFrom<&StoreConfigType> for ReadableWritableListableStorage { type Error = PyErr; fn try_from(value: &StoreConfigType) -> Result { @@ -80,7 +79,7 @@ impl TryFrom<&StoreConfigType> for Arc( builder: B, -) -> PyResult> { +) -> PyResult { let operator = opendal::Operator::new(builder) .map_py_err::()? .finish(); diff --git a/src/store/filesystem.rs b/src/store/filesystem.rs index 11da371..6eb8977 100644 --- a/src/store/filesystem.rs +++ b/src/store/filesystem.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use pyo3::{exceptions::PyRuntimeError, pyclass, PyErr}; use pyo3_stub_gen::derive::gen_stub_pyclass; -use zarrs::{filesystem::FilesystemStore, storage::ReadableWritableListableStorageTraits}; +use zarrs::{filesystem::FilesystemStore, storage::ReadableWritableListableStorage}; use crate::utils::PyErrExt; @@ -20,10 +20,10 @@ impl FilesystemStoreConfig { } } -impl TryInto> for &FilesystemStoreConfig { +impl TryInto for &FilesystemStoreConfig { type Error = PyErr; - fn try_into(self) -> Result, Self::Error> { + fn try_into(self) -> Result { let store = Arc::new(FilesystemStore::new(self.root.clone()).map_py_err::()?); Ok(store) diff --git a/src/store/http.rs b/src/store/http.rs index e22d4c2..c4692ac 100644 --- a/src/store/http.rs +++ b/src/store/http.rs @@ -1,8 +1,8 @@ -use std::{collections::HashMap, sync::Arc}; +use std::collections::HashMap; use pyo3::{exceptions::PyValueError, pyclass, Bound, PyAny, PyErr, PyResult}; use pyo3_stub_gen::derive::gen_stub_pyclass; -use zarrs::storage::ReadableWritableListableStorageTraits; +use zarrs::storage::ReadableWritableListableStorage; use super::{opendal_builder_to_sync_store, StoreConfig}; @@ -34,10 +34,10 @@ impl HttpStoreConfig { } } -impl TryInto> for &HttpStoreConfig { +impl TryInto for &HttpStoreConfig { type Error = PyErr; - fn try_into(self) -> Result, Self::Error> { + fn try_into(self) -> Result { let builder = opendal::services::Http::default().endpoint(&self.root); opendal_builder_to_sync_store(builder) } From fd0c6b4915b67c0ccaa7c91881d1263c87d17f22 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Thu, 12 Dec 2024 07:19:42 +1100 Subject: [PATCH 37/45] fix(docs): clarify NotImplementedError Co-authored-by: Philipp A. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8e13df3..c48bebe 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ At the moment, we only support a subset of the `zarr-python` stores: - [RemoteStore](https://zarr.readthedocs.io/en/main/_autoapi/zarr/storage/remote/index.html) - [x] [HTTPFileSystem](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.implementations.http.HTTPFileSystem) -A `NotImplemented` error will be raised if a store is not supported. +A `NotImplementedError` will be raised if a store is not supported. We intend to support more stores in the future: https://github.com/ilan-gold/zarrs-python/issues/44. ### Configuration From 78d9642812c928927d690afa85acb179fcc88d42 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 12 Dec 2024 11:44:03 +0100 Subject: [PATCH 38/45] Revert store enum changes --- src/chunk_item.rs | 10 +++++----- src/lib.rs | 4 ++-- src/store.rs | 20 ++++++++------------ src/store/filesystem.rs | 4 +--- src/store/http.rs | 4 ++-- 5 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/chunk_item.rs b/src/chunk_item.rs index d4d3c8c..aea98d8 100644 --- a/src/chunk_item.rs +++ b/src/chunk_item.rs @@ -12,11 +12,11 @@ use zarrs::{ storage::{MaybeBytes, ReadableWritableListableStorage, StorageError, StoreKey}, }; -use crate::{utils::PyErrExt, StoreConfigType}; +use crate::{utils::PyErrExt, StoreConfig}; pub(crate) type Raw<'a> = ( // store - StoreConfigType, + StoreConfig, // path String, // shape @@ -36,7 +36,7 @@ pub(crate) type RawWithIndices<'a> = ( ); pub(crate) trait IntoItem: std::marker::Sized { - fn store_config(&self) -> &StoreConfigType; + fn store_config(&self) -> &StoreConfig; fn path(&self) -> &str; fn into_item( self, @@ -93,7 +93,7 @@ impl ChunksItem for WithSubset { } impl<'a> IntoItem for Raw<'a> { - fn store_config(&self) -> &StoreConfigType { + fn store_config(&self) -> &StoreConfig { &self.0 } @@ -118,7 +118,7 @@ impl<'a> IntoItem for Raw<'a> { } impl IntoItem for RawWithIndices<'_> { - fn store_config(&self) -> &StoreConfigType { + fn store_config(&self) -> &StoreConfig { &self.0 .0 } diff --git a/src/lib.rs b/src/lib.rs index 131a67b..123f8d2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rayon_iter_concurrent_limit::iter_concurrent_limit; use std::borrow::Cow; use std::sync::{Arc, Mutex}; -use store::StoreConfigType; +use store::StoreConfig; use unsafe_cell_slice::UnsafeCellSlice; use zarrs::array::codec::{ ArrayToBytesCodecTraits, CodecOptions, CodecOptionsBuilder, StoragePartialDecoder, @@ -50,7 +50,7 @@ pub struct CodecPipelineImpl { impl CodecPipelineImpl { fn get_store_from_config( &self, - config: &StoreConfigType, + config: &StoreConfig, ) -> PyResult { let mut gstore = self.store.lock().map_err(|_| { PyErr::new::("failed to lock the store mutex".to_string()) diff --git a/src/store.rs b/src/store.rs index e4a407e..1683c4d 100644 --- a/src/store.rs +++ b/src/store.rs @@ -20,25 +20,21 @@ use crate::{runtime::tokio_block_on, utils::PyErrExt}; mod filesystem; mod http; -#[gen_stub_pyclass] -#[pyclass(subclass)] -pub struct StoreConfig; - #[gen_stub_pyclass_enum] -pub enum StoreConfigType { +pub enum StoreConfig { Filesystem(FilesystemStoreConfig), Http(HttpStoreConfig), // TODO: Add support for more stores } -impl<'py> FromPyObject<'py> for StoreConfigType { +impl<'py> FromPyObject<'py> for StoreConfig { fn extract_bound(store: &Bound<'py, PyAny>) -> PyResult { let name = store.get_type().name()?; let name = name.to_str()?; match name { "LocalStore" => { let root: String = store.getattr("root")?.call_method0("__str__")?.extract()?; - Ok(StoreConfigType::Filesystem(FilesystemStoreConfig::new( + Ok(StoreConfig::Filesystem(FilesystemStoreConfig::new( root, ))) } @@ -50,7 +46,7 @@ impl<'py> FromPyObject<'py> for StoreConfigType { let storage_options: HashMap> = fs.getattr("storage_options")?.extract()?; match fs_name { - "HTTPFileSystem" => Ok(StoreConfigType::Http(HttpStoreConfig::new( + "HTTPFileSystem" => Ok(StoreConfig::Http(HttpStoreConfig::new( &path, &storage_options, )?)), @@ -66,13 +62,13 @@ impl<'py> FromPyObject<'py> for StoreConfigType { } } -impl TryFrom<&StoreConfigType> for ReadableWritableListableStorage { +impl TryFrom<&StoreConfig> for ReadableWritableListableStorage { type Error = PyErr; - fn try_from(value: &StoreConfigType) -> Result { + fn try_from(value: &StoreConfig) -> Result { match value { - StoreConfigType::Filesystem(config) => config.try_into(), - StoreConfigType::Http(config) => config.try_into(), + StoreConfig::Filesystem(config) => config.try_into(), + StoreConfig::Http(config) => config.try_into(), } } } diff --git a/src/store/filesystem.rs b/src/store/filesystem.rs index 6eb8977..5f30854 100644 --- a/src/store/filesystem.rs +++ b/src/store/filesystem.rs @@ -6,10 +6,8 @@ use zarrs::{filesystem::FilesystemStore, storage::ReadableWritableListableStorag use crate::utils::PyErrExt; -use super::StoreConfig; - #[gen_stub_pyclass] -#[pyclass(extends=StoreConfig)] +#[pyclass] pub struct FilesystemStoreConfig { root: String, } diff --git a/src/store/http.rs b/src/store/http.rs index c4692ac..084054d 100644 --- a/src/store/http.rs +++ b/src/store/http.rs @@ -4,10 +4,10 @@ use pyo3::{exceptions::PyValueError, pyclass, Bound, PyAny, PyErr, PyResult}; use pyo3_stub_gen::derive::gen_stub_pyclass; use zarrs::storage::ReadableWritableListableStorage; -use super::{opendal_builder_to_sync_store, StoreConfig}; +use super::opendal_builder_to_sync_store; #[gen_stub_pyclass] -#[pyclass(extends=StoreConfig)] +#[pyclass] pub struct HttpStoreConfig { pub root: String, } From 028826e42fab6cac9fa24224b1283f72a30056bb Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 12 Dec 2024 11:48:39 +0100 Subject: [PATCH 39/45] fmt --- src/store.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/store.rs b/src/store.rs index 1683c4d..75719a9 100644 --- a/src/store.rs +++ b/src/store.rs @@ -34,9 +34,7 @@ impl<'py> FromPyObject<'py> for StoreConfig { match name { "LocalStore" => { let root: String = store.getattr("root")?.call_method0("__str__")?.extract()?; - Ok(StoreConfig::Filesystem(FilesystemStoreConfig::new( - root, - ))) + Ok(StoreConfig::Filesystem(FilesystemStoreConfig::new(root))) } "RemoteStore" => { let fs = store.getattr("fs")?; From 97f94285023c112810e465f1f5324b1c069601aa Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 12 Dec 2024 12:05:44 +0100 Subject: [PATCH 40/45] update stubs --- python/zarrs/_internal.pyi | 76 ++++++++++---------------------------- src/store.rs | 3 +- src/store/filesystem.rs | 4 +- src/store/http.rs | 2 + 4 files changed, 25 insertions(+), 60 deletions(-) diff --git a/python/zarrs/_internal.pyi b/python/zarrs/_internal.pyi index 28ae9ab..b218a9f 100644 --- a/python/zarrs/_internal.pyi +++ b/python/zarrs/_internal.pyi @@ -1,68 +1,30 @@ # This file is automatically generated by pyo3_stub_gen # ruff: noqa: E501, F401 -import typing -from enum import Enum, auto - import numpy import numpy.typing +import typing +from enum import Enum, auto class CodecPipelineImpl: - def __new__( - cls, - metadata, - *, - validate_checksums=..., - store_empty_chunks=..., - chunk_concurrent_minimum=..., - chunk_concurrent_maximum=..., - num_threads=..., - ): ... - def retrieve_chunks_and_apply_index( - self, - chunk_descriptions: typing.Sequence[ - tuple[ - tuple[ - StoreConfigType, - str, - typing.Sequence[int], - str, - typing.Sequence[int], - ], - typing.Sequence[slice], - typing.Sequence[slice], - ] - ], - value: numpy.NDArray[typing.Any], - ) -> None: ... - def retrieve_chunks( - self, - chunk_descriptions: typing.Sequence[ - tuple[StoreConfigType, str, typing.Sequence[int], str, typing.Sequence[int]] - ], - ) -> list[numpy.typing.NDArray[numpy.uint8]]: ... - def store_chunks_with_indices( - self, - chunk_descriptions: typing.Sequence[ - tuple[ - tuple[ - StoreConfigType, - str, - typing.Sequence[int], - str, - typing.Sequence[int], - ], - typing.Sequence[slice], - typing.Sequence[slice], - ] - ], - value: numpy.NDArray[typing.Any], - ) -> None: ... + def __new__(cls,metadata,*,validate_checksums = ...,store_empty_chunks = ...,chunk_concurrent_minimum = ...,chunk_concurrent_maximum = ...,num_threads = ...): ... + def retrieve_chunks_and_apply_index(self, chunk_descriptions:typing.Sequence[tuple[tuple[StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int]], typing.Sequence[slice], typing.Sequence[slice]]], value:numpy.NDArray[typing.Any]) -> None: + ... -class FilesystemStoreConfig: ... -class HttpStoreConfig: ... -class StoreConfig: ... + def retrieve_chunks(self, chunk_descriptions:typing.Sequence[tuple[StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int]]]) -> list[numpy.typing.NDArray[numpy.uint8]]: + ... -class StoreConfigType(Enum): + def store_chunks_with_indices(self, chunk_descriptions:typing.Sequence[tuple[tuple[StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int]], typing.Sequence[slice], typing.Sequence[slice]]], value:numpy.NDArray[typing.Any]) -> None: + ... + + +class FilesystemStoreConfig: + root: str + +class HttpStoreConfig: + root: str + +class StoreConfig(Enum): Filesystem = auto() Http = auto() + diff --git a/src/store.rs b/src/store.rs index 75719a9..a44f77a 100644 --- a/src/store.rs +++ b/src/store.rs @@ -3,11 +3,10 @@ use std::{collections::HashMap, sync::Arc}; use opendal::Builder; use pyo3::{ exceptions::{PyNotImplementedError, PyValueError}, - pyclass, types::{PyAnyMethods, PyStringMethods, PyTypeMethods}, Bound, FromPyObject, PyAny, PyErr, PyResult, }; -use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pyclass_enum}; +use pyo3_stub_gen::derive::gen_stub_pyclass_enum; pub use filesystem::FilesystemStoreConfig; pub use http::HttpStoreConfig; diff --git a/src/store/filesystem.rs b/src/store/filesystem.rs index 5f30854..9bf5b16 100644 --- a/src/store/filesystem.rs +++ b/src/store/filesystem.rs @@ -6,10 +6,12 @@ use zarrs::{filesystem::FilesystemStore, storage::ReadableWritableListableStorag use crate::utils::PyErrExt; +#[derive(Debug, Clone)] #[gen_stub_pyclass] #[pyclass] pub struct FilesystemStoreConfig { - root: String, + #[pyo3(get, set)] + pub root: String, } impl FilesystemStoreConfig { diff --git a/src/store/http.rs b/src/store/http.rs index 084054d..cbeb866 100644 --- a/src/store/http.rs +++ b/src/store/http.rs @@ -6,9 +6,11 @@ use zarrs::storage::ReadableWritableListableStorage; use super::opendal_builder_to_sync_store; +#[derive(Debug, Clone)] #[gen_stub_pyclass] #[pyclass] pub struct HttpStoreConfig { + #[pyo3(get, set)] pub root: String, } From 41ee02e8ef0cca32f21dad7b02ad1af052858dee Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Thu, 12 Dec 2024 12:07:58 +0100 Subject: [PATCH 41/45] wtf --- python/zarrs/_internal.pyi | 58 +++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/python/zarrs/_internal.pyi b/python/zarrs/_internal.pyi index b218a9f..c63a087 100644 --- a/python/zarrs/_internal.pyi +++ b/python/zarrs/_internal.pyi @@ -1,22 +1,55 @@ # This file is automatically generated by pyo3_stub_gen # ruff: noqa: E501, F401 -import numpy -import numpy.typing import typing from enum import Enum, auto -class CodecPipelineImpl: - def __new__(cls,metadata,*,validate_checksums = ...,store_empty_chunks = ...,chunk_concurrent_minimum = ...,chunk_concurrent_maximum = ...,num_threads = ...): ... - def retrieve_chunks_and_apply_index(self, chunk_descriptions:typing.Sequence[tuple[tuple[StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int]], typing.Sequence[slice], typing.Sequence[slice]]], value:numpy.NDArray[typing.Any]) -> None: - ... - - def retrieve_chunks(self, chunk_descriptions:typing.Sequence[tuple[StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int]]]) -> list[numpy.typing.NDArray[numpy.uint8]]: - ... - - def store_chunks_with_indices(self, chunk_descriptions:typing.Sequence[tuple[tuple[StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int]], typing.Sequence[slice], typing.Sequence[slice]]], value:numpy.NDArray[typing.Any]) -> None: - ... +import numpy +import numpy.typing +class CodecPipelineImpl: + def __new__( + cls, + metadata, + *, + validate_checksums=..., + store_empty_chunks=..., + chunk_concurrent_minimum=..., + chunk_concurrent_maximum=..., + num_threads=..., + ): ... + def retrieve_chunks_and_apply_index( + self, + chunk_descriptions: typing.Sequence[ + tuple[ + tuple[ + StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int] + ], + typing.Sequence[slice], + typing.Sequence[slice], + ] + ], + value: numpy.NDArray[typing.Any], + ) -> None: ... + def retrieve_chunks( + self, + chunk_descriptions: typing.Sequence[ + tuple[StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int]] + ], + ) -> list[numpy.typing.NDArray[numpy.uint8]]: ... + def store_chunks_with_indices( + self, + chunk_descriptions: typing.Sequence[ + tuple[ + tuple[ + StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int] + ], + typing.Sequence[slice], + typing.Sequence[slice], + ] + ], + value: numpy.NDArray[typing.Any], + ) -> None: ... class FilesystemStoreConfig: root: str @@ -27,4 +60,3 @@ class HttpStoreConfig: class StoreConfig(Enum): Filesystem = auto() Http = auto() - From 5873e2ae2d065cb57b8c60783d8c4b32ddb69710 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Thu, 12 Dec 2024 22:30:20 +1100 Subject: [PATCH 42/45] fix: http store root to endpoint --- python/zarrs/_internal.pyi | 2 +- src/store/http.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/zarrs/_internal.pyi b/python/zarrs/_internal.pyi index c63a087..7e28264 100644 --- a/python/zarrs/_internal.pyi +++ b/python/zarrs/_internal.pyi @@ -55,7 +55,7 @@ class FilesystemStoreConfig: root: str class HttpStoreConfig: - root: str + endpoint: str class StoreConfig(Enum): Filesystem = auto() diff --git a/src/store/http.rs b/src/store/http.rs index cbeb866..725d5a4 100644 --- a/src/store/http.rs +++ b/src/store/http.rs @@ -11,7 +11,7 @@ use super::opendal_builder_to_sync_store; #[pyclass] pub struct HttpStoreConfig { #[pyo3(get, set)] - pub root: String, + pub endpoint: String, } impl HttpStoreConfig { @@ -31,7 +31,7 @@ impl HttpStoreConfig { } Ok(Self { - root: path.to_string(), + endpoint: path.to_string(), }) } } @@ -40,7 +40,7 @@ impl TryInto for &HttpStoreConfig { type Error = PyErr; fn try_into(self) -> Result { - let builder = opendal::services::Http::default().endpoint(&self.root); + let builder = opendal::services::Http::default().endpoint(&self.endpoint); opendal_builder_to_sync_store(builder) } } From 223c57dd313e11e0b48f7de4851063ccebb9d5b4 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Sat, 14 Dec 2024 14:12:52 +0100 Subject: [PATCH 43/45] Instantiate Rust containers from Python (#72) Co-authored-by: Lachlan Deakin --- python/zarrs/_internal.pyi | 39 ++++----- python/zarrs/pipeline.py | 38 ++++----- python/zarrs/utils.py | 44 +++++------ src/chunk_item.rs | 157 +++++++++++++++---------------------- src/lib.rs | 116 +++++++++++---------------- src/store.rs | 1 + src/store/filesystem.rs | 2 +- src/store/http.rs | 2 +- src/tests.rs | 14 ++-- 9 files changed, 174 insertions(+), 239 deletions(-) diff --git a/python/zarrs/_internal.pyi b/python/zarrs/_internal.pyi index 7e28264..4223e3d 100644 --- a/python/zarrs/_internal.pyi +++ b/python/zarrs/_internal.pyi @@ -7,6 +7,10 @@ from enum import Enum, auto import numpy import numpy.typing +class Basic: + def __new__(cls, byte_interface: typing.Any, chunk_spec: typing.Any): ... + ... + class CodecPipelineImpl: def __new__( cls, @@ -20,34 +24,15 @@ class CodecPipelineImpl: ): ... def retrieve_chunks_and_apply_index( self, - chunk_descriptions: typing.Sequence[ - tuple[ - tuple[ - StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int] - ], - typing.Sequence[slice], - typing.Sequence[slice], - ] - ], + chunk_descriptions: typing.Sequence[WithSubset], value: numpy.NDArray[typing.Any], ) -> None: ... def retrieve_chunks( - self, - chunk_descriptions: typing.Sequence[ - tuple[StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int]] - ], + self, chunk_descriptions: typing.Sequence[Basic] ) -> list[numpy.typing.NDArray[numpy.uint8]]: ... def store_chunks_with_indices( self, - chunk_descriptions: typing.Sequence[ - tuple[ - tuple[ - StoreConfig, str, typing.Sequence[int], str, typing.Sequence[int] - ], - typing.Sequence[slice], - typing.Sequence[slice], - ] - ], + chunk_descriptions: typing.Sequence[WithSubset], value: numpy.NDArray[typing.Any], ) -> None: ... @@ -57,6 +42,16 @@ class FilesystemStoreConfig: class HttpStoreConfig: endpoint: str +class WithSubset: + def __new__( + cls, + item: Basic, + chunk_subset: typing.Sequence[slice], + subset: typing.Sequence[slice], + shape: typing.Sequence[int], + ): ... + ... + class StoreConfig(Enum): Filesystem = auto() Http = auto() diff --git a/python/zarrs/pipeline.py b/python/zarrs/pipeline.py index f6552ba..86846d2 100644 --- a/python/zarrs/pipeline.py +++ b/python/zarrs/pipeline.py @@ -6,10 +6,7 @@ from typing import TYPE_CHECKING, TypedDict import numpy as np -from zarr.abc.codec import ( - Codec, - CodecPipeline, -) +from zarr.abc.codec import Codec, CodecPipeline from zarr.core.config import config if TYPE_CHECKING: @@ -18,7 +15,7 @@ from zarr.abc.store import ByteGetter, ByteSetter from zarr.core.array_spec import ArraySpec - from zarr.core.buffer import Buffer, NDBuffer + from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer from zarr.core.chunk_grids import ChunkGrid from zarr.core.common import ChunkCoords from zarr.core.indexing import SelectorTuple @@ -120,19 +117,20 @@ async def read( batch_info: Iterable[ tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple] ], - out: NDBuffer, + out: NDBuffer, # type: ignore drop_axes: tuple[int, ...] = (), # FIXME: unused ) -> None: - out = out.as_ndarray_like() # FIXME: Error if array is not in host memory + # FIXME: Error if array is not in host memory + out: NDArrayLike = out.as_ndarray_like() if not out.dtype.isnative: raise RuntimeError("Non-native byte order not supported") try: - chunks_desc = make_chunk_info_for_rust_with_indices(batch_info, drop_axes) - index_in_rust = True + chunks_desc = make_chunk_info_for_rust_with_indices( + batch_info, drop_axes, out.shape + ) except (DiscontiguousArrayError, CollapsedDimensionError): chunks_desc = make_chunk_info_for_rust(batch_info) - index_in_rust = False - if index_in_rust: + else: await asyncio.to_thread( self.impl.retrieve_chunks_and_apply_index, chunks_desc, @@ -140,10 +138,7 @@ async def read( ) return None chunks = await asyncio.to_thread(self.impl.retrieve_chunks, chunks_desc) - for chunk, chunk_info in zip(chunks, batch_info): - out_selection = chunk_info[3] - selection = chunk_info[2] - spec = chunk_info[1] + for chunk, (_, spec, selection, out_selection) in zip(chunks, batch_info): chunk_reshaped = chunk.view(spec.dtype).reshape(spec.shape) chunk_selected = chunk_reshaped[selection] if drop_axes: @@ -155,18 +150,17 @@ async def write( batch_info: Iterable[ tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple] ], - value: NDBuffer, + value: NDBuffer, # type: ignore drop_axes: tuple[int, ...] = (), ) -> None: - value = value.as_ndarray_like() # FIXME: Error if array is not in host memory + # FIXME: Error if array is not in host memory + value: NDArrayLike | np.ndarray = value.as_ndarray_like() if not value.dtype.isnative: value = np.ascontiguousarray(value, dtype=value.dtype.newbyteorder("=")) elif not value.flags.c_contiguous: value = np.ascontiguousarray(value) - chunks_desc = make_chunk_info_for_rust_with_indices(batch_info, drop_axes) - await asyncio.to_thread( - self.impl.store_chunks_with_indices, - chunks_desc, - value, + chunks_desc = make_chunk_info_for_rust_with_indices( + batch_info, drop_axes, value.shape ) + await asyncio.to_thread(self.impl.store_chunks_with_indices, chunks_desc, value) return None diff --git a/python/zarrs/utils.py b/python/zarrs/utils.py index 9e2a7b6..0aa0d0d 100644 --- a/python/zarrs/utils.py +++ b/python/zarrs/utils.py @@ -3,18 +3,19 @@ import operator import os from functools import reduce -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING import numpy as np from zarr.core.indexing import SelectorTuple, is_integer +from zarrs._internal import Basic, WithSubset + if TYPE_CHECKING: from collections.abc import Iterable from types import EllipsisType - from zarr.abc.store import ByteGetter, ByteSetter, Store + from zarr.abc.store import ByteGetter, ByteSetter from zarr.core.array_spec import ArraySpec - from zarr.core.common import ChunkCoords # adapted from https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor @@ -62,18 +63,6 @@ def selector_tuple_to_slice_selection(selector_tuple: SelectorTuple) -> list[sli return make_slice_selection(selector_tuple) -def convert_chunk_to_primitive( - byte_interface: ByteGetter | ByteSetter, chunk_spec: ArraySpec -) -> tuple[Store, str, ChunkCoords, str, Any]: - return ( - byte_interface.store, - byte_interface.path, - chunk_spec.shape, - str(chunk_spec.dtype), - chunk_spec.fill_value.tobytes(), - ) - - def resulting_shape_from_index( array_shape: tuple[int, ...], index_tuple: tuple[int | slice | EllipsisType | np.ndarray], @@ -150,10 +139,12 @@ def make_chunk_info_for_rust_with_indices( tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple] ], drop_axes: tuple[int, ...], -) -> list[tuple[tuple[Store, str, ChunkCoords, str, Any], list[slice], list[slice]]]: - chunk_info_with_indices = [] + shape: tuple[int, ...], +) -> list[WithSubset]: + shape = shape if shape else (1,) # constant array + chunk_info_with_indices: list[WithSubset] = [] for byte_getter, chunk_spec, chunk_selection, out_selection in batch_info: - chunk_info = convert_chunk_to_primitive(byte_getter, chunk_spec) + chunk_info = Basic(byte_getter, chunk_spec) out_selection_as_slices = selector_tuple_to_slice_selection(out_selection) chunk_selection_as_slices = selector_tuple_to_slice_selection(chunk_selection) shape_chunk_selection_slices = get_shape_for_selector( @@ -170,7 +161,12 @@ def make_chunk_info_for_rust_with_indices( f"{shape_chunk_selection} != {shape_chunk_selection_slices}" ) chunk_info_with_indices.append( - (chunk_info, out_selection_as_slices, chunk_selection_as_slices) + WithSubset( + chunk_info, + chunk_subset=chunk_selection_as_slices, + subset=out_selection_as_slices, + shape=shape, + ) ) return chunk_info_with_indices @@ -179,8 +175,8 @@ def make_chunk_info_for_rust( batch_info: Iterable[ tuple[ByteGetter | ByteSetter, ArraySpec, SelectorTuple, SelectorTuple] ], -) -> list[tuple[Store, str, ChunkCoords, str, Any]]: - return list( - convert_chunk_to_primitive(byte_getter, chunk_spec) - for (byte_getter, chunk_spec, _, _) in batch_info - ) +) -> list[Basic]: + return [ + Basic(byte_interface, chunk_spec) + for (byte_interface, chunk_spec, _, _) in batch_info + ] diff --git a/src/chunk_item.rs b/src/chunk_item.rs index aea98d8..6aab47a 100644 --- a/src/chunk_item.rs +++ b/src/chunk_item.rs @@ -2,74 +2,92 @@ use std::num::NonZeroU64; use pyo3::{ exceptions::{PyRuntimeError, PyValueError}, - types::{PySlice, PySliceMethods}, - Bound, PyErr, PyResult, + pyclass, pymethods, + types::{PyAnyMethods as _, PySlice, PySliceMethods as _}, + Bound, PyAny, PyErr, PyResult, }; +use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods}; use zarrs::{ array::{ChunkRepresentation, DataType, FillValue}, array_subset::ArraySubset, metadata::v3::{array::data_type::DataTypeMetadataV3, MetadataV3}, - storage::{MaybeBytes, ReadableWritableListableStorage, StorageError, StoreKey}, + storage::StoreKey, }; -use crate::{utils::PyErrExt, StoreConfig}; - -pub(crate) type Raw<'a> = ( - // store - StoreConfig, - // path - String, - // shape - Vec, - // data type - String, - // fill value bytes - Vec, -); - -pub(crate) type RawWithIndices<'a> = ( - Raw<'a>, - // out selection - Vec>, - // chunk selection - Vec>, -); - -pub(crate) trait IntoItem: std::marker::Sized { - fn store_config(&self) -> &StoreConfig; - fn path(&self) -> &str; - fn into_item( - self, - store: ReadableWritableListableStorage, - key: StoreKey, - shape: S, - ) -> PyResult; -} +use crate::{store::StoreConfig, utils::PyErrExt}; pub(crate) trait ChunksItem { - fn store(&self) -> ReadableWritableListableStorage; + fn store_config(&self) -> StoreConfig; fn key(&self) -> &StoreKey; fn representation(&self) -> &ChunkRepresentation; - - fn get(&self) -> Result { - self.store().get(self.key()) - } } +#[derive(Clone)] +#[gen_stub_pyclass] +#[pyclass] pub(crate) struct Basic { - store: ReadableWritableListableStorage, + store: StoreConfig, key: StoreKey, representation: ChunkRepresentation, } +#[gen_stub_pymethods] +#[pymethods] +impl Basic { + #[new] + fn new(byte_interface: &Bound<'_, PyAny>, chunk_spec: &Bound<'_, PyAny>) -> PyResult { + let store: StoreConfig = byte_interface.getattr("store")?.extract()?; + let path: String = byte_interface.getattr("path")?.extract()?; + + let chunk_shape = chunk_spec.getattr("shape")?.extract()?; + let dtype: String = chunk_spec + .getattr("dtype")? + .call_method0("__str__")? + .extract()?; + let fill_value = chunk_spec + .getattr("fill_value")? + .call_method0("tobytes")? + .extract()?; + Ok(Self { + store, + key: StoreKey::new(path).map_py_err::()?, + representation: get_chunk_representation(chunk_shape, &dtype, fill_value)?, + }) + } +} + +#[derive(Clone)] +#[gen_stub_pyclass] +#[pyclass] pub(crate) struct WithSubset { pub item: Basic, pub chunk_subset: ArraySubset, pub subset: ArraySubset, } +#[gen_stub_pymethods] +#[pymethods] +impl WithSubset { + #[new] + fn new( + item: Basic, + chunk_subset: Vec>, + subset: Vec>, + shape: Vec, + ) -> PyResult { + let chunk_subset = + selection_to_array_subset(&chunk_subset, &item.representation.shape_u64())?; + let subset = selection_to_array_subset(&subset, &shape)?; + Ok(Self { + item, + chunk_subset, + subset, + }) + } +} + impl ChunksItem for Basic { - fn store(&self) -> ReadableWritableListableStorage { + fn store_config(&self) -> StoreConfig { self.store.clone() } fn key(&self) -> &StoreKey { @@ -81,7 +99,7 @@ impl ChunksItem for Basic { } impl ChunksItem for WithSubset { - fn store(&self) -> ReadableWritableListableStorage { + fn store_config(&self) -> StoreConfig { self.item.store.clone() } fn key(&self) -> &StoreKey { @@ -92,57 +110,6 @@ impl ChunksItem for WithSubset { } } -impl<'a> IntoItem for Raw<'a> { - fn store_config(&self) -> &StoreConfig { - &self.0 - } - - fn path(&self) -> &str { - &self.1 - } - - fn into_item( - self, - store: ReadableWritableListableStorage, - key: StoreKey, - (): (), - ) -> PyResult { - let (_, _, chunk_shape, dtype, fill_value) = self; - let representation = get_chunk_representation(chunk_shape, &dtype, fill_value)?; - Ok(Basic { - store, - key, - representation, - }) - } -} - -impl IntoItem for RawWithIndices<'_> { - fn store_config(&self) -> &StoreConfig { - &self.0 .0 - } - - fn path(&self) -> &str { - &self.0 .1 - } - - fn into_item( - self, - store: ReadableWritableListableStorage, - key: StoreKey, - shape: &[u64], - ) -> PyResult { - let (raw, selection, chunk_selection) = self; - let chunk_shape = raw.2.clone(); - let item = raw.into_item(store.clone(), key, ())?; - Ok(WithSubset { - item, - chunk_subset: selection_to_array_subset(&chunk_selection, &chunk_shape)?, - subset: selection_to_array_subset(&selection, shape)?, - }) - } -} - fn get_chunk_representation( chunk_shape: Vec, dtype: &str, diff --git a/src/lib.rs b/src/lib.rs index 123f8d2..4c1d91c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,17 +1,18 @@ #![warn(clippy::pedantic)] #![allow(clippy::module_name_repetitions)] -use chunk_item::{ChunksItem, IntoItem}; +use chunk_item::ChunksItem; use concurrency::ChunkConcurrentLimitAndCodecOptions; use numpy::npyffi::PyArrayObject; use numpy::{IntoPyArray, PyArray1, PyUntypedArray, PyUntypedArrayMethods}; -use pyo3::exceptions::{PyRuntimeError, PyTypeError, PyValueError}; +use pyo3::exceptions::{PyKeyError, PyRuntimeError, PyTypeError, PyValueError}; use pyo3::prelude::*; use pyo3_stub_gen::define_stub_info_gatherer; use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rayon_iter_concurrent_limit::iter_concurrent_limit; use std::borrow::Cow; +use std::collections::BTreeMap; use std::sync::{Arc, Mutex}; use store::StoreConfig; use unsafe_cell_slice::UnsafeCellSlice; @@ -23,7 +24,7 @@ use zarrs::array::{ }; use zarrs::array_subset::ArraySubset; use zarrs::metadata::v3::MetadataV3; -use zarrs::storage::{ReadableWritableListableStorage, StorageHandle, StoreKey}; +use zarrs::storage::{MaybeBytes, ReadableWritableListableStorage, StorageHandle}; mod chunk_item; mod concurrency; @@ -39,8 +40,8 @@ use utils::{PyErrExt, PyUntypedArrayExt}; #[gen_stub_pyclass] #[pyclass] pub struct CodecPipelineImpl { + pub(crate) stores: Mutex>, pub(crate) codec_chain: Arc, - pub(crate) store: Mutex>, pub(crate) codec_options: CodecOptions, pub(crate) chunk_concurrent_minimum: usize, pub(crate) chunk_concurrent_maximum: usize, @@ -48,47 +49,30 @@ pub struct CodecPipelineImpl { } impl CodecPipelineImpl { - fn get_store_from_config( - &self, - config: &StoreConfig, - ) -> PyResult { - let mut gstore = self.store.lock().map_err(|_| { - PyErr::new::("failed to lock the store mutex".to_string()) - })?; - - // TODO: Request upstream change to get store on codec pipeline initialisation, do not want to do all of this here - if let Some(gstore) = gstore.as_ref() { - Ok(gstore.clone()) - } else { - let store: ReadableWritableListableStorage = config.try_into()?; - *gstore = Some(store.clone()); - Ok(store) + fn store(&self, item: &I) -> PyResult { + use std::collections::btree_map::Entry::{Occupied, Vacant}; + match self + .stores + .lock() + .map_py_err::()? + .entry(item.store_config()) + { + Occupied(e) => Ok(e.get().clone()), + Vacant(e) => Ok(e.insert((&item.store_config()).try_into()?).clone()), } } - fn collect_chunk_descriptions, I, S: Copy>( - &self, - chunk_descriptions: Vec, - shape: S, - ) -> PyResult> { - chunk_descriptions - .into_iter() - .map(|raw| { - // TODO: Prefer to get the store once, and assume it is the same for all chunks - let store = self.get_store_from_config(raw.store_config())?; - let path = raw.path(); - let key = StoreKey::new(path).map_py_err::()?; - raw.into_item(store, key, shape) - }) - .collect() + fn get(&self, item: &I) -> PyResult { + self.store(item)?.get(item.key()).map_py_err::() } fn retrieve_chunk_bytes<'a, I: ChunksItem>( + &self, item: &I, codec_chain: &CodecChain, codec_options: &CodecOptions, ) -> PyResult> { - let value_encoded = item.get().map_py_err::()?; + let value_encoded = self.get(item).map_py_err::()?; let value_decoded = if let Some(value_encoded) = value_encoded { let value_encoded: Vec = value_encoded.into(); // zero-copy in this case codec_chain @@ -105,6 +89,7 @@ impl CodecPipelineImpl { } fn store_chunk_bytes( + &self, item: &I, codec_chain: &CodecChain, value_decoded: ArrayBytes, @@ -118,7 +103,7 @@ impl CodecPipelineImpl { .map_py_err::()?; if value_decoded.is_fill_value(item.representation().fill_value()) { - item.store().erase(item.key()) + self.store(item)?.erase(item.key()) } else { let value_encoded = codec_chain .encode(value_decoded, item.representation(), codec_options) @@ -126,40 +111,38 @@ impl CodecPipelineImpl { .map_py_err::()?; // Store the encoded chunk - item.store().set(item.key(), value_encoded.into()) + self.store(item)?.set(item.key(), value_encoded.into()) } .map_py_err::() } fn store_chunk_subset_bytes( + &self, item: &I, codec_chain: &CodecChain, chunk_subset_bytes: ArrayBytes, chunk_subset: &ArraySubset, codec_options: &CodecOptions, ) -> PyResult<()> { - if !chunk_subset.inbounds(&item.representation().shape_u64()) { - return Err(PyErr::new::( - "chunk subset is out of bounds".to_string(), - )); + let array_shape = item.representation().shape_u64(); + if !chunk_subset.inbounds(&array_shape) { + return Err(PyErr::new::(format!( + "chunk subset ({chunk_subset}) is out of bounds for array shape ({array_shape:?})" + ))); } + let data_type_size = item.representation().data_type().size(); - if chunk_subset.start().iter().all(|&o| o == 0) - && chunk_subset.shape() == item.representation().shape_u64() - { + if chunk_subset.start().iter().all(|&o| o == 0) && chunk_subset.shape() == array_shape { // Fast path if the chunk subset spans the entire chunk, no read required - Self::store_chunk_bytes(item, codec_chain, chunk_subset_bytes, codec_options) + self.store_chunk_bytes(item, codec_chain, chunk_subset_bytes, codec_options) } else { // Validate the chunk subset bytes chunk_subset_bytes - .validate( - chunk_subset.num_elements(), - item.representation().data_type().size(), - ) + .validate(chunk_subset.num_elements(), data_type_size) .map_py_err::()?; // Retrieve the chunk - let chunk_bytes_old = Self::retrieve_chunk_bytes(item, codec_chain, codec_options)?; + let chunk_bytes_old = self.retrieve_chunk_bytes(item, codec_chain, codec_options)?; // Update the chunk let chunk_bytes_new = unsafe { @@ -170,15 +153,15 @@ impl CodecPipelineImpl { // - output bytes and output subset bytes are compatible (same data type) update_array_bytes( chunk_bytes_old, - &item.representation().shape_u64(), + &array_shape, chunk_subset, &chunk_subset_bytes, - item.representation().data_type().size(), + data_type_size, ) }; // Store the updated chunk - Self::store_chunk_bytes(item, codec_chain, chunk_bytes_new, codec_options) + self.store_chunk_bytes(item, codec_chain, chunk_bytes_new, codec_options) } } @@ -270,8 +253,8 @@ impl CodecPipelineImpl { let num_threads = num_threads.unwrap_or(rayon::current_num_threads()); Ok(Self { + stores: Mutex::default(), codec_chain, - store: Mutex::new(None), codec_options, chunk_concurrent_minimum, chunk_concurrent_maximum, @@ -282,7 +265,7 @@ impl CodecPipelineImpl { fn retrieve_chunks_and_apply_index( &self, py: Python, - chunk_descriptions: Vec, // FIXME: Ref / iterable? + chunk_descriptions: Vec, // FIXME: Ref / iterable? value: &Bound<'_, PyUntypedArray>, ) -> PyResult<()> { // Get input array @@ -293,8 +276,6 @@ impl CodecPipelineImpl { } let output = Self::nparray_to_unsafe_cell_slice(value); let output_shape: Vec = value.shape_zarr()?; - let chunk_descriptions = - self.collect_chunk_descriptions(chunk_descriptions, &output_shape)?; // Adjust the concurrency based on the codec chain and the first chunk description let Some((chunk_concurrent_limit, codec_options)) = @@ -310,7 +291,7 @@ impl CodecPipelineImpl { && item.chunk_subset.shape() == item.representation().shape_u64() { // See zarrs::array::Array::retrieve_chunk_into - let chunk_encoded = item.get().map_py_err::()?; + let chunk_encoded = self.get(&item)?; if let Some(chunk_encoded) = chunk_encoded { // Decode the encoded data into the output buffer let chunk_encoded: Vec = chunk_encoded.into(); @@ -345,7 +326,7 @@ impl CodecPipelineImpl { } } else { // Partially decode the chunk into the output buffer - let storage_handle = Arc::new(StorageHandle::new(item.store().clone())); + let storage_handle = Arc::new(StorageHandle::new(self.store(&item)?)); // NOTE: Normally a storage transformer would exist between the storage handle and the input handle // but zarr-python does not support them nor forward them to the codec pipeline let input_handle = Arc::new(StoragePartialDecoder::new( @@ -388,10 +369,8 @@ impl CodecPipelineImpl { fn retrieve_chunks<'py>( &self, py: Python<'py>, - chunk_descriptions: Vec, // FIXME: Ref / iterable? + chunk_descriptions: Vec, // FIXME: Ref / iterable? ) -> PyResult>>> { - let chunk_descriptions = self.collect_chunk_descriptions(chunk_descriptions, ())?; - // Adjust the concurrency based on the codec chain and the first chunk description let Some((chunk_concurrent_limit, codec_options)) = chunk_descriptions.get_chunk_concurrent_limit_and_codec_options(self)? @@ -401,7 +380,7 @@ impl CodecPipelineImpl { let chunk_bytes = py.allow_threads(move || { let get_chunk_subset = |item: chunk_item::Basic| { - let chunk_encoded = item.get().map_py_err::()?; + let chunk_encoded = self.get(&item).map_py_err::()?; Ok(if let Some(chunk_encoded) = chunk_encoded { let chunk_encoded: Vec = chunk_encoded.into(); self.codec_chain @@ -439,7 +418,7 @@ impl CodecPipelineImpl { fn store_chunks_with_indices( &self, py: Python, - chunk_descriptions: Vec, + chunk_descriptions: Vec, value: &Bound<'_, PyUntypedArray>, ) -> PyResult<()> { enum InputValue<'a> { @@ -460,10 +439,7 @@ impl CodecPipelineImpl { } else { InputValue::Constant(FillValue::new(input_slice.to_vec())) }; - let input_shape: Vec = value.shape_zarr()?; - let chunk_descriptions = - self.collect_chunk_descriptions(chunk_descriptions, &input_shape)?; // Adjust the concurrency based on the codec chain and the first chunk description let Some((chunk_concurrent_limit, codec_options)) = @@ -482,7 +458,7 @@ impl CodecPipelineImpl { item.item.representation().data_type(), ) .map_py_err::()?; - Self::store_chunk_subset_bytes( + self.store_chunk_subset_bytes( &item, &self.codec_chain, chunk_subset_bytes, @@ -499,7 +475,7 @@ impl CodecPipelineImpl { constant_value, ); - Self::store_chunk_subset_bytes( + self.store_chunk_subset_bytes( &item, &self.codec_chain, chunk_subset_bytes, @@ -526,6 +502,8 @@ impl CodecPipelineImpl { fn _internal(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add("__version__", env!("CARGO_PKG_VERSION"))?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/store.rs b/src/store.rs index a44f77a..3eeb0b8 100644 --- a/src/store.rs +++ b/src/store.rs @@ -19,6 +19,7 @@ use crate::{runtime::tokio_block_on, utils::PyErrExt}; mod filesystem; mod http; +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] #[gen_stub_pyclass_enum] pub enum StoreConfig { Filesystem(FilesystemStoreConfig), diff --git a/src/store/filesystem.rs b/src/store/filesystem.rs index 9bf5b16..8ee865b 100644 --- a/src/store/filesystem.rs +++ b/src/store/filesystem.rs @@ -6,7 +6,7 @@ use zarrs::{filesystem::FilesystemStore, storage::ReadableWritableListableStorag use crate::utils::PyErrExt; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] #[gen_stub_pyclass] #[pyclass] pub struct FilesystemStoreConfig { diff --git a/src/store/http.rs b/src/store/http.rs index 725d5a4..0c7820b 100644 --- a/src/store/http.rs +++ b/src/store/http.rs @@ -6,7 +6,7 @@ use zarrs::storage::ReadableWritableListableStorage; use super::opendal_builder_to_sync_store; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] #[gen_stub_pyclass] #[pyclass] pub struct HttpStoreConfig { diff --git a/src/tests.rs b/src/tests.rs index 355e8ec..2cf4570 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1,3 +1,5 @@ +use pyo3::ffi::c_str; + use numpy::PyUntypedArray; use pyo3::{ types::{PyAnyMethods, PyModule}, @@ -10,13 +12,15 @@ use crate::CodecPipelineImpl; fn test_nparray_to_unsafe_cell_slice_empty() -> PyResult<()> { pyo3::prepare_freethreaded_python(); Python::with_gil(|py| { - let arr: Bound<'_, PyUntypedArray> = PyModule::from_code_bound( + let arr: Bound<'_, PyUntypedArray> = PyModule::from_code( py, - "def empty_array(): + c_str!( + "def empty_array(): import numpy as np - return np.empty(0, dtype=np.uint8)", - "", - "", + return np.empty(0, dtype=np.uint8)" + ), + c_str!(""), + c_str!(""), )? .getattr("empty_array")? .call0()? From 014b04fe8b8d9a3757259fe3bb1bf6133b0cfa8e Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Sat, 14 Dec 2024 17:20:45 +0100 Subject: [PATCH 44/45] Abstract away store manager --- src/chunk_item.rs | 1 + src/lib.rs | 64 ++++++++++++-------------------------------- src/store.rs | 8 +++--- src/store/manager.rs | 61 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 50 deletions(-) create mode 100644 src/store/manager.rs diff --git a/src/chunk_item.rs b/src/chunk_item.rs index 6aab47a..785bed0 100644 --- a/src/chunk_item.rs +++ b/src/chunk_item.rs @@ -69,6 +69,7 @@ pub(crate) struct WithSubset { #[pymethods] impl WithSubset { #[new] + #[allow(clippy::needless_pass_by_value)] fn new( item: Basic, chunk_subset: Vec>, diff --git a/src/lib.rs b/src/lib.rs index 4c1d91c..cbf7739 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,30 +1,24 @@ #![warn(clippy::pedantic)] #![allow(clippy::module_name_repetitions)] -use chunk_item::ChunksItem; -use concurrency::ChunkConcurrentLimitAndCodecOptions; +use std::borrow::Cow; +use std::sync::Arc; + use numpy::npyffi::PyArrayObject; use numpy::{IntoPyArray, PyArray1, PyUntypedArray, PyUntypedArrayMethods}; -use pyo3::exceptions::{PyKeyError, PyRuntimeError, PyTypeError, PyValueError}; +use pyo3::exceptions::{PyRuntimeError, PyTypeError, PyValueError}; use pyo3::prelude::*; use pyo3_stub_gen::define_stub_info_gatherer; use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rayon_iter_concurrent_limit::iter_concurrent_limit; -use std::borrow::Cow; -use std::collections::BTreeMap; -use std::sync::{Arc, Mutex}; -use store::StoreConfig; use unsafe_cell_slice::UnsafeCellSlice; -use zarrs::array::codec::{ - ArrayToBytesCodecTraits, CodecOptions, CodecOptionsBuilder, StoragePartialDecoder, -}; +use zarrs::array::codec::{ArrayToBytesCodecTraits, CodecOptions, CodecOptionsBuilder}; use zarrs::array::{ copy_fill_value_into, update_array_bytes, ArrayBytes, ArraySize, CodecChain, FillValue, }; use zarrs::array_subset::ArraySubset; use zarrs::metadata::v3::MetadataV3; -use zarrs::storage::{MaybeBytes, ReadableWritableListableStorage, StorageHandle}; mod chunk_item; mod concurrency; @@ -34,13 +28,16 @@ mod store; mod tests; mod utils; -use utils::{PyErrExt, PyUntypedArrayExt}; +use crate::chunk_item::ChunksItem; +use crate::concurrency::ChunkConcurrentLimitAndCodecOptions; +use crate::store::StoreManager; +use crate::utils::{PyErrExt as _, PyUntypedArrayExt as _}; // TODO: Use a OnceLock for store with get_or_try_init when stabilised? #[gen_stub_pyclass] #[pyclass] pub struct CodecPipelineImpl { - pub(crate) stores: Mutex>, + pub(crate) stores: StoreManager, pub(crate) codec_chain: Arc, pub(crate) codec_options: CodecOptions, pub(crate) chunk_concurrent_minimum: usize, @@ -49,30 +46,13 @@ pub struct CodecPipelineImpl { } impl CodecPipelineImpl { - fn store(&self, item: &I) -> PyResult { - use std::collections::btree_map::Entry::{Occupied, Vacant}; - match self - .stores - .lock() - .map_py_err::()? - .entry(item.store_config()) - { - Occupied(e) => Ok(e.get().clone()), - Vacant(e) => Ok(e.insert((&item.store_config()).try_into()?).clone()), - } - } - - fn get(&self, item: &I) -> PyResult { - self.store(item)?.get(item.key()).map_py_err::() - } - fn retrieve_chunk_bytes<'a, I: ChunksItem>( &self, item: &I, codec_chain: &CodecChain, codec_options: &CodecOptions, ) -> PyResult> { - let value_encoded = self.get(item).map_py_err::()?; + let value_encoded = self.stores.get(item)?; let value_decoded = if let Some(value_encoded) = value_encoded { let value_encoded: Vec = value_encoded.into(); // zero-copy in this case codec_chain @@ -103,7 +83,7 @@ impl CodecPipelineImpl { .map_py_err::()?; if value_decoded.is_fill_value(item.representation().fill_value()) { - self.store(item)?.erase(item.key()) + self.stores.erase(item) } else { let value_encoded = codec_chain .encode(value_decoded, item.representation(), codec_options) @@ -111,9 +91,8 @@ impl CodecPipelineImpl { .map_py_err::()?; // Store the encoded chunk - self.store(item)?.set(item.key(), value_encoded.into()) + self.stores.set(item, value_encoded.into()) } - .map_py_err::() } fn store_chunk_subset_bytes( @@ -253,7 +232,7 @@ impl CodecPipelineImpl { let num_threads = num_threads.unwrap_or(rayon::current_num_threads()); Ok(Self { - stores: Mutex::default(), + stores: StoreManager::default(), codec_chain, codec_options, chunk_concurrent_minimum, @@ -291,8 +270,7 @@ impl CodecPipelineImpl { && item.chunk_subset.shape() == item.representation().shape_u64() { // See zarrs::array::Array::retrieve_chunk_into - let chunk_encoded = self.get(&item)?; - if let Some(chunk_encoded) = chunk_encoded { + if let Some(chunk_encoded) = self.stores.get(&item)? { // Decode the encoded data into the output buffer let chunk_encoded: Vec = chunk_encoded.into(); unsafe { @@ -325,14 +303,7 @@ impl CodecPipelineImpl { } } } else { - // Partially decode the chunk into the output buffer - let storage_handle = Arc::new(StorageHandle::new(self.store(&item)?)); - // NOTE: Normally a storage transformer would exist between the storage handle and the input handle - // but zarr-python does not support them nor forward them to the codec pipeline - let input_handle = Arc::new(StoragePartialDecoder::new( - storage_handle, - item.key().clone(), - )); + let input_handle = Arc::new(self.stores.decoder(&item)?); let partial_decoder = self .codec_chain .clone() @@ -380,8 +351,7 @@ impl CodecPipelineImpl { let chunk_bytes = py.allow_threads(move || { let get_chunk_subset = |item: chunk_item::Basic| { - let chunk_encoded = self.get(&item).map_py_err::()?; - Ok(if let Some(chunk_encoded) = chunk_encoded { + Ok(if let Some(chunk_encoded) = self.stores.get(&item)? { let chunk_encoded: Vec = chunk_encoded.into(); self.codec_chain .decode( diff --git a/src/store.rs b/src/store.rs index 3eeb0b8..7bc8abb 100644 --- a/src/store.rs +++ b/src/store.rs @@ -7,9 +7,6 @@ use pyo3::{ Bound, FromPyObject, PyAny, PyErr, PyResult, }; use pyo3_stub_gen::derive::gen_stub_pyclass_enum; - -pub use filesystem::FilesystemStoreConfig; -pub use http::HttpStoreConfig; use zarrs::storage::{ storage_adapter::async_to_sync::AsyncToSyncStorageAdapter, ReadableWritableListableStorage, }; @@ -18,6 +15,11 @@ use crate::{runtime::tokio_block_on, utils::PyErrExt}; mod filesystem; mod http; +mod manager; + +pub use self::filesystem::FilesystemStoreConfig; +pub use self::http::HttpStoreConfig; +pub(crate) use self::manager::StoreManager; #[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] #[gen_stub_pyclass_enum] diff --git a/src/store/manager.rs b/src/store/manager.rs new file mode 100644 index 0000000..d32a68d --- /dev/null +++ b/src/store/manager.rs @@ -0,0 +1,61 @@ +use std::{ + collections::BTreeMap, + sync::{Arc, Mutex}, +}; + +use pyo3::{exceptions::PyRuntimeError, PyResult}; +use zarrs::{ + array::codec::StoragePartialDecoder, + storage::{Bytes, MaybeBytes, ReadableWritableListableStorage, StorageHandle}, +}; + +use crate::{chunk_item::ChunksItem, store::PyErrExt as _}; + +use super::StoreConfig; + +#[derive(Default)] +pub(crate) struct StoreManager(Mutex>); + +impl StoreManager { + fn store(&self, item: &I) -> PyResult { + use std::collections::btree_map::Entry::{Occupied, Vacant}; + match self + .0 + .lock() + .map_py_err::()? + .entry(item.store_config()) + { + Occupied(e) => Ok(e.get().clone()), + Vacant(e) => Ok(e.insert((&item.store_config()).try_into()?).clone()), + } + } + + pub(crate) fn get(&self, item: &I) -> PyResult { + self.store(item)? + .get(item.key()) + .map_py_err::() + } + + pub(crate) fn set(&self, item: &I, value: Bytes) -> PyResult<()> { + self.store(item)? + .set(item.key(), value) + .map_py_err::() + } + + pub(crate) fn erase(&self, item: &I) -> PyResult<()> { + self.store(item)? + .erase(item.key()) + .map_py_err::() + } + + pub(crate) fn decoder(&self, item: &I) -> PyResult { + // Partially decode the chunk into the output buffer + let storage_handle = Arc::new(StorageHandle::new(self.store(item)?)); + // NOTE: Normally a storage transformer would exist between the storage handle and the input handle + // but zarr-python does not support them nor forward them to the codec pipeline + Ok(StoragePartialDecoder::new( + storage_handle, + item.key().clone(), + )) + } +} From 2296ca173e00e6e803a2669b66243b3d72611ece Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sun, 15 Dec 2024 12:48:35 +1100 Subject: [PATCH 45/45] fix(deps): constrain zarr<=3.0.0b3 RemoteStore will be renamed to FsspecStore in the next beta / RC --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 77bf9c4..87acc4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ 'donfig', 'pytest', 'universal_pathlib>=0.2.0', - 'zarr>=3.0.0b2', + 'zarr>=3.0.0b2,<=3.0.0b3', ] [project.optional-dependencies]