From c997590f530bd973273a9c7e2c785b21c177f8b9 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 26 Mar 2024 19:49:04 +0800 Subject: [PATCH] feat: Implement RFC-4382 Range Based Read (#4381) --- .../{action.yml => disable_action.yml} | 0 .github/services/sftp/sftp/action.yml | 1 + .github/workflows/test_behavior.yml | 12 +- .../workflows/test_behavior_binding_java.yml | 1 + .../test_behavior_binding_nodejs.yml | 1 + .../test_behavior_binding_python.yml | 1 + .github/workflows/test_behavior_core.yml | 1 + bin/oay/Cargo.lock | 427 +--------- bin/oli/Cargo.lock | 4 +- bin/oli/src/commands/cat.rs | 10 +- bin/oli/src/commands/cp.rs | 9 +- bindings/c/include/opendal.h | 29 +- bindings/c/src/operator.rs | 13 +- bindings/c/src/operator_info.rs | 9 - bindings/c/src/reader.rs | 22 +- bindings/c/tests/opinfo.cpp | 6 - bindings/cpp/src/lib.rs | 7 +- bindings/cpp/src/reader.rs | 8 +- bindings/java/src/lib.rs | 5 +- .../java/org/apache/opendal/Capability.java | 23 +- bindings/nodejs/generated.d.ts | 8 - bindings/nodejs/src/capability.rs | 18 - bindings/nodejs/src/lib.rs | 47 +- bindings/nodejs/tests/suites/async.suite.mjs | 37 +- .../nodejs/tests/suites/services.suite.mjs | 1 - bindings/nodejs/tests/suites/sync.suite.mjs | 4 +- bindings/ocaml/lib/operator.ml | 11 +- bindings/ocaml/lib/operator.mli | 28 +- bindings/ocaml/src/operator.ml | 3 +- bindings/ocaml/src/operator.mli | 3 +- bindings/ocaml/src/operator/reader.rs | 18 +- bindings/ocaml/test/test.ml | 4 +- bindings/python/Cargo.toml | 1 + bindings/python/python/opendal/__init__.pyi | 63 +- bindings/python/src/capability.rs | 9 - bindings/python/src/file.rs | 34 +- bindings/python/src/operator.rs | 6 +- core/Cargo.lock | 101 +-- core/Cargo.toml | 11 +- core/benches/ops/read.rs | 47 +- core/benches/vs_s3/src/main.rs | 4 +- core/fuzz/fuzz_reader.rs | 82 +- core/src/layers/blocking.rs | 9 +- core/src/layers/chaos.rs | 36 +- core/src/layers/complete.rs | 151 +--- core/src/layers/concurrent_limit.rs | 19 +- core/src/layers/dtrace.rs | 50 +- core/src/layers/error_context.rs | 33 +- core/src/layers/immutable_index.rs | 2 +- core/src/layers/logging.rs | 128 +-- core/src/layers/madsim.rs | 15 +- core/src/layers/metrics.rs | 92 +- core/src/layers/minitrace.rs | 21 +- core/src/layers/oteltrace.rs | 17 +- core/src/layers/prometheus.rs | 32 +- core/src/layers/prometheus_client.rs | 65 +- core/src/layers/retry.rs | 118 +-- core/src/layers/throttle.rs | 18 +- core/src/layers/timeout.rs | 27 +- core/src/layers/tracing.rs | 26 +- core/src/raw/adapters/kv/backend.rs | 40 +- core/src/raw/adapters/typed_kv/backend.rs | 39 +- core/src/raw/enum_utils.rs | 99 +-- core/src/raw/http_util/body.rs | 165 +--- core/src/raw/http_util/bytes_range.rs | 175 +--- core/src/raw/http_util/client.rs | 53 +- core/src/raw/http_util/mod.rs | 1 - core/src/raw/http_util/multipart.rs | 126 +-- core/src/raw/oio/buf/buffer.rs | 193 +++++ core/src/raw/oio/buf/mod.rs | 3 + core/src/raw/oio/cursor.rs | 151 ---- core/src/raw/oio/list/api.rs | 1 - core/src/raw/oio/mod.rs | 3 - core/src/raw/oio/read/api.rs | 124 ++- core/src/raw/oio/read/buffer_reader.rs | 802 ------------------ core/src/raw/oio/read/file_read.rs | 319 ------- core/src/raw/oio/read/futures_read.rs | 84 -- .../src/raw/oio/read/into_read_from_stream.rs | 64 -- core/src/raw/oio/read/into_streamable_read.rs | 131 --- core/src/raw/oio/read/lazy_read.rs | 109 --- core/src/raw/oio/read/mod.rs | 29 - core/src/raw/oio/read/range_read.rs | 603 ------------- core/src/raw/oio/read/std_read.rs | 77 -- core/src/raw/oio/read/tokio_read.rs | 84 -- 
core/src/raw/oio/stream/api.rs | 17 +- core/src/raw/oio/stream/into_stream.rs | 94 -- .../raw/oio/stream/into_stream_from_reader.rs | 92 -- core/src/raw/oio/stream/mod.rs | 6 - core/src/raw/oio/write/api.rs | 3 +- core/src/raw/oio/write/block_write.rs | 1 - core/src/raw/oio/write/exact_buf_write.rs | 5 +- core/src/raw/oio/write/multipart_write.rs | 1 - core/src/raw/oio/write/range_write.rs | 1 - core/src/raw/ops.rs | 44 - core/src/raw/std_io_util.rs | 3 +- core/src/raw/tests/read.rs | 115 +-- core/src/services/alluxio/backend.rs | 15 +- core/src/services/alluxio/core.rs | 47 +- core/src/services/alluxio/error.rs | 12 +- core/src/services/alluxio/mod.rs | 1 + core/src/services/alluxio/reader.rs | 53 ++ core/src/services/alluxio/writer.rs | 1 - core/src/services/atomicserver/backend.rs | 25 +- core/src/services/azblob/backend.rs | 47 +- core/src/services/azblob/core.rs | 46 +- core/src/services/azblob/error.rs | 6 +- core/src/services/azblob/lister.rs | 2 +- core/src/services/azblob/mod.rs | 1 + core/src/services/azblob/reader.rs | 61 ++ core/src/services/azblob/writer.rs | 20 +- core/src/services/azdls/backend.rs | 42 +- core/src/services/azdls/core.rs | 28 +- core/src/services/azdls/error.rs | 6 +- core/src/services/azdls/lister.rs | 6 +- core/src/services/azdls/mod.rs | 1 + core/src/services/azdls/reader.rs | 57 ++ core/src/services/azdls/writer.rs | 19 +- core/src/services/azfile/backend.rs | 43 +- core/src/services/azfile/core.rs | 39 +- core/src/services/azfile/error.rs | 6 +- core/src/services/azfile/lister.rs | 12 +- core/src/services/azfile/mod.rs | 1 + core/src/services/azfile/reader.rs | 58 ++ core/src/services/azfile/writer.rs | 14 +- core/src/services/b2/backend.rs | 43 +- core/src/services/b2/core.rs | 49 +- core/src/services/b2/error.rs | 12 +- core/src/services/b2/lister.rs | 2 +- core/src/services/b2/mod.rs | 1 + core/src/services/b2/reader.rs | 61 ++ core/src/services/b2/writer.rs | 25 +- core/src/services/chainsafe/backend.rs | 34 +- core/src/services/chainsafe/core.rs | 25 +- core/src/services/chainsafe/error.rs | 12 +- core/src/services/chainsafe/lister.rs | 5 +- core/src/services/chainsafe/mod.rs | 1 + core/src/services/chainsafe/reader.rs | 57 ++ core/src/services/chainsafe/writer.rs | 5 +- core/src/services/cloudflare_kv/backend.rs | 20 +- core/src/services/cloudflare_kv/error.rs | 6 +- core/src/services/cos/backend.rs | 39 +- core/src/services/cos/core.rs | 35 +- core/src/services/cos/error.rs | 6 +- core/src/services/cos/lister.rs | 2 +- core/src/services/cos/mod.rs | 1 + core/src/services/cos/reader.rs | 60 ++ core/src/services/cos/writer.rs | 20 +- core/src/services/d1/backend.rs | 6 +- core/src/services/d1/error.rs | 6 +- core/src/services/dbfs/backend.rs | 23 +- core/src/services/dbfs/core.rs | 12 +- core/src/services/dbfs/error.rs | 7 +- core/src/services/dbfs/lister.rs | 7 +- core/src/services/dbfs/reader.rs | 30 +- core/src/services/dbfs/writer.rs | 6 +- core/src/services/dropbox/backend.rs | 37 +- core/src/services/dropbox/core.rs | 28 +- core/src/services/dropbox/error.rs | 7 +- core/src/services/dropbox/mod.rs | 1 + core/src/services/dropbox/reader.rs | 57 ++ core/src/services/dropbox/writer.rs | 5 +- core/src/services/fs/backend.rs | 11 +- core/src/services/fs/mod.rs | 1 + core/src/services/fs/reader.rs | 106 +++ core/src/services/fs/writer.rs | 3 +- core/src/services/ftp/backend.rs | 52 +- core/src/services/ftp/lister.rs | 1 - core/src/services/ftp/mod.rs | 2 +- core/src/services/ftp/reader.rs | 62 ++ core/src/services/ftp/util.rs | 105 --- 
core/src/services/ftp/writer.rs | 1 - core/src/services/gcs/backend.rs | 28 +- core/src/services/gcs/core.rs | 54 +- core/src/services/gcs/error.rs | 7 +- core/src/services/gcs/lister.rs | 5 +- core/src/services/gcs/mod.rs | 1 + core/src/services/gcs/reader.rs | 60 ++ core/src/services/gcs/writer.rs | 15 +- core/src/services/gdrive/backend.rs | 42 +- core/src/services/gdrive/core.rs | 29 +- core/src/services/gdrive/error.rs | 7 +- core/src/services/gdrive/lister.rs | 2 +- core/src/services/gdrive/mod.rs | 1 + core/src/services/gdrive/reader.rs | 57 ++ core/src/services/gdrive/writer.rs | 7 +- core/src/services/ghac/backend.rs | 59 +- core/src/services/ghac/error.rs | 7 +- core/src/services/ghac/mod.rs | 1 + core/src/services/ghac/reader.rs | 56 ++ core/src/services/ghac/writer.rs | 2 - core/src/services/github/backend.rs | 24 +- core/src/services/github/core.rs | 19 +- core/src/services/github/error.rs | 12 +- core/src/services/github/mod.rs | 1 + core/src/services/github/reader.rs | 57 ++ core/src/services/github/writer.rs | 5 +- core/src/services/hdfs/backend.rs | 42 +- core/src/services/hdfs/mod.rs | 1 + core/src/services/hdfs/reader.rs | 88 ++ core/src/services/hdfs/writer.rs | 2 - core/src/services/hdfs_native/reader.rs | 21 +- core/src/services/hdfs_native/writer.rs | 2 - core/src/services/http/backend.rs | 235 +---- core/src/services/http/error.rs | 7 +- core/src/services/http/mod.rs | 1 + core/src/services/http/reader.rs | 55 ++ core/src/services/huggingface/backend.rs | 35 +- core/src/services/huggingface/core.rs | 18 +- core/src/services/huggingface/error.rs | 7 +- core/src/services/huggingface/lister.rs | 8 +- core/src/services/huggingface/mod.rs | 1 + core/src/services/huggingface/reader.rs | 57 ++ core/src/services/icloud/backend.rs | 26 +- core/src/services/icloud/core.rs | 49 +- core/src/services/icloud/mod.rs | 1 + core/src/services/icloud/reader.rs | 57 ++ core/src/services/ipfs/backend.rs | 22 +- core/src/services/ipfs/error.rs | 7 +- core/src/services/ipfs/mod.rs | 1 + core/src/services/ipfs/reader.rs | 55 ++ core/src/services/ipmfs/backend.rs | 56 +- core/src/services/ipmfs/error.rs | 7 +- core/src/services/ipmfs/lister.rs | 5 +- core/src/services/ipmfs/mod.rs | 1 + core/src/services/ipmfs/reader.rs | 55 ++ core/src/services/ipmfs/writer.rs | 6 +- core/src/services/koofr/backend.rs | 42 +- core/src/services/koofr/core.rs | 27 +- core/src/services/koofr/error.rs | 13 +- core/src/services/koofr/lister.rs | 6 +- core/src/services/koofr/mod.rs | 1 + core/src/services/koofr/reader.rs | 57 ++ core/src/services/koofr/writer.rs | 5 +- core/src/services/libsql/backend.rs | 5 +- core/src/services/libsql/error.rs | 7 +- core/src/services/memory/backend.rs | 6 +- core/src/services/mod.rs | 4 +- core/src/services/obs/backend.rs | 39 +- core/src/services/obs/core.rs | 37 +- core/src/services/obs/error.rs | 6 +- core/src/services/obs/lister.rs | 2 +- core/src/services/obs/mod.rs | 1 + core/src/services/obs/reader.rs | 60 ++ core/src/services/obs/writer.rs | 20 +- core/src/services/onedrive/backend.rs | 53 +- core/src/services/onedrive/error.rs | 7 +- core/src/services/onedrive/lister.rs | 8 +- core/src/services/onedrive/mod.rs | 1 + core/src/services/onedrive/reader.rs | 55 ++ core/src/services/onedrive/writer.rs | 14 +- core/src/services/oss/backend.rs | 62 +- core/src/services/oss/core.rs | 54 +- core/src/services/oss/error.rs | 6 +- core/src/services/oss/lister.rs | 2 +- core/src/services/oss/mod.rs | 1 + core/src/services/oss/reader.rs | 60 ++ core/src/services/oss/writer.rs | 20 
+- core/src/services/pcloud/backend.rs | 42 +- core/src/services/pcloud/core.rs | 38 +- core/src/services/pcloud/error.rs | 17 +- core/src/services/pcloud/lister.rs | 9 +- core/src/services/pcloud/mod.rs | 1 + core/src/services/pcloud/reader.rs | 57 ++ core/src/services/pcloud/writer.rs | 5 +- core/src/services/s3/backend.rs | 44 +- core/src/services/s3/core.rs | 44 +- core/src/services/s3/error.rs | 6 +- core/src/services/s3/lister.rs | 2 +- core/src/services/s3/mod.rs | 1 + core/src/services/s3/reader.rs | 57 ++ core/src/services/s3/writer.rs | 24 +- core/src/services/seafile/backend.rs | 27 +- core/src/services/seafile/core.rs | 47 +- core/src/services/seafile/error.rs | 12 +- core/src/services/seafile/lister.rs | 5 +- core/src/services/seafile/mod.rs | 1 + core/src/services/seafile/reader.rs | 57 ++ core/src/services/seafile/writer.rs | 5 +- core/src/services/sftp/backend.rs | 265 +++--- core/src/services/sftp/error.rs | 9 +- core/src/services/sftp/mod.rs | 1 + core/src/services/sftp/reader.rs | 80 ++ core/src/services/sftp/writer.rs | 2 +- core/src/services/supabase/backend.rs | 15 +- core/src/services/supabase/core.rs | 13 +- core/src/services/supabase/error.rs | 7 +- core/src/services/supabase/mod.rs | 1 + core/src/services/supabase/reader.rs | 57 ++ core/src/services/supabase/writer.rs | 6 +- core/src/services/swift/backend.rs | 31 +- core/src/services/swift/core.rs | 25 +- core/src/services/swift/error.rs | 6 +- core/src/services/swift/lister.rs | 6 +- core/src/services/swift/mod.rs | 1 + core/src/services/swift/reader.rs | 57 ++ core/src/services/swift/writer.rs | 6 +- core/src/services/upyun/backend.rs | 37 +- core/src/services/upyun/core.rs | 25 +- core/src/services/upyun/error.rs | 12 +- core/src/services/upyun/lister.rs | 8 +- core/src/services/upyun/mod.rs | 1 + core/src/services/upyun/reader.rs | 57 ++ core/src/services/upyun/writer.rs | 23 +- core/src/services/vercel_artifacts/backend.rs | 32 +- core/src/services/vercel_artifacts/error.rs | 7 +- core/src/services/vercel_artifacts/mod.rs | 1 + core/src/services/vercel_artifacts/reader.rs | 58 ++ core/src/services/vercel_artifacts/writer.rs | 5 +- core/src/services/vercel_blob/backend.rs | 36 +- core/src/services/vercel_blob/core.rs | 27 +- core/src/services/vercel_blob/error.rs | 13 +- core/src/services/vercel_blob/mod.rs | 1 + core/src/services/vercel_blob/reader.rs | 57 ++ core/src/services/vercel_blob/writer.rs | 24 +- core/src/services/webdav/backend.rs | 26 +- core/src/services/webdav/core.rs | 26 +- core/src/services/webdav/error.rs | 7 +- core/src/services/webdav/lister.rs | 4 +- core/src/services/webdav/mod.rs | 1 + core/src/services/webdav/reader.rs | 57 ++ core/src/services/webdav/writer.rs | 6 +- core/src/services/webhdfs/backend.rs | 95 +-- core/src/services/webhdfs/error.rs | 13 +- core/src/services/webhdfs/lister.rs | 13 +- core/src/services/webhdfs/mod.rs | 1 + core/src/services/webhdfs/reader.rs | 68 ++ core/src/services/webhdfs/writer.rs | 26 +- core/src/services/yandex_disk/backend.rs | 47 +- core/src/services/yandex_disk/core.rs | 25 +- core/src/services/yandex_disk/error.rs | 12 +- core/src/services/yandex_disk/lister.rs | 6 +- core/src/services/yandex_disk/mod.rs | 1 + core/src/services/yandex_disk/reader.rs | 66 ++ core/src/services/yandex_disk/writer.rs | 5 +- core/src/types/blocking_reader.rs | 306 +++++++ core/src/types/capability.rs | 6 - core/src/types/list.rs | 1 + core/src/types/mod.rs | 9 +- core/src/types/operator/blocking_operator.rs | 29 +- core/src/types/operator/operator.rs | 109 +-- 
core/src/types/operator/operator_functions.rs | 16 +- core/src/types/operator/operator_futures.rs | 43 +- core/src/types/reader.rs | 759 +++++++---------- core/src/types/writer.rs | 6 +- core/tests/behavior/async_fuzz.rs | 93 +- core/tests/behavior/async_read.rs | 285 +------ core/tests/behavior/async_write.rs | 4 +- core/tests/behavior/blocking_read.rs | 8 - core/tests/behavior/main.rs | 3 +- integrations/object_store/src/lib.rs | 35 +- 350 files changed, 5121 insertions(+), 8387 deletions(-) rename .github/services/chainsafe/chainsafe/{action.yml => disable_action.yml} (100%) create mode 100644 core/src/raw/oio/buf/buffer.rs delete mode 100644 core/src/raw/oio/cursor.rs delete mode 100644 core/src/raw/oio/read/buffer_reader.rs delete mode 100644 core/src/raw/oio/read/file_read.rs delete mode 100644 core/src/raw/oio/read/futures_read.rs delete mode 100644 core/src/raw/oio/read/into_read_from_stream.rs delete mode 100644 core/src/raw/oio/read/into_streamable_read.rs delete mode 100644 core/src/raw/oio/read/lazy_read.rs delete mode 100644 core/src/raw/oio/read/range_read.rs delete mode 100644 core/src/raw/oio/read/std_read.rs delete mode 100644 core/src/raw/oio/read/tokio_read.rs delete mode 100644 core/src/raw/oio/stream/into_stream.rs delete mode 100644 core/src/raw/oio/stream/into_stream_from_reader.rs create mode 100644 core/src/services/alluxio/reader.rs create mode 100644 core/src/services/azblob/reader.rs create mode 100644 core/src/services/azdls/reader.rs create mode 100644 core/src/services/azfile/reader.rs create mode 100644 core/src/services/b2/reader.rs create mode 100644 core/src/services/chainsafe/reader.rs create mode 100644 core/src/services/cos/reader.rs create mode 100644 core/src/services/dropbox/reader.rs create mode 100644 core/src/services/fs/reader.rs create mode 100644 core/src/services/ftp/reader.rs delete mode 100644 core/src/services/ftp/util.rs create mode 100644 core/src/services/gcs/reader.rs create mode 100644 core/src/services/gdrive/reader.rs create mode 100644 core/src/services/ghac/reader.rs create mode 100644 core/src/services/github/reader.rs create mode 100644 core/src/services/hdfs/reader.rs create mode 100644 core/src/services/http/reader.rs create mode 100644 core/src/services/huggingface/reader.rs create mode 100644 core/src/services/icloud/reader.rs create mode 100644 core/src/services/ipfs/reader.rs create mode 100644 core/src/services/ipmfs/reader.rs create mode 100644 core/src/services/koofr/reader.rs create mode 100644 core/src/services/obs/reader.rs create mode 100644 core/src/services/onedrive/reader.rs create mode 100644 core/src/services/oss/reader.rs create mode 100644 core/src/services/pcloud/reader.rs create mode 100644 core/src/services/s3/reader.rs create mode 100644 core/src/services/seafile/reader.rs create mode 100644 core/src/services/sftp/reader.rs create mode 100644 core/src/services/supabase/reader.rs create mode 100644 core/src/services/swift/reader.rs create mode 100644 core/src/services/upyun/reader.rs create mode 100644 core/src/services/vercel_artifacts/reader.rs create mode 100644 core/src/services/vercel_blob/reader.rs create mode 100644 core/src/services/webdav/reader.rs create mode 100644 core/src/services/webhdfs/reader.rs create mode 100644 core/src/services/yandex_disk/reader.rs create mode 100644 core/src/types/blocking_reader.rs diff --git a/.github/services/chainsafe/chainsafe/action.yml b/.github/services/chainsafe/chainsafe/disable_action.yml similarity index 100% rename from 
.github/services/chainsafe/chainsafe/action.yml rename to .github/services/chainsafe/chainsafe/disable_action.yml diff --git a/.github/services/sftp/sftp/action.yml b/.github/services/sftp/sftp/action.yml index c3586ca3ab0d..08fd7b9aa2db 100644 --- a/.github/services/sftp/sftp/action.yml +++ b/.github/services/sftp/sftp/action.yml @@ -38,3 +38,4 @@ runs: OPENDAL_SFTP_KEY=${{ github.workspace }}/fixtures/sftp/test_ssh_key OPENDAL_SFTP_KNOWN_HOSTS_STRATEGY=accept EOF + diff --git a/.github/workflows/test_behavior.yml b/.github/workflows/test_behavior.yml index 4c65bbd4e357..57918c64102e 100644 --- a/.github/workflows/test_behavior.yml +++ b/.github/workflows/test_behavior.yml @@ -79,10 +79,11 @@ jobs: test_core: name: core / ${{ matrix.os }} - needs: [plan] + needs: [ plan ] if: fromJson(needs.plan.outputs.plan).components.core secrets: inherit strategy: + fail-fast: false matrix: include: ${{ fromJson(needs.plan.outputs.plan).core }} uses: ./.github/workflows/test_behavior_core.yml @@ -92,10 +93,11 @@ jobs: test_binding_java: name: binding_java / ${{ matrix.os }} - needs: [plan] + needs: [ plan ] if: fromJson(needs.plan.outputs.plan).components.binding_java secrets: inherit strategy: + fail-fast: false matrix: include: ${{ fromJson(needs.plan.outputs.plan).binding_java }} uses: ./.github/workflows/test_behavior_binding_java.yml @@ -105,10 +107,11 @@ jobs: test_binding_python: name: binding_python / ${{ matrix.os }} - needs: [plan] + needs: [ plan ] if: fromJson(needs.plan.outputs.plan).components.binding_python secrets: inherit strategy: + fail-fast: false matrix: include: ${{ fromJson(needs.plan.outputs.plan).binding_python }} uses: ./.github/workflows/test_behavior_binding_python.yml @@ -118,10 +121,11 @@ jobs: test_binding_nodejs: name: binding_nodejs / ${{ matrix.os }} - needs: [plan] + needs: [ plan ] if: fromJson(needs.plan.outputs.plan).components.binding_nodejs secrets: inherit strategy: + fail-fast: false matrix: include: ${{ fromJson(needs.plan.outputs.plan).binding_nodejs }} uses: ./.github/workflows/test_behavior_binding_nodejs.yml diff --git a/.github/workflows/test_behavior_binding_java.yml b/.github/workflows/test_behavior_binding_java.yml index 52dd117870e9..82143fc7d153 100644 --- a/.github/workflows/test_behavior_binding_java.yml +++ b/.github/workflows/test_behavior_binding_java.yml @@ -32,6 +32,7 @@ jobs: name: ${{ matrix.cases.service }} / ${{ matrix.cases.setup }} runs-on: ${{ inputs.os }} strategy: + fail-fast: false matrix: cases: ${{ fromJson(inputs.cases) }} steps: diff --git a/.github/workflows/test_behavior_binding_nodejs.yml b/.github/workflows/test_behavior_binding_nodejs.yml index cb08366c5fcd..0dfc748226be 100644 --- a/.github/workflows/test_behavior_binding_nodejs.yml +++ b/.github/workflows/test_behavior_binding_nodejs.yml @@ -32,6 +32,7 @@ jobs: name: ${{ matrix.cases.service }} / ${{ matrix.cases.setup }} runs-on: ${{ inputs.os }} strategy: + fail-fast: false matrix: cases: ${{ fromJson(inputs.cases) }} steps: diff --git a/.github/workflows/test_behavior_binding_python.yml b/.github/workflows/test_behavior_binding_python.yml index 42fd1de81831..8f3d4bbaf48a 100644 --- a/.github/workflows/test_behavior_binding_python.yml +++ b/.github/workflows/test_behavior_binding_python.yml @@ -32,6 +32,7 @@ jobs: name: ${{ matrix.cases.service }} / ${{ matrix.cases.setup }} runs-on: ${{ inputs.os }} strategy: + fail-fast: false matrix: cases: ${{ fromJson(inputs.cases) }} steps: diff --git a/.github/workflows/test_behavior_core.yml 
b/.github/workflows/test_behavior_core.yml index f6831e84919c..ef49723c5ed1 100644 --- a/.github/workflows/test_behavior_core.yml +++ b/.github/workflows/test_behavior_core.yml @@ -32,6 +32,7 @@ jobs: name: ${{ matrix.cases.service }} / ${{ matrix.cases.setup }} runs-on: ${{ inputs.os }} strategy: + fail-fast: false matrix: cases: ${{ fromJson(inputs.cases) }} steps: diff --git a/bin/oay/Cargo.lock b/bin/oay/Cargo.lock index 7ed6313e9541..8c65de24ce21 100644 --- a/bin/oay/Cargo.lock +++ b/bin/oay/Cargo.lock @@ -181,9 +181,9 @@ dependencies = [ [[package]] name = "backon" -version = "0.4.1" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c1a6197b2120bb2185a267f6515038558b019e92b832bb0320e96d66268dcf9" +checksum = "c491fa80d69c03084223a4e73c378dd9f9a1e612eb54051213f88b2d5249b458" dependencies = [ "fastrand", "futures-core", @@ -212,12 +212,6 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" -[[package]] -name = "base64ct" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" - [[package]] name = "bitflags" version = "1.3.2" @@ -245,12 +239,6 @@ version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - [[package]] name = "bytes" version = "1.5.0" @@ -319,32 +307,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" -[[package]] -name = "const-oid" -version = "0.9.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" - -[[package]] -name = "const-random" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aaf16c9c2c612020bcfd042e170f6e32de9b9d75adb5277cdbbd2e2c8c8299a" -dependencies = [ - "const-random-macro", -] - -[[package]] -name = "const-random-macro" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" -dependencies = [ - "getrandom", - "once_cell", - "tiny-keccak", -] - [[package]] name = "core-foundation" version = "0.9.4" @@ -370,12 +332,6 @@ dependencies = [ "libc", ] -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - [[package]] name = "crypto-common" version = "0.1.6" @@ -434,17 +390,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "der" -version = "0.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fffa369a668c8af7dbf8b5e56c9f744fbd399949ed171606040001947de40b1c" -dependencies = [ - "const-oid", - "pem-rfc7468", - "zeroize", -] - [[package]] name = "deranged" version = "0.3.11" @@ -461,9 +406,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", - 
"const-oid", "crypto-common", - "subtle", ] [[package]] @@ -487,15 +430,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "dlv-list" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" -dependencies = [ - "const-random", -] - [[package]] name = "encoding_rs" version = "0.8.33" @@ -513,12 +447,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "fastrand" -version = "1.9.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" -dependencies = [ - "instant", -] +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" [[package]] name = "flagset" @@ -718,30 +649,6 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest", -] - -[[package]] -name = "home" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" -dependencies = [ - "windows-sys 0.52.0", -] - [[package]] name = "htmlescape" version = "0.3.1" @@ -869,15 +776,6 @@ dependencies = [ "hashbrown", ] -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - [[package]] name = "ipnet" version = "2.9.0" @@ -899,29 +797,11 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "jsonwebtoken" -version = "9.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c7ea04a7c5c055c175f189b6dc6ba036fd62306b58c66c9f6389036c503a3f4" -dependencies = [ - "base64", - "js-sys", - "pem", - "ring", - "serde", - "serde_json", - "simple_asn1", -] - [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" -dependencies = [ - "spin 0.5.2", -] [[package]] name = "libc" @@ -929,12 +809,6 @@ version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" -[[package]] -name = "libm" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" - [[package]] name = "libredox" version = "0.0.1" @@ -1048,61 +922,12 @@ dependencies = [ "winapi", ] -[[package]] -name = "num-bigint" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-bigint-dig" -version = "0.8.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" -dependencies = [ - "byteorder", - "lazy_static", - "libm", - "num-integer", - "num-iter", - "num-traits", - "rand", - "smallvec", - "zeroize", -] - [[package]] name = "num-conv" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" -[[package]] -name = "num-integer" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" -dependencies = [ - "autocfg", - "num-traits", -] - -[[package]] -name = "num-iter" -version = "0.1.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.17" @@ -1110,7 +935,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ "autocfg", - "libm", ] [[package]] @@ -1184,37 +1008,19 @@ dependencies = [ "once_cell", "percent-encoding", "quick-xml", - "reqsign", "reqwest", "serde", "serde_json", - "sha2", "tokio", "uuid", ] -[[package]] -name = "openssl-probe" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" - [[package]] name = "option-ext" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" -[[package]] -name = "ordered-multimap" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4d6a8c22fc714f0c2373e6091bf6f5e9b37b1bc0b1184874b7e0a4e303d318f" -dependencies = [ - "dlv-list", - "hashbrown", -] - [[package]] name = "overload" version = "0.1.1" @@ -1244,25 +1050,6 @@ dependencies = [ "windows-targets 0.48.5", ] -[[package]] -name = "pem" -version = "3.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310" -dependencies = [ - "base64", - "serde", -] - -[[package]] -name = "pem-rfc7468" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" -dependencies = [ - "base64ct", -] - [[package]] name = "percent-encoding" version = "2.3.1" @@ -1301,27 +1088,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" -[[package]] -name = "pkcs1" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" -dependencies = [ - "der", - "pkcs8", - "spki", -] - -[[package]] -name = "pkcs8" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" -dependencies = [ - "der", - "spki", -] - [[package]] name = "powerfmt" version = "0.2.0" @@ -1456,37 +1222,6 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" -[[package]] -name = "reqsign" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed08ac3aa0676637644b1b892202f1ae789c28c15ebfa906128d111ae8086062" -dependencies = [ - "anyhow", - "async-trait", - "base64", - "chrono", - "form_urlencoded", - "getrandom", - "hex", - "hmac", - "home", - "http", - "jsonwebtoken", - "log", - "once_cell", - "percent-encoding", - "quick-xml", - "rand", - "reqwest", - "rsa", - "rust-ini", - "serde", - "serde_json", - "sha1", - "sha2", -] - [[package]] name = "reqwest" version = "0.11.24" @@ -1511,7 +1246,6 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rustls", - "rustls-native-certs", "rustls-pemfile", "serde", "serde_json", @@ -1527,6 +1261,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", + "webpki-roots", "winreg", ] @@ -1539,41 +1274,11 @@ dependencies = [ "cc", "getrandom", "libc", - "spin 0.9.8", + "spin", "untrusted", "windows-sys 0.48.0", ] -[[package]] -name = "rsa" -version = "0.9.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc" -dependencies = [ - "const-oid", - "digest", - "num-bigint-dig", - "num-integer", - "num-traits", - "pkcs1", - "pkcs8", - "rand_core", - "signature", - "spki", - "subtle", - "zeroize", -] - -[[package]] -name = "rust-ini" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e0698206bcb8882bf2a9ecb4c1e7785db57ff052297085a6efd4fe42302068a" -dependencies = [ - "cfg-if", - "ordered-multimap", -] - [[package]] name = "rustc-demangle" version = "0.1.23" @@ -1592,18 +1297,6 @@ dependencies = [ "sct", ] -[[package]] -name = "rustls-native-certs" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" -dependencies = [ - "openssl-probe", - "rustls-pemfile", - "schannel", - "security-framework", -] - [[package]] name = "rustls-pemfile" version = "1.0.4" @@ -1635,15 +1328,6 @@ version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" -[[package]] -name = "schannel" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" -dependencies = [ - "windows-sys 0.52.0", -] - [[package]] name = "scopeguard" version = "1.2.0" @@ -1660,29 +1344,6 @@ dependencies = [ "untrusted", ] -[[package]] -name = "security-framework" -version = "2.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" -dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework-sys" -version = "2.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "serde" version = "1.0.197" @@ -1756,17 +1417,6 @@ dependencies = [ "digest", ] -[[package]] -name = "sha2" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - [[package]] name = "sharded-slab" version = "0.1.7" @@ -1776,28 +1426,6 @@ dependencies = [ "lazy_static", ] -[[package]] -name = "signature" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" -dependencies = [ - "digest", - "rand_core", -] - -[[package]] -name = "simple_asn1" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc4e5204eb1910f40f9cfa375f6f05b68c3abac4b6fd879c8ff5e7ae8a0a085" -dependencies = [ - "num-bigint", - "num-traits", - "thiserror", - "time", -] - [[package]] name = "slab" version = "0.4.9" @@ -1823,40 +1451,18 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - [[package]] name = "spin" version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" -[[package]] -name = "spki" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" -dependencies = [ - "base64ct", - "der", -] - [[package]] name = "strsim" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" -[[package]] -name = "subtle" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" - [[package]] name = "syn" version = "2.0.48" @@ -1956,15 +1562,6 @@ dependencies = [ "time-core", ] -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - [[package]] name = "tinyvec" version = "1.6.0" @@ -2367,6 +1964,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-roots" +version = "0.25.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" + [[package]] name = "winapi" version = "0.3.9" @@ -2583,9 +2186,3 @@ dependencies = [ "quote", "syn", ] - -[[package]] -name = "zeroize" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" diff --git a/bin/oli/Cargo.lock b/bin/oli/Cargo.lock index 843d9d7050da..02b8fc353cb9 100644 --- a/bin/oli/Cargo.lock +++ b/bin/oli/Cargo.lock @@ -1271,9 +1271,9 @@ dependencies = [ [[package]] name = "hdrs" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00a7b465f2d12e45db2af56af13a1cbfe1d5616d54355f5610b26c0961dec7b7" +checksum = "f7c42a693bfe5dc8fcad1f24044c5ec355c5f157b8ce63c7d62f51cecbc7878d" dependencies = [ "blocking", "errno", diff --git a/bin/oli/src/commands/cat.rs b/bin/oli/src/commands/cat.rs index ff12038415f3..06f139fb3390 100644 --- a/bin/oli/src/commands/cat.rs +++ b/bin/oli/src/commands/cat.rs @@ -22,7 +22,7 @@ use anyhow::Result; use 
clap::Arg; use clap::ArgMatches; use clap::Command; -use tokio::io; +use futures::io; use crate::config::Config; @@ -37,9 +37,11 @@ pub async fn main(args: &ArgMatches) -> Result<()> { .ok_or_else(|| anyhow!("missing target"))?; let (op, path) = cfg.parse_location(target)?; - let mut reader = op.reader(&path).await?; - let mut stdout = io::stdout(); - io::copy(&mut reader, &mut stdout).await?; + let reader = op.reader(&path).await?; + let meta = op.stat(&path).await?; + let mut buf_reader = reader.into_futures_io_async_read(0..meta.content_length()); + let mut stdout = io::AllowStdIo::new(std::io::stdout()); + io::copy_buf(&mut buf_reader, &mut stdout).await?; Ok(()) } diff --git a/bin/oli/src/commands/cp.rs b/bin/oli/src/commands/cp.rs index 216fddbe5d5f..b433b90bb295 100644 --- a/bin/oli/src/commands/cp.rs +++ b/bin/oli/src/commands/cp.rs @@ -47,8 +47,11 @@ pub async fn main(args: &ArgMatches) -> Result<()> { if !recursive { let mut dst_w = dst_op.writer(&dst_path).await?; + let src_meta = src_op.stat(&src_path).await?; let reader = src_op.reader(&src_path).await?; - let buf_reader = futures::io::BufReader::with_capacity(8 * 1024 * 1024, reader); + let buf_reader = reader + .into_futures_io_async_read(0..src_meta.content_length()) + .with_capacity(8 * 1024 * 1024); futures::io::copy_buf(buf_reader, &mut dst_w).await?; // flush data dst_w.close().await?; @@ -70,7 +73,9 @@ pub async fn main(args: &ArgMatches) -> Result<()> { .strip_prefix(prefix) .expect("invalid path"); let reader = src_op.reader(de.path()).await?; - let buf_reader = futures::io::BufReader::with_capacity(8 * 1024 * 1024, reader); + let buf_reader = reader + .into_futures_io_async_read(0..meta.content_length()) + .with_capacity(8 * 1024 * 1024); let mut writer = dst_op.writer(&dst_root.join(fp).to_string_lossy()).await?; diff --git a/bindings/c/include/opendal.h b/bindings/c/include/opendal.h index a7298caed57a..ca9de2042aed 100644 --- a/bindings/c/include/opendal.h +++ b/bindings/c/include/opendal.h @@ -144,12 +144,6 @@ typedef struct BlockingLister BlockingLister; */ typedef struct BlockingOperator BlockingOperator; -/** - * BlockingReader is designed to read data from given path in an blocking - * manner. - */ -typedef struct BlockingReader BlockingReader; - /** * Entry returned by [`Lister`] or [`BlockingLister`] to represent a path and its relative metadata. * @@ -211,6 +205,15 @@ typedef struct Metadata Metadata; */ typedef struct OperatorInfo OperatorInfo; +/** + * StdIoReader is the adapter of [`Read`], [`Seek`] and [`BufRead`] for [`BlockingReader`][crate::BlockingReader]. + * + * Users can use this adapter in cases where they need to use the [`Read`] or [`BufRead`] traits. + * + * StdIoReader also implements [`Send`] and [`Sync`]. + */ +typedef struct StdIoReader StdIoReader; + /** * \brief opendal_bytes carries raw-bytes with its length * @@ -405,7 +408,7 @@ typedef struct opendal_result_read { * an opendal::BlockingReader, which is inside the Rust core code. */ typedef struct opendal_reader { - struct BlockingReader *inner; + struct StdIoReader *inner; } opendal_reader; /** @@ -511,18 +514,6 @@ typedef struct opendal_capability { * If operator supports read. */ bool read; - /** - * If operator supports seek on returning reader. - */ - bool read_can_seek; - /** - * If operator supports next on returning reader. - */ - bool read_can_next; - /** - * If operator supports read with range. - */ - bool read_with_range; /** * If operator supports read with if match.
*/ diff --git a/bindings/c/src/operator.rs b/bindings/c/src/operator.rs index c8fd31b25047..021769bcc00f 100644 --- a/bindings/c/src/operator.rs +++ b/bindings/c/src/operator.rs @@ -367,10 +367,21 @@ pub unsafe extern "C" fn opendal_operator_reader( panic!("The path given is pointing at NULL"); } let op = (*op).as_ref(); + let path = unsafe { std::ffi::CStr::from_ptr(path).to_str().unwrap() }; + let meta = match op.stat(path) { + Ok(meta) => meta, + Err(err) => { + return opendal_result_operator_reader { + reader: std::ptr::null_mut(), + error: opendal_error::new(err), + } + } + }; + match op.reader(path) { Ok(reader) => opendal_result_operator_reader { - reader: Box::into_raw(Box::new(opendal_reader::new(reader))), + reader: Box::into_raw(Box::new(opendal_reader::new(reader, meta.content_length()))), error: std::ptr::null_mut(), }, Err(e) => opendal_result_operator_reader { diff --git a/bindings/c/src/operator_info.rs b/bindings/c/src/operator_info.rs index 6c7fc69d1ba7..c67092ad761b 100644 --- a/bindings/c/src/operator_info.rs +++ b/bindings/c/src/operator_info.rs @@ -42,12 +42,6 @@ pub struct opendal_capability { /// If operator supports read. pub read: bool, - /// If operator supports seek on returning reader. - pub read_can_seek: bool, - /// If operator supports next on returning reader. - pub read_can_next: bool, - /// If operator supports read with range. - pub read_with_range: bool, /// If operator supports read with if match. pub read_with_if_match: bool, /// If operator supports read with if none match. @@ -237,9 +231,6 @@ impl From for opendal_capability { stat_with_if_match: value.stat_with_if_match, stat_with_if_none_match: value.stat_with_if_none_match, read: value.read, - read_can_seek: value.read_can_seek, - read_can_next: value.read_can_next, - read_with_range: value.read_with_range, read_with_if_match: value.read_with_if_match, read_with_if_none_match: value.read_with_if_none_match, read_with_override_content_type: value.read_with_override_content_type, diff --git a/bindings/c/src/reader.rs b/bindings/c/src/reader.rs index 8f233dd8e4d5..5d93f7d28969 100644 --- a/bindings/c/src/reader.rs +++ b/bindings/c/src/reader.rs @@ -16,6 +16,7 @@ // under the License. use ::opendal as core; +use std::io::Read; use super::*; @@ -25,13 +26,13 @@ use super::*; /// a opendal::BlockingReader, which is inside the Rust core code. 
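// A minimal sketch (not part of this patch) of the adapter pattern the C
// binding relies on below: with range-based reads, `opendal::BlockingReader`
// keeps no cursor of its own, so it is wrapped via `into_std_io_read(range)`
// into a `StdIoReader` that implements `std::io::Read`. Assuming a blocking
// operator `op` and a known object `size`:
//
//     use std::io::Read;
//
//     let reader = op.reader("path")?;              // opendal::BlockingReader
//     let mut r = reader.into_std_io_read(0..size); // bounded std::io::Read adapter
//     let mut buf = vec![0u8; 4096];
//     let n = r.read(&mut buf)?;                    // standard Read semantics from here on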
#[repr(C)] pub struct opendal_reader { - inner: *mut core::BlockingReader, + inner: *mut core::StdIoReader, } impl opendal_reader { - pub(crate) fn new(reader: core::BlockingReader) -> Self { + pub(crate) fn new(reader: core::BlockingReader, size: u64) -> Self { Self { - inner: Box::into_raw(Box::new(reader)), + inner: Box::into_raw(Box::new(reader.into_std_io_read(0..size))), } } @@ -49,15 +50,12 @@ impl opendal_reader { let buf = unsafe { std::slice::from_raw_parts_mut(buf, len) }; let inner = unsafe { &mut *(*reader).inner }; - let r = inner.read(buf.len()); - match r { - Ok(bs) => { - buf[..bs.len()].copy_from_slice(&bs); - opendal_result_reader_read { - size: bs.len(), - error: std::ptr::null_mut(), - } - } + let n = inner.read(buf); + match n { + Ok(n) => opendal_result_reader_read { + size: n, + error: std::ptr::null_mut(), + }, Err(e) => opendal_result_reader_read { size: 0, error: opendal_error::new( diff --git a/bindings/c/tests/opinfo.cpp b/bindings/c/tests/opinfo.cpp index 42c70fde2457..9684b0360473 100644 --- a/bindings/c/tests/opinfo.cpp +++ b/bindings/c/tests/opinfo.cpp @@ -65,9 +65,6 @@ TEST_F(OpendalOperatorInfoTest, CapabilityTest) EXPECT_TRUE(full_cap.blocking); EXPECT_TRUE(full_cap.read); - EXPECT_TRUE(full_cap.read_can_seek); - EXPECT_TRUE(full_cap.read_can_next); - EXPECT_TRUE(full_cap.read_with_range); EXPECT_TRUE(full_cap.stat); EXPECT_TRUE(full_cap.write); EXPECT_TRUE(full_cap.write_can_empty); @@ -78,9 +75,6 @@ TEST_F(OpendalOperatorInfoTest, CapabilityTest) EXPECT_TRUE(native_cap.blocking); EXPECT_TRUE(native_cap.read); - EXPECT_TRUE(native_cap.read_can_seek); - EXPECT_TRUE(native_cap.read_can_next); - EXPECT_TRUE(native_cap.read_with_range); EXPECT_TRUE(native_cap.stat); EXPECT_TRUE(native_cap.write); EXPECT_TRUE(native_cap.write_can_empty); diff --git a/bindings/cpp/src/lib.rs b/bindings/cpp/src/lib.rs index e59c1482d5e8..88d6c0092a45 100644 --- a/bindings/cpp/src/lib.rs +++ b/bindings/cpp/src/lib.rs @@ -154,7 +154,12 @@ impl Operator { } fn reader(&self, path: &str) -> Result> { - Ok(Box::new(Reader(self.0.reader(path)?))) + let meta = self.0.stat(path)?; + Ok(Box::new(Reader( + self.0 + .reader(path)? 
+ .into_std_io_read(0..meta.content_length()), + ))) } fn lister(&self, path: &str) -> Result> { diff --git a/bindings/cpp/src/reader.rs b/bindings/cpp/src/reader.rs index 6f9bfd8fe9ec..7c083501bca4 100644 --- a/bindings/cpp/src/reader.rs +++ b/bindings/cpp/src/reader.rs @@ -17,16 +17,16 @@ use anyhow::Result; use opendal as od; +use std::io::{Read, Seek}; use super::ffi; -pub struct Reader(pub od::BlockingReader); +pub struct Reader(pub od::StdIoReader); impl Reader { pub fn read(&mut self, buf: &mut [u8]) -> Result { - let bs = self.0.read(buf.len())?; - buf[..bs.len()].copy_from_slice(&bs); - Ok(bs.len()) + let n = self.0.read(buf)?; + Ok(n) } pub fn seek(&mut self, offset: u64, dir: ffi::SeekFrom) -> Result { diff --git a/bindings/java/src/lib.rs b/bindings/java/src/lib.rs index 63d4c347aea0..c9950a479693 100644 --- a/bindings/java/src/lib.rs +++ b/bindings/java/src/lib.rs @@ -94,15 +94,12 @@ fn make_operator_info<'a>(env: &mut JNIEnv<'a>, info: OperatorInfo) -> Result(env: &mut JNIEnv<'a>, cap: Capability) -> Result> { let capability = env.new_object( "org/apache/opendal/Capability", - "(ZZZZZZZZZZZZZZZZZZJJJZZZZZZZZZZZZZZJZ)V", + "(ZZZZZZZZZZZZZZZJJJZZZZZZZZZZZZZZJZ)V", &[ JValue::Bool(cap.stat as jboolean), JValue::Bool(cap.stat_with_if_match as jboolean), JValue::Bool(cap.stat_with_if_none_match as jboolean), JValue::Bool(cap.read as jboolean), - JValue::Bool(cap.read_can_seek as jboolean), - JValue::Bool(cap.read_can_next as jboolean), - JValue::Bool(cap.read_with_range as jboolean), JValue::Bool(cap.read_with_if_match as jboolean), JValue::Bool(cap.read_with_if_none_match as jboolean), JValue::Bool(cap.read_with_override_cache_control as jboolean), diff --git a/bindings/java/src/main/java/org/apache/opendal/Capability.java b/bindings/java/src/main/java/org/apache/opendal/Capability.java index 45a76e2b7fa4..40c9eee83e2d 100644 --- a/bindings/java/src/main/java/org/apache/opendal/Capability.java +++ b/bindings/java/src/main/java/org/apache/opendal/Capability.java @@ -24,7 +24,7 @@ @Data public class Capability { /** - * If operator supports stat. + * If operator supports stat. */ public final boolean stat; @@ -43,21 +43,6 @@ public class Capability { */ public final boolean read; - /** - * If operator supports seek on returning reader. - */ - public final boolean readCanSeek; - - /** - * If operator supports next on returning reader. - */ - public final boolean readCanNext; - - /** - * If operator supports read with range. - */ - public final boolean readWithRange; - /** * If operator supports read with if matched. */ @@ -216,9 +201,6 @@ public Capability( boolean statWithIfMatch, boolean statWithIfNoneMatch, boolean read, - boolean readCanSeek, - boolean readCanNext, - boolean readWithRange, boolean readWithIfMatch, boolean readWithIfNoneMatch, boolean readWithOverrideCacheControl, @@ -253,9 +235,6 @@ public Capability( this.statWithIfMatch = statWithIfMatch; this.statWithIfNoneMatch = statWithIfNoneMatch; this.read = read; - this.readCanSeek = readCanSeek; - this.readCanNext = readCanNext; - this.readWithRange = readWithRange; this.readWithIfMatch = readWithIfMatch; this.readWithIfNoneMatch = readWithIfNoneMatch; this.readWithOverrideCacheControl = readWithOverrideCacheControl; diff --git a/bindings/nodejs/generated.d.ts b/bindings/nodejs/generated.d.ts index a0c5329afeea..9048102520c7 100644 --- a/bindings/nodejs/generated.d.ts +++ b/bindings/nodejs/generated.d.ts @@ -62,12 +62,6 @@ export class Capability { get statWithIfNoneMatch(): boolean /** If operator supports read. 
*/ get read(): boolean - /** If operator supports seek on returning reader. */ - get readCanSeek(): boolean - /** If operator supports next on returning reader. */ - get readCanNext(): boolean - /** If operator supports read with range. */ - get readWithRange(): boolean /** If operator supports read with if match. */ get readWithIfMatch(): boolean /** If operator supports read with if none match. */ @@ -563,8 +557,6 @@ export class Reader { * > &mut self in async napi methods should be marked as unsafe * * Read bytes from this reader into given buffer. - * - * TODO: change api into stream based. */ read(buf: Buffer): Promise } diff --git a/bindings/nodejs/src/capability.rs b/bindings/nodejs/src/capability.rs index 742f3c9a6652..b19861359ecd 100644 --- a/bindings/nodejs/src/capability.rs +++ b/bindings/nodejs/src/capability.rs @@ -60,24 +60,6 @@ impl Capability { self.0.read } - /// If operator supports seek on returning reader. - #[napi(getter)] - pub fn read_can_seek(&self) -> bool { - self.0.read_can_seek - } - - /// If operator supports next on returning reader. - #[napi(getter)] - pub fn read_can_next(&self) -> bool { - self.0.read_can_next - } - - /// If operator supports read with range. - #[napi(getter)] - pub fn read_with_range(&self) -> bool { - self.0.read_with_range - } - /// If operator supports read with if match. #[napi(getter)] pub fn read_with_if_match(&self) -> bool { diff --git a/bindings/nodejs/src/lib.rs b/bindings/nodejs/src/lib.rs index fec882b879f8..fe0b3e6c53a8 100644 --- a/bindings/nodejs/src/lib.rs +++ b/bindings/nodejs/src/lib.rs @@ -182,7 +182,10 @@ impl Operator { #[napi] pub async fn reader(&self, path: String) -> Result { let r = self.0.reader(&path).await.map_err(format_napi_error)?; - Ok(Reader(r)) + Ok(Reader { + inner: r, + offset: 0, + }) } /// Read the whole path into a buffer synchronously. @@ -203,7 +206,10 @@ impl Operator { #[napi] pub fn reader_sync(&self, path: String) -> Result { let r = self.0.blocking().reader(&path).map_err(format_napi_error)?; - Ok(BlockingReader(r)) + Ok(BlockingReader { + inner: r, + offset: 0, + }) } /// Write bytes into path. @@ -641,23 +647,33 @@ pub struct ListOptions { /// BlockingReader is designed to read data from given path in an blocking /// manner. #[napi] -pub struct BlockingReader(opendal::BlockingReader); +pub struct BlockingReader { + inner: opendal::BlockingReader, + offset: u64, +} #[napi] impl BlockingReader { #[napi] pub fn read(&mut self, mut buf: Buffer) -> Result { - let buf = buf.as_mut(); - let bs = self.0.read(buf.len()).map_err(format_napi_error)?; - buf[..bs.len()].copy_from_slice(&bs); - Ok(bs.len()) + let mut buf = buf.as_mut(); + let size = buf.len(); + let n = self + .inner + .read(&mut buf, self.offset, size) + .map_err(format_napi_error)?; + self.offset += n as u64; + Ok(n) } } /// Reader is designed to read data from given path in an asynchronous /// manner. #[napi] -pub struct Reader(opendal::Reader); +pub struct Reader { + inner: opendal::Reader, + offset: u64, +} #[napi] impl Reader { @@ -666,14 +682,17 @@ impl Reader { /// > &mut self in async napi methods should be marked as unsafe /// /// Read bytes from this reader into given buffer. - /// - /// TODO: change api into stream based. 
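// A minimal sketch (not part of this patch) of the convention these bindings
// follow: the core `Reader` is positionless under the range-based API, so
// every call passes an explicit offset and length, and the binding tracks its
// own cursor. Names match the surrounding file:
//
//     let n = self.inner.read(&mut buf, self.offset, size).await?; // read up to `size` bytes at `offset`
//     self.offset += n as u64;                                     // advance the binding-side cursor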
#[napi] pub async unsafe fn read(&mut self, mut buf: Buffer) -> Result { - let buf = buf.as_mut(); - let bs = self.0.read(buf.len()).await.map_err(format_napi_error)?; - buf[..bs.len()].copy_from_slice(&bs); - Ok(bs.len()) + let mut buf = buf.as_mut(); + let size = buf.len(); + let n = self + .inner + .read(&mut buf, self.offset, size) + .await + .map_err(format_napi_error)?; + self.offset += n as u64; + Ok(n) } } diff --git a/bindings/nodejs/tests/suites/async.suite.mjs b/bindings/nodejs/tests/suites/async.suite.mjs index b1196dda4562..b021c273d781 100644 --- a/bindings/nodejs/tests/suites/async.suite.mjs +++ b/bindings/nodejs/tests/suites/async.suite.mjs @@ -35,28 +35,31 @@ export function run(op) { } }) - test.runIf(op.capability().write && op.capability().writeCanMulti)('reader/writer stream pipeline', async () => { - const filename = `random_file_${randomUUID()}` - const buf = generateFixedBytes(5 * 1024 * 1024) - const rs = Readable.from(buf, { - highWaterMark: 5 * 1024 * 1024, // to buffer 5MB data to read - }) - const w = await op.writer(filename) - const ws = w.createWriteStream() - await pipeline(rs, ws) + test.runIf(op.capability().read && op.capability().write && op.capability().writeCanMulti)( + 'reader/writer stream pipeline', + async () => { + const filename = `random_file_${randomUUID()}` + const buf = generateFixedBytes(5 * 1024 * 1024) + const rs = Readable.from(buf, { + highWaterMark: 5 * 1024 * 1024, // to buffer 5MB data to read + }) + const w = await op.writer(filename) + const ws = w.createWriteStream() + await pipeline(rs, ws) - await finished(ws) + await finished(ws) - const t = await op.stat(filename) - assert.equal(t.contentLength, buf.length) + const t = await op.stat(filename) + assert.equal(t.contentLength, buf.length) - const content = await op.read(filename) - assert.equal(Buffer.compare(content, buf), 0) // 0 means equal + const content = await op.read(filename) + assert.equal(Buffer.compare(content, buf), 0) // 0 means equal - await op.delete(filename) - }) + await op.delete(filename) + }, + ) - test.runIf(op.capability().write)('read stream', async () => { + test.runIf(op.capability().read && op.capability().write)('read stream', async () => { let c = generateFixedBytes(3 * 1024 * 1024) const filename = `random_file_${randomUUID()}` diff --git a/bindings/nodejs/tests/suites/services.suite.mjs b/bindings/nodejs/tests/suites/services.suite.mjs index 031257f9f0a7..004a8d22474b 100644 --- a/bindings/nodejs/tests/suites/services.suite.mjs +++ b/bindings/nodejs/tests/suites/services.suite.mjs @@ -20,7 +20,6 @@ export function run(operator) { test('get capability', () => { assert.ok(operator.capability()) - assert.ok(operator.capability().read) }) test('try to non-exist capability', () => { diff --git a/bindings/nodejs/tests/suites/sync.suite.mjs b/bindings/nodejs/tests/suites/sync.suite.mjs index 38c186d57d39..43a7224fffcf 100644 --- a/bindings/nodejs/tests/suites/sync.suite.mjs +++ b/bindings/nodejs/tests/suites/sync.suite.mjs @@ -35,7 +35,7 @@ export function run(op) { } }) - test.runIf(op.capability().write && op.capability().writeCanMulti)( + test.runIf(op.capability().read && op.capability().write && op.capability().writeCanMulti)( 'blocking reader/writer stream pipeline', async () => { const filename = `random_file_${randomUUID()}` @@ -59,7 +59,7 @@ export function run(op) { }, ) - test.runIf(op.capability().write)('blocking read stream', async () => { + test.runIf(op.capability().read && op.capability().write)('blocking read stream', async () => { let c 
= generateFixedBytes(3 * 1024 * 1024) const filename = `random_file_${randomUUID()}` diff --git a/bindings/ocaml/lib/operator.ml b/bindings/ocaml/lib/operator.ml index 94841a8b39fc..5c6f6e502bf5 100644 --- a/bindings/ocaml/lib/operator.ml +++ b/bindings/ocaml/lib/operator.ml @@ -32,16 +32,7 @@ let remove = Opendal_core.Operator.blocking_remove let remove_all = Opendal_core.Operator.blocking_remove_all module Reader = struct - let read = Opendal_core.Operator.reader_read - - let seek reader pos mode = - let inner_pos = - match mode with - | Unix.SEEK_CUR -> Opendal_core.Seek_from.Current pos - | Unix.SEEK_END -> Opendal_core.Seek_from.End pos - | Unix.SEEK_SET -> Opendal_core.Seek_from.Start pos - in - Opendal_core.Operator.reader_seek reader inner_pos + let pread = Opendal_core.Operator.reader_pread end module Metadata = struct diff --git a/bindings/ocaml/lib/operator.mli b/bindings/ocaml/lib/operator.mli index 2403dadd5f8b..a7a31a6c4f4a 100644 --- a/bindings/ocaml/lib/operator.mli +++ b/bindings/ocaml/lib/operator.mli @@ -22,7 +22,7 @@ val new_operator : (string * string) list -> (Opendal_core.Operator.operator, string) result (** [new_operator scheme config_map] Create a new block operator from given scheme and config_map. - + @param scheme Supported services, for details, refer to https://opendal.apache.org/docs/category/services/ @param config_map Configuration information required by the target service @return The block operator @@ -38,7 +38,7 @@ val stat : string -> (Opendal_core.Operator.metadata, string) result (** [is_exist operator path] Get current path's metadata **without cache** directly. - + @param operator The operator @param path want to stat @return metadata @@ -46,7 +46,7 @@ val stat : val is_exist : Opendal_core.Operator.operator -> string -> (bool, string) result (** [is_exist operator path] Check if this path exists or not. - + @param operator The operator @param path want to check @return is exists @@ -55,15 +55,15 @@ val is_exist : Opendal_core.Operator.operator -> string -> (bool, string) result val create_dir : Opendal_core.Operator.operator -> string -> (bool, string) result (** [create_dir operator path] Create a dir at given path. - + # Notes - + To indicate that a path is a directory, it is compulsory to include a trailing / in the path. Failure to do so may result in `NotADirectory` error being returned by OpenDAL. - + # Behavior - + - Create on existing dir will succeed. - Create dir is always recursive, works like `mkdir -p` @param operator The operator @@ -73,7 +73,7 @@ val create_dir : val read : Opendal_core.Operator.operator -> string -> (char array, string) result (** [read operator path] Read the whole path into a bytes. - + @param operator The operator @param path want to read @return data of path @@ -84,7 +84,7 @@ val reader : string -> (Opendal_core.Operator.reader, string) result (** [read operator path] Create a new reader which can read the whole path. 
- + @param operator The operator @param path want to read @return reader @@ -146,15 +146,9 @@ val remove_all : *) module Reader : sig - val read : Opendal_core.Operator.reader -> bytes -> (int, string) result + val pread : + Opendal_core.Operator.reader -> bytes -> int64 -> (int, string) result (** [read reader buf] Read data to [buf] and return data size.*) - - val seek : - Opendal_core.Operator.reader -> - int64 -> - Unix.seek_command -> - (int64, string) result - (** [seek reader pos mode] is a function that seeks data to the given position [pos].*) end module Metadata : sig diff --git a/bindings/ocaml/src/operator.ml b/bindings/ocaml/src/operator.ml index 4c6e012dc5e3..91d8a16e36cf 100644 --- a/bindings/ocaml/src/operator.ml +++ b/bindings/ocaml/src/operator.ml @@ -44,5 +44,4 @@ external blocking_remove_all: operator -> string -> (unit, string) Result.t = " (* file: reader.rs *) -external reader_read: reader -> bytes -> (int, string) Result.t = "reader_read" -external reader_seek: reader -> Seek_from.seek_from -> (int64, string) Result.t = "reader_seek" +external reader_pread: reader -> bytes -> int64 -> (int, string) Result.t = "reader_pread" diff --git a/bindings/ocaml/src/operator.mli b/bindings/ocaml/src/operator.mli index 4c6e012dc5e3..91d8a16e36cf 100644 --- a/bindings/ocaml/src/operator.mli +++ b/bindings/ocaml/src/operator.mli @@ -44,5 +44,4 @@ external blocking_remove_all: operator -> string -> (unit, string) Result.t = " (* file: reader.rs *) -external reader_read: reader -> bytes -> (int, string) Result.t = "reader_read" -external reader_seek: reader -> Seek_from.seek_from -> (int64, string) Result.t = "reader_seek" +external reader_pread: reader -> bytes -> int64 -> (int, string) Result.t = "reader_pread" diff --git a/bindings/ocaml/src/operator/reader.rs b/bindings/ocaml/src/operator/reader.rs index 8ae7b4583b97..8cae9ca88ca4 100644 --- a/bindings/ocaml/src/operator/reader.rs +++ b/bindings/ocaml/src/operator/reader.rs @@ -15,20 +15,12 @@ // specific language governing permissions and limitations // under the License. 
-use std::io; - use super::*; #[ocaml::func] -#[ocaml::sig("reader -> bytes -> (int, string) Result.t ")] -pub fn reader_read(reader: &mut Reader, buf: &mut [u8]) -> Result { - let bs = map_res_error(reader.0.read(buf.len()))?; - buf[..bs.len()].copy_from_slice(&bs); - Ok(bs.len()) -} - -#[ocaml::func] -#[ocaml::sig("reader -> Seek_from.seek_from -> (int64, string) Result.t ")] -pub fn reader_seek(reader: &mut Reader, pos: seek_from::SeekFrom) -> Result { - map_res_error(reader.0.seek(io::SeekFrom::from(pos))) +#[ocaml::sig("reader -> bytes -> int64 -> (int, string) Result.t ")] +pub fn reader_pread(reader: &mut Reader, mut buf: &mut [u8], offset: u64) -> Result { + let size = buf.len(); + let n = map_res_error(reader.0.read(&mut buf, offset, size))?; + Ok(n) } diff --git a/bindings/ocaml/test/test.ml b/bindings/ocaml/test/test.ml index 35c23f988548..f9770d9aa0ab 100644 --- a/bindings/ocaml/test/test.ml +++ b/bindings/ocaml/test/test.ml @@ -56,10 +56,8 @@ let test_operator_reader test_ctxt = (test_check_result (Operator.write bo "tempfile" (Bytes.of_string "helloworld"))); let reader = Operator.reader bo "tempfile" |> test_check_result in - let s = Operator.Reader.seek reader 5L SEEK_CUR |> test_check_result in - assert_equal 5 (Int64.to_int s); let data = Bytes.create 5 in - let i = Operator.Reader.read reader data |> test_check_result in + let i = Operator.Reader.pread reader data 5L |> test_check_result in assert_equal 5 i; assert_equal "world" (Bytes.to_string data) diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 5d416c9cff79..b1fac6e2dd8a 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -161,6 +161,7 @@ opendal = { version = "0.45.1", path = "../../core", features = [ pyo3 = "0.20.1" pyo3-asyncio = { version = "0.20", features = ["tokio-runtime"] } tokio = "1" +bytes = "1.5.0" [target.'cfg(unix)'.dependencies.opendal] version = "0.45.1" diff --git a/bindings/python/python/opendal/__init__.pyi b/bindings/python/python/opendal/__init__.pyi index 79162f0d9ce6..abb136f3e2f5 100644 --- a/bindings/python/python/opendal/__init__.pyi +++ b/bindings/python/python/opendal/__init__.pyi @@ -19,11 +19,16 @@ from typing import AsyncIterable, Iterable, Optional from opendal.layers import Layer + class Operator: def __init__(self, scheme: str, **kwargs): ... + def layer(self, layer: Layer): ... + def open(self, path: str, mode: str) -> File: ... + def read(self, path: str) -> memoryview: ... + def write( self, path: str, @@ -34,22 +39,37 @@ class Operator: content_disposition: Optional[str] = None, cache_control: Optional[str] = None, ): ... + def stat(self, path: str) -> Metadata: ... + def create_dir(self, path: str): ... + def delete(self, path: str): ... + def list(self, path: str) -> Iterable[Entry]: ... + def scan(self, path: str) -> Iterable[Entry]: ... + def capability(self) -> Capability: ... + def copy(self, source: str, target: str): ... + def rename(self, source: str, target: str): ... + def remove_all(self, path: str): ... + def to_async_operator(self) -> AsyncOperator: ... + class AsyncOperator: def __init__(self, scheme: str, **kwargs): ... + def layer(self, layer: Layer): ... + async def open(self, path: str, mode: str) -> AsyncFile: ... + async def read(self, path: str) -> memoryview: ... + async def write( self, path: str, @@ -60,79 +80,116 @@ class AsyncOperator: content_disposition: Optional[str] = None, cache_control: Optional[str] = None, ): ... + async def stat(self, path: str) -> Metadata: ... 
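# Editor's sketch (not part of the patch): minimal use of the File API stubbed
# above. Assumes the in-memory service; the file name and payload are invented
# for illustration. `open(..., "rb")` now stats the file first so positioned
# reads know the total size.
import opendal

op = opendal.Operator("memory")
op.write("hello.txt", b"hello world")
with op.open("hello.txt", "rb") as f:
    assert bytes(f.read(5)) == b"hello"  # sized read returns a memoryview of 5 bytes
    f.seek(6)                            # seeking still works on the std-io reader
    assert bytes(f.read()) == b"world"   # read with no size consumes to EOF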
+ async def create_dir(self, path: str): ... + async def delete(self, path: str): ... + async def list(self, path: str) -> AsyncIterable[Entry]: ... + async def scan(self, path: str) -> AsyncIterable[Entry]: ... + async def presign_stat(self, path: str, expire_second: int) -> PresignedRequest: ... + async def presign_read(self, path: str, expire_second: int) -> PresignedRequest: ... + async def presign_write( self, path: str, expire_second: int ) -> PresignedRequest: ... + def capability(self) -> Capability: ... + async def copy(self, source: str, target: str): ... + async def rename(self, source: str, target: str): ... + async def remove_all(self, path: str): ... + def to_operator(self) -> Operator: ... + class File: def read(self, size: Optional[int] = None) -> memoryview: ... + def write(self, bs: bytes): ... + def seek(self, offset: int, whence: int = 0) -> int: ... + def tell(self) -> int: ... + def close(self): ... + def __enter__(self) -> File: ... + def __exit__(self, exc_type, exc_value, traceback) -> None: ... + class AsyncFile: async def read(self, size: Optional[int] = None) -> memoryview: ... + async def write(self, bs: bytes): ... + async def seek(self, offset: int, whence: int = 0) -> int: ... + async def tell(self) -> int: ... + async def close(self): ... + def __aenter__(self) -> AsyncFile: ... + def __aexit__(self, exc_type, exc_value, traceback) -> None: ... + class Entry: @property def path(self) -> str: ... + class Metadata: @property def content_disposition(self) -> Optional[str]: ... + @property def content_length(self) -> int: ... + @property def content_md5(self) -> Optional[str]: ... + @property def content_type(self) -> Optional[str]: ... + @property def etag(self) -> Optional[str]: ... + @property def mode(self) -> EntryMode: ... + class EntryMode: def is_file(self) -> bool: ... + def is_dir(self) -> bool: ... + class PresignedRequest: @property def url(self) -> str: ... + @property def method(self) -> str: ... + @property def headers(self) -> dict[str, str]: ... + class Capability: stat: bool stat_with_if_match: bool stat_with_if_none_match: bool read: bool - read_can_seek: bool - read_can_next: bool - read_with_range: bool read_with_if_match: bool read_with_if_none_match: bool read_with_override_cache_control: bool diff --git a/bindings/python/src/capability.rs b/bindings/python/src/capability.rs index 723949272dca..bcd98b32c110 100644 --- a/bindings/python/src/capability.rs +++ b/bindings/python/src/capability.rs @@ -30,12 +30,6 @@ pub struct Capability { /// If operator supports read. pub read: bool, - /// If operator supports seek on returning reader. - pub read_can_seek: bool, - /// If operator supports next on returning reader. - pub read_can_next: bool, - /// If operator supports read with range. - pub read_with_range: bool, /// If operator supports read with if match. pub read_with_if_match: bool, /// If operator supports read with if none match. 
@@ -126,9 +120,6 @@ impl Capability { stat_with_if_match: capability.stat_with_if_match, stat_with_if_none_match: capability.stat_with_if_none_match, read: capability.read, - read_can_seek: capability.read_can_seek, - read_can_next: capability.read_can_next, - read_with_range: capability.read_with_range, read_with_if_match: capability.read_with_if_match, read_with_if_none_match: capability.read_with_if_none_match, read_with_override_cache_control: capability.read_with_override_cache_control, diff --git a/bindings/python/src/file.rs b/bindings/python/src/file.rs index 3847eddb385c..a72719ec76eb 100644 --- a/bindings/python/src/file.rs +++ b/bindings/python/src/file.rs @@ -18,14 +18,15 @@ // Remove this `allow` after fixed. #![allow(clippy::unnecessary_fallible_conversions)] +use std::io::Read; use std::io::Seek; use std::io::SeekFrom; use std::io::Write; use std::ops::DerefMut; use std::sync::Arc; -use futures::AsyncSeekExt; use futures::AsyncWriteExt; +use futures::{AsyncReadExt, AsyncSeekExt}; use pyo3::exceptions::PyIOError; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; @@ -40,14 +41,14 @@ use crate::*; pub struct File(FileState); enum FileState { - Reader(ocore::BlockingReader), + Reader(ocore::StdIoReader), Writer(ocore::BlockingWriter), Closed, } impl File { - pub fn new_reader(reader: ocore::BlockingReader) -> Self { - Self(FileState::Reader(reader)) + pub fn new_reader(reader: ocore::BlockingReader, size: u64) -> Self { + Self(FileState::Reader(reader.into_std_io_read(0..size))) } pub fn new_writer(writer: ocore::BlockingWriter) -> Self { @@ -76,10 +77,12 @@ impl File { let buffer = match size { Some(size) => { - let bs = reader - .read(size) + let mut bs = vec![0; size]; + let n = reader + .read(&mut bs) .map_err(|err| PyIOError::new_err(err.to_string()))?; - bs.to_vec() + bs.truncate(n); + bs } None => { let mut buffer = Vec::new(); @@ -202,14 +205,16 @@ impl File { pub struct AsyncFile(Arc>); enum AsyncFileState { - Reader(ocore::Reader), + Reader(ocore::FuturesIoAsyncReader), Writer(ocore::Writer), Closed, } impl AsyncFile { - pub fn new_reader(reader: ocore::Reader) -> Self { - Self(Arc::new(Mutex::new(AsyncFileState::Reader(reader)))) + pub fn new_reader(reader: ocore::Reader, size: u64) -> Self { + Self(Arc::new(Mutex::new(AsyncFileState::Reader( + reader.into_futures_io_async_read(0..size), + )))) } pub fn new_writer(writer: ocore::Writer) -> Self { @@ -241,11 +246,14 @@ impl AsyncFile { let buffer = match size { Some(size) => { - let buffer = reader - .read(size) + // TODO: optimize here by using uninit slice. 
+ let mut bs = vec![0; size]; + let n = reader + .read(&mut bs) .await .map_err(|err| PyIOError::new_err(err.to_string()))?; - buffer.to_vec() + bs.truncate(n); + bs } None => { let mut buffer = Vec::new(); diff --git a/bindings/python/src/operator.rs b/bindings/python/src/operator.rs index e5194886e38a..32e82cdd6c54 100644 --- a/bindings/python/src/operator.rs +++ b/bindings/python/src/operator.rs @@ -80,8 +80,9 @@ impl Operator { let this = self.0.clone(); if mode == "rb" { + let meta = this.stat(&path).map_err(format_pyerr)?; let r = this.reader(&path).map_err(format_pyerr)?; - Ok(File::new_reader(r)) + Ok(File::new_reader(r, meta.content_length())) } else if mode == "wb" { let w = this.writer(&path).map_err(format_pyerr)?; Ok(File::new_writer(w)) @@ -243,8 +244,9 @@ impl AsyncOperator { future_into_py(py, async move { if mode == "rb" { + let meta = this.stat(&path).await.map_err(format_pyerr)?; let r = this.reader(&path).await.map_err(format_pyerr)?; - Ok(AsyncFile::new_reader(r)) + Ok(AsyncFile::new_reader(r, meta.content_length())) } else if mode == "wb" { let w = this.writer(&path).await.map_err(format_pyerr)?; Ok(AsyncFile::new_writer(w)) diff --git a/core/Cargo.lock b/core/Cargo.lock index 60f55fda267f..b602bb8e4003 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -208,16 +208,6 @@ dependencies = [ "term", ] -[[package]] -name = "assert-json-diff" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" -dependencies = [ - "serde", - "serde_json", -] - [[package]] name = "async-backtrace" version = "0.2.6" @@ -2042,25 +2032,6 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5" -[[package]] -name = "deadpool" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "421fe0f90f2ab22016f32a9881be5134fdd71c65298917084b0c7477cbc3856e" -dependencies = [ - "async-trait", - "deadpool-runtime", - "num_cpus", - "retain_mut", - "tokio", -] - -[[package]] -name = "deadpool-runtime" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63dfa964fe2a66f3fde91fc70b267fe193d822c7e603e2a675a49a7f46ad3f49" - [[package]] name = "der" version = "0.6.1" @@ -3157,9 +3128,9 @@ dependencies = [ [[package]] name = "hdrs" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00a7b465f2d12e45db2af56af13a1cbfe1d5616d54355f5610b26c0961dec7b7" +checksum = "f7c42a693bfe5dc8fcad1f24044c5ec355c5f157b8ce63c7d62f51cecbc7878d" dependencies = [ "blocking", "errno", @@ -3274,27 +3245,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "http-types" -version = "2.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e9b187a72d63adbfba487f48095306ac823049cb504ee195541e91c7775f5ad" -dependencies = [ - "anyhow", - "async-channel 1.9.0", - "base64 0.13.1", - "futures-lite 1.13.0", - "http 0.2.11", - "infer", - "pin-project-lite", - "rand 0.7.3", - "serde", - "serde_json", - "serde_qs", - "serde_urlencoded", - "url", -] - [[package]] name = "httparse" version = "1.8.0" @@ -3443,12 +3393,6 @@ version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c40411d0e5c63ef1323c3d09ce5ec6d84d71531e18daed0743fccea279d7deb6" -[[package]] -name = "infer" -version = "0.2.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" - [[package]] name = "inout" version = "0.1.3" @@ -4607,7 +4551,6 @@ dependencies = [ "tracing", "tracing-subscriber", "uuid", - "wiremock", ] [[package]] @@ -6002,12 +5945,6 @@ dependencies = [ "quick-error", ] -[[package]] -name = "retain_mut" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4389f1d5789befaf6029ebd9f7dac4af7f7e3d61b69d4f30e2ac02b57e7712b0" - [[package]] name = "revision" version = "0.5.0" @@ -6580,17 +6517,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_qs" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7715380eec75f029a4ef7de39a9200e0a63823176b759d055b613f5a87df6a6" -dependencies = [ - "percent-encoding", - "serde", - "thiserror", -] - [[package]] name = "serde_spanned" version = "0.6.5" @@ -8035,7 +7961,6 @@ dependencies = [ "form_urlencoded", "idna 0.5.0", "percent-encoding", - "serde", ] [[package]] @@ -8635,28 +8560,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "wiremock" -version = "0.5.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13a3a53eaf34f390dd30d7b1b078287dd05df2aa2e21a589ccb80f5c7253c2e9" -dependencies = [ - "assert-json-diff", - "async-trait", - "base64 0.21.7", - "deadpool", - "futures", - "futures-timer", - "http-types", - "hyper", - "log", - "once_cell", - "regex", - "serde", - "serde_json", - "tokio", -] - [[package]] name = "ws_stream_wasm" version = "0.7.4" diff --git a/core/Cargo.toml b/core/Cargo.toml index 696288e634c4..b40282fceb8b 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -104,7 +104,11 @@ services-azdls = [ "reqsign?/services-azblob", "reqsign?/reqwest_request", ] -services-azfile = [] +services-azfile = [ + "dep:reqsign", + "reqsign?/services-azblob", + "reqsign?/reqwest_request", +] services-b2 = [] services-cacache = ["dep:cacache"] services-chainsafe = [] @@ -169,7 +173,7 @@ services-s3 = [ "reqsign?/reqwest_request", ] services-seafile = [] -services-sftp = ["dep:openssh", "dep:openssh-sftp-client"] +services-sftp = ["dep:openssh", "dep:openssh-sftp-client", "dep:bb8"] services-sled = ["dep:sled", "internal-tokio-rt"] services-sqlite = ["dep:rusqlite", "dep:r2d2", "internal-tokio-rt"] services-supabase = [] @@ -270,7 +274,7 @@ foundationdb = { version = "0.8.0", features = [ "embedded-fdb-include", ], optional = true } # for services-hdfs -hdrs = { version = "0.3.0", optional = true, features = ["async_file"] } +hdrs = { version = "0.3.2", optional = true, features = ["async_file"] } # for services-upyun hmac = { version = "0.12.1", optional = true } # for services-libsql @@ -364,4 +368,3 @@ tracing-subscriber = { version = "0.3", features = [ "env-filter", "tracing-log", ] } -wiremock = "0.5" diff --git a/core/benches/ops/read.rs b/core/benches/ops/read.rs index 401e72ff7b80..84a1e9b48cf3 100644 --- a/core/benches/ops/read.rs +++ b/core/benches/ops/read.rs @@ -28,7 +28,6 @@ use super::utils::*; pub fn bench(c: &mut Criterion) { if let Some(op) = init_test_service().unwrap() { bench_read_full(c, op.info().scheme().into_static(), op.clone()); - bench_read_part(c, op.info().scheme().into_static(), op.clone()); bench_read_parallel(c, op.info().scheme().into_static(), op.clone()); } } @@ -51,47 +50,14 @@ fn bench_read_full(c: &mut Criterion, name: &str, op: Operator) { group.throughput(criterion::Throughput::Bytes(size.bytes() as 
u64)); group.bench_with_input(size.to_string(), &(op.clone(), &path), |b, (op, path)| { b.to_async(&*TEST_RUNTIME).iter(|| async { - let r = op - .reader_with(path) - .range(0..=size.bytes() as u64) - .await - .unwrap(); - io::copy(r, &mut io::sink()).await.unwrap(); - }) - }); - - drop(temp_data); - } - - group.finish() -} + let r = op.reader_with(path).await.unwrap(); -/// Read from 1/4 to 3/4 and than drop the reader without consuming all data; -fn bench_read_part(c: &mut Criterion, name: &str, op: Operator) { - let mut group = c.benchmark_group(format!("service_{name}_read_part")); - - let mut rng = thread_rng(); - - for size in [ - Size::from_kibibytes(4), - Size::from_kibibytes(256), - Size::from_mebibytes(4), - Size::from_mebibytes(16), - ] { - let content = gen_bytes(&mut rng, (size.bytes() * 2) as usize); - let path = uuid::Uuid::new_v4().to_string(); - let offset = (size.bytes() / 2) as u64; - let temp_data = TempData::generate(op.clone(), &path, content.clone()); - - group.throughput(criterion::Throughput::Bytes(size.bytes() as u64)); - group.bench_with_input(size.to_string(), &(op.clone(), &path), |b, (op, path)| { - b.to_async(&*TEST_RUNTIME).iter(|| async { - let r = op.reader_with(path).range(offset..).await.unwrap(); + let r = r.into_futures_io_async_read(0..size.bytes() as u64); io::copy(r, &mut io::sink()).await.unwrap(); }) }); - std::mem::drop(temp_data); + drop(temp_data); } group.finish() @@ -123,12 +89,11 @@ fn bench_read_parallel(c: &mut Criterion, name: &str, op: Operator) { b.to_async(&*TEST_RUNTIME).iter(|| async { let futures = (0..parallel) .map(|_| async { - let mut r = op - .reader_with(path) - .range(offset..=offset + size.bytes() as u64) + let mut buf = Vec::with_capacity(*buf_size); + let r = op.reader_with(path).await.unwrap(); + r.read_range(&mut buf, offset..=offset + size.bytes() as u64) .await .unwrap(); - r.read_exact(*buf_size).await.unwrap(); let mut d = 0; // mock same little cpu work diff --git a/core/benches/vs_s3/src/main.rs b/core/benches/vs_s3/src/main.rs index b8fd9c3fb857..dd28226257df 100644 --- a/core/benches/vs_s3/src/main.rs +++ b/core/benches/vs_s3/src/main.rs @@ -73,7 +73,7 @@ fn bench_read(c: &mut Criterion, op: Operator, s3_client: aws_sdk_s3::Client, bu group.bench_function("opendal_s3_reader", |b| { b.to_async(&*TEST_RUNTIME).iter(|| async { - let mut r = op.reader("file").await.unwrap(); + let r = op.reader("file").await.unwrap(); let mut bs = Vec::new(); let _ = r.read_to_end(&mut bs).await.unwrap(); }); @@ -96,7 +96,7 @@ fn bench_read(c: &mut Criterion, op: Operator, s3_client: aws_sdk_s3::Client, bu group.bench_function("opendal_s3_reader_with_capacity", |b| { b.to_async(&*TEST_RUNTIME).iter(|| async { - let mut r = op.reader("file").await.unwrap(); + let r = op.reader("file").await.unwrap(); let mut bs = Vec::with_capacity(16 * 1024 * 1024); let _ = r.read_to_end(&mut bs).await.unwrap(); }); diff --git a/core/fuzz/fuzz_reader.rs b/core/fuzz/fuzz_reader.rs index c18a348cef84..3c91d01e8aaf 100644 --- a/core/fuzz/fuzz_reader.rs +++ b/core/fuzz/fuzz_reader.rs @@ -19,7 +19,6 @@ use std::fmt::Debug; use std::fmt::Formatter; -use std::io::SeekFrom; use libfuzzer_sys::arbitrary::Arbitrary; use libfuzzer_sys::arbitrary::Unstructured; @@ -28,10 +27,8 @@ use opendal::raw::tests::init_test_service; use opendal::raw::tests::ReadAction; use opendal::raw::tests::ReadChecker; use opendal::raw::tests::TEST_RUNTIME; -use opendal::raw::BytesRange; use opendal::Operator; use opendal::Result; -use tracing::warn; const MAX_DATA_SIZE: usize = 16 * 
1024 * 1024; @@ -39,8 +36,6 @@ const MAX_DATA_SIZE: usize = 16 * 1024 * 1024; struct FuzzInput { path: String, size: usize, - range: BytesRange, - buffer: Option, actions: Vec, } @@ -48,14 +43,12 @@ impl Debug for FuzzInput { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let mut actions = self.actions.clone(); // Remove all Read(0) entry. - let empty = ReadAction::Read(0); + let empty = ReadAction::Read(0, 0); actions.retain(|e| e != &empty); f.debug_struct("FuzzInput") .field("path", &self.path) .field("size", &self.size) - .field("range", &self.range.to_string()) - .field("buffer", &self.buffer) .field("actions", &actions) .finish() } @@ -65,85 +58,29 @@ impl Arbitrary<'_> for FuzzInput { fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result { let total_size = u.int_in_range(1..=MAX_DATA_SIZE)?; - // TODO: it's valid that size is larger than total_size. - let (offset, size) = match u.int_in_range(0..=3)? { - // Full range - 0 => (None, None), - 1 => { - let offset = u.int_in_range(0..=total_size as u64 - 1)?; - (Some(offset), None) - } - 2 => { - let size = u.int_in_range(1..=total_size as u64)?; - (None, Some(size)) - } - 3 => { - let offset = u.int_in_range(0..=total_size as u64 - 1)?; - let size = u.int_in_range(1..=total_size as u64 - offset)?; - (Some(offset), Some(size)) - } - _ => unreachable!("invalid int generated by arbitrary"), - }; - let range = BytesRange::new(offset, size); - - let buffer = if u.int_in_range(0..=1)? == 1 { - Some(u.int_in_range(1..=8 * 1024 * 1024)?) - } else { - None - }; - let count = u.int_in_range(1..=1024)?; let mut actions = vec![]; for _ in 0..count { - let action = match u.int_in_range(0..=3)? { - // Read - 0 => { - let size = u.int_in_range(0..=total_size * 2)?; - ReadAction::Read(size) - } - // Seek Start - 1 => { - // NOTE: seek out of the end of file is valid. 
- let offset = u.int_in_range(0..=total_size * 2)?; - ReadAction::Seek(SeekFrom::Start(offset as u64)) - } - // Seek Current - 2 => { - let offset = u.int_in_range(-(total_size as i64)..=(total_size as i64))?; - ReadAction::Seek(SeekFrom::Current(offset)) - } - // Seek End - 3 => { - let offset = u.int_in_range(-(total_size as i64)..=(total_size as i64))?; - ReadAction::Seek(SeekFrom::End(offset)) - } - _ => unreachable!("invalid int generated by arbitrary"), - }; - - actions.push(action); + let offset = u.int_in_range(0..=total_size)?; + let size = u.int_in_range(0..=total_size * 2)?; + + actions.push(ReadAction::Read(offset, size)); } Ok(FuzzInput { path: uuid::Uuid::new_v4().to_string(), size: total_size, - range, - buffer, actions, }) } } async fn fuzz_reader(op: Operator, input: FuzzInput) -> Result<()> { - let mut checker = ReadChecker::new(input.size, input.range); + let mut checker = ReadChecker::new(input.size); op.write(&input.path, checker.data()).await?; - let mut r = op.reader_with(&input.path); - r = r.range(input.range.to_range()); - if let Some(buffer) = input.buffer { - r = r.buffer(buffer); - } - let r = r.await?; + let r = op.reader(&input.path).await?; checker.check(r, &input.actions).await; @@ -160,11 +97,6 @@ fuzz_target!(|input: FuzzInput| { let op = init_test_service().expect("operator init must succeed"); if let Some(op) = op { - if !op.info().full_capability().read_with_range { - warn!("service doesn't support read with range, skip fuzzing"); - return; - } - TEST_RUNTIME.block_on(async { fuzz_reader(op, input.clone()) .await diff --git a/core/src/layers/blocking.rs b/core/src/layers/blocking.rs index bc6ff054820b..37cc798109a2 100644 --- a/core/src/layers/blocking.rs +++ b/core/src/layers/blocking.rs @@ -18,7 +18,6 @@ use async_trait::async_trait; use bytes; use bytes::Bytes; - use tokio::runtime::Handle; use crate::raw::*; @@ -288,12 +287,8 @@ impl BlockingWrapper { } impl oio::BlockingRead for BlockingWrapper { - fn read(&mut self, limit: usize) -> Result { - self.handle.block_on(self.inner.read(limit)) - } - - fn seek(&mut self, pos: std::io::SeekFrom) -> Result { - self.handle.block_on(self.inner.seek(pos)) + fn read_at(&self, offset: u64, limit: usize) -> Result { + self.handle.block_on(self.inner.read_at(offset, limit)) } } diff --git a/core/src/layers/chaos.rs b/core/src/layers/chaos.rs index 69c16e8adf70..063bc3f9f6bf 100644 --- a/core/src/layers/chaos.rs +++ b/core/src/layers/chaos.rs @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -use std::io; +use std::sync::Arc; +use std::sync::Mutex; use async_trait::async_trait; -use bytes::Bytes; use futures::FutureExt; use rand::prelude::*; use rand::rngs::StdRng; @@ -145,7 +145,7 @@ impl LayeredAccessor for ChaosAccessor { /// ChaosReader will inject error into read operations. pub struct ChaosReader { inner: R, - rng: StdRng, + rng: Arc>, error_ratio: f64, } @@ -154,15 +154,15 @@ impl ChaosReader { fn new(inner: R, rng: StdRng, error_ratio: f64) -> Self { Self { inner, - rng, + rng: Arc::new(Mutex::new(rng)), error_ratio, } } /// If I feel lucky, we can return the correct response. Otherwise, /// we need to generate an error. 
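// Editor's sketch (not part of the patch): why the chaos RNG moves behind a
// lock in the hunk below. The new `read_at` takes `&self`, so the layer can no
// longer mutate a bare `StdRng` field through `&mut self`; `Arc<Mutex<StdRng>>`
// gives interior mutability while the reader stays `Send + Sync`. The helper
// name and `error_ratio` parameter here are invented for illustration.
fn i_feel_lucky_sketch(rng: &std::sync::Mutex<rand::rngs::StdRng>, error_ratio: f64) -> bool {
    use rand::Rng;
    // Lock only for the duration of one sample, mirroring the patch's pattern.
    let point = rng.lock().unwrap().gen_range(0..=100);
    point >= (error_ratio * 100.0) as i32
}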
- fn i_feel_lucky(&mut self) -> bool { - let point = self.rng.gen_range(0..=100); + fn i_feel_lucky(&self) -> bool { + let point = self.rng.lock().unwrap().gen_range(0..=100); point >= (self.error_ratio * 100.0) as i32 } @@ -174,17 +174,9 @@ impl ChaosReader { } impl oio::Read for ChaosReader { - async fn read(&mut self, limit: usize) -> Result { - if self.i_feel_lucky() { - self.inner.read(limit).await - } else { - Err(Self::unexpected_eof()) - } - } - - async fn seek(&mut self, pos: io::SeekFrom) -> Result { + async fn read_at(&self, offset: u64, limit: usize) -> Result { if self.i_feel_lucky() { - self.inner.seek(pos).await + self.inner.read_at(offset, limit).await } else { Err(Self::unexpected_eof()) } @@ -192,17 +184,9 @@ impl oio::Read for ChaosReader { } impl oio::BlockingRead for ChaosReader { - fn read(&mut self, limit: usize) -> Result { - if self.i_feel_lucky() { - self.inner.read(limit) - } else { - Err(Self::unexpected_eof()) - } - } - - fn seek(&mut self, pos: io::SeekFrom) -> Result { + fn read_at(&self, offset: u64, limit: usize) -> Result { if self.i_feel_lucky() { - self.inner.seek(pos) + self.inner.read_at(offset, limit) } else { Err(Self::unexpected_eof()) } diff --git a/core/src/layers/complete.rs b/core/src/layers/complete.rs index d4985a0f5235..b694c6a6258b 100644 --- a/core/src/layers/complete.rs +++ b/core/src/layers/complete.rs @@ -18,19 +18,13 @@ use std::cmp; use std::fmt::Debug; use std::fmt::Formatter; - use std::sync::Arc; use async_trait::async_trait; use bytes::Bytes; -use crate::raw::oio::BufferReader; -use crate::raw::oio::FileReader; use crate::raw::oio::FlatLister; -use crate::raw::oio::LazyReader; use crate::raw::oio::PrefixLister; -use crate::raw::oio::RangeReader; -use crate::raw::oio::StreamableReader; use crate::raw::TwoWays; use crate::raw::*; use crate::*; @@ -280,92 +274,6 @@ impl CompleteAccessor { }) } - async fn complete_read( - &self, - path: &str, - args: OpRead, - ) -> Result<(RpRead, CompleteReader)> { - let capability = self.meta.native_capability(); - if !capability.read { - return Err(self.new_unsupported_error(Operation::Read)); - } - - let seekable = capability.read_can_seek; - let streamable = capability.read_can_next; - let buffer_cap = args.buffer(); - - let r = match (seekable, streamable) { - (true, true) => { - let r = LazyReader::new(self.inner.clone(), path, args); - InnerCompleteReader::One(r) - } - (true, false) => { - let r = FileReader::new(self.inner.clone(), path, args); - InnerCompleteReader::Two(r) - } - _ => { - let r = RangeReader::new(self.inner.clone(), path, args); - - if streamable { - InnerCompleteReader::Three(r) - } else { - let r = oio::into_streamable_read(r, 256 * 1024); - InnerCompleteReader::Four(r) - } - } - }; - - let r = match buffer_cap { - None => CompleteReader::One(r), - Some(cap) => CompleteReader::Two(BufferReader::new(r, cap)), - }; - - Ok((RpRead::new(), r)) - } - - fn complete_blocking_read( - &self, - path: &str, - args: OpRead, - ) -> Result<(RpRead, CompleteReader)> { - let capability = self.meta.full_capability(); - if !capability.read || !capability.blocking { - return Err(self.new_unsupported_error(Operation::BlockingRead)); - } - - let seekable = capability.read_can_seek; - let streamable = capability.read_can_next; - let buffer_cap = args.buffer(); - - let r = match (seekable, streamable) { - (true, true) => { - let r = LazyReader::new(self.inner.clone(), path, args); - InnerCompleteReader::One(r) - } - (true, false) => { - let r = FileReader::new(self.inner.clone(), path, args); - 
-                    InnerCompleteReader::Two(r)
-                }
-                _ => {
-                    let r = RangeReader::new(self.inner.clone(), path, args);
-
-                    if streamable {
-                        InnerCompleteReader::Three(r)
-                    } else {
-                        let r = oio::into_streamable_read(r, 256 * 1024);
-                        InnerCompleteReader::Four(r)
-                    }
-                }
-            };
-
-        let r = match buffer_cap {
-            None => CompleteReader::One(r),
-            Some(cap) => CompleteReader::Two(BufferReader::new(r, cap)),
-        };
-
-        Ok((RpRead::new(), r))
-    }
-
     async fn complete_list(
         &self,
         path: &str,
@@ -467,8 +375,8 @@ impl<A: Accessor> CompleteAccessor<A> {
 #[cfg_attr(target_arch = "wasm32", async_trait(?Send))]
 impl<A: Accessor> LayeredAccessor for CompleteAccessor<A> {
     type Inner = A;
-    type Reader = CompleteReader<A, A::Reader>;
-    type BlockingReader = CompleteReader<A, A::BlockingReader>;
+    type Reader = CompleteReader<A::Reader>;
+    type BlockingReader = CompleteReader<A::BlockingReader>;
     type Writer = TwoWays<CompleteWriter<A::Writer>, oio::ExactBufWriter<CompleteWriter<A::Writer>>>;
     type BlockingWriter = CompleteWriter<A::BlockingWriter>;
@@ -482,10 +390,6 @@ impl<A: Accessor> LayeredAccessor for CompleteAccessor<A> {
     fn metadata(&self) -> AccessorInfo {
         let mut meta = self.meta.clone();
         let cap = meta.full_capability_mut();
-        if cap.read {
-            cap.read_can_next = true;
-            cap.read_can_seek = true;
-        }
         if cap.list && cap.write_can_empty {
             cap.create_dir = true;
         }
@@ -497,7 +401,14 @@ impl<A: Accessor> LayeredAccessor for CompleteAccessor<A> {
     }

     async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> {
-        self.complete_read(path, args).await
+        let capability = self.meta.full_capability();
+        if !capability.read {
+            return Err(self.new_unsupported_error(Operation::Read));
+        }
+        self.inner
+            .read(path, args)
+            .await
+            .map(|(rp, r)| (rp, CompleteReader(r)))
     }

     async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> {
@@ -606,7 +517,13 @@ impl<A: Accessor> LayeredAccessor for CompleteAccessor<A> {
     }

     fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> {
-        self.complete_blocking_read(path, args)
+        let capability = self.meta.full_capability();
+        if !capability.read || !capability.blocking {
+            return Err(self.new_unsupported_error(Operation::Read));
+        }
+        self.inner
+            .blocking_read(path, args)
+            .map(|(rp, r)| (rp, CompleteReader(r)))
     }

     fn blocking_write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> {
@@ -671,19 +588,31 @@ impl<A: Accessor> LayeredAccessor for CompleteAccessor<A> {
     }
 }

-pub type CompleteReader<A, R> =
-    TwoWays<InnerCompleteReader<A, R>, BufferReader<InnerCompleteReader<A, R>>>;
-
-type InnerCompleteReader<A, R> = FourWays<
-    LazyReader<A, R>,
-    FileReader<A, R>,
-    RangeReader<A, R>,
-    StreamableReader<RangeReader<A, R>>,
->;
-
 pub type CompleteLister<A, P> =
     FourWays<P, FlatLister<Arc<A>, P>, PrefixLister<P>, PrefixLister<FlatLister<Arc<A>, P>>>;

+pub struct CompleteReader<R>(R);
+
+impl<R: oio::Read> oio::Read for CompleteReader<R> {
+    async fn read_at(&self, offset: u64, limit: usize) -> Result<oio::Buffer> {
+        if limit == 0 {
+            return Ok(oio::Buffer::new());
+        }
+
+        self.0.read_at(offset, limit).await
+    }
+}
+
+impl<R: oio::BlockingRead> oio::BlockingRead for CompleteReader<R> {
+    fn read_at(&self, offset: u64, limit: usize) -> Result<oio::Buffer> {
+        if limit == 0 {
+            return Ok(oio::Buffer::new());
+        }
+
+        self.0.read_at(offset, limit)
+    }
+}
+
 pub struct CompleteWriter<W> {
     inner: Option<W>,
 }
@@ -806,7 +735,7 @@ mod tests {
     }

     async fn read(&self, _: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> {
-        Ok((RpRead::new(), Box::new(oio::Cursor::new())))
+        Ok((RpRead::new(), Box::new(Bytes::new())))
     }

     async fn write(&self, _: &str, _: OpWrite) -> Result<(RpWrite, Self::Writer)> {
diff --git a/core/src/layers/concurrent_limit.rs b/core/src/layers/concurrent_limit.rs
index 992042ef0ae4..44d968024f35 100644
--- a/core/src/layers/concurrent_limit.rs
+++ b/core/src/layers/concurrent_limit.rs
@@ -16,8 +16,6 @@
 // under the License.

 use std::fmt::Debug;
-
-use std::io::SeekFrom;
 use std::sync::Arc;

 use async_trait::async_trait;
@@ -25,6 +23,7 @@ use bytes::Bytes;
 use tokio::sync::OwnedSemaphorePermit;
 use tokio::sync::Semaphore;

+use crate::raw::oio::Buffer;
 use crate::raw::*;
 use crate::*;

@@ -256,22 +255,14 @@ impl<R> ConcurrentLimitWrapper<R> {
 }

 impl<R: oio::Read> oio::Read for ConcurrentLimitWrapper<R> {
-    async fn seek(&mut self, pos: SeekFrom) -> Result<u64> {
-        self.inner.seek(pos).await
-    }
-
-    async fn read(&mut self, limit: usize) -> Result<Bytes> {
-        self.inner.read(limit).await
+    async fn read_at(&self, offset: u64, limit: usize) -> Result<Buffer> {
+        self.inner.read_at(offset, limit).await
     }
 }

 impl<R: oio::BlockingRead> oio::BlockingRead for ConcurrentLimitWrapper<R> {
-    fn read(&mut self, limit: usize) -> Result<Bytes> {
-        self.inner.read(limit)
-    }
-
-    fn seek(&mut self, pos: SeekFrom) -> Result<u64> {
-        self.inner.seek(pos)
+    fn read_at(&self, offset: u64, limit: usize) -> Result<Buffer> {
+        self.inner.read_at(offset, limit)
     }
 }
diff --git a/core/src/layers/dtrace.rs b/core/src/layers/dtrace.rs
index 295443cd49b8..5787f845a481 100644
--- a/core/src/layers/dtrace.rs
+++ b/core/src/layers/dtrace.rs
@@ -19,9 +19,8 @@
 use std::ffi::CString;
 use std::fmt::Debug;
 use std::fmt::Formatter;
-use std::io;
-
 use async_trait::async_trait;
+use bytes::Buf;
 use bytes::Bytes;
 use probe::probe_lazy;

@@ -344,12 +343,12 @@ impl<R> DtraceLayerWrapper<R> {
 }

 impl<R: oio::Read> oio::Read for DtraceLayerWrapper<R> {
-    async fn read(&mut self, limit: usize) -> Result<Bytes> {
+    async fn read_at(&self, offset: u64, limit: usize) -> Result<oio::Buffer> {
         let c_path = CString::new(self.path.clone()).unwrap();
         probe_lazy!(opendal, reader_read_start, c_path.as_ptr());
-        match self.inner.read(limit).await {
+        match self.inner.read_at(offset, limit).await {
             Ok(bs) => {
-                probe_lazy!(opendal, reader_read_ok, c_path.as_ptr(), bs.len());
+                probe_lazy!(opendal, reader_read_ok, c_path.as_ptr(), bs.remaining());
                 Ok(bs)
             }
             Err(e) => {
@@ -358,31 +357,21 @@ impl<R: oio::Read> oio::Read for DtraceLayerWrapper<R> {
             }
         }
     }
-
-    async fn seek(&mut self, pos: io::SeekFrom) -> Result<u64> {
-        let c_path = CString::new(self.path.clone()).unwrap();
-        probe_lazy!(opendal, reader_seek_start, c_path.as_ptr());
-        match self.inner.seek(pos).await {
-            Ok(n) => {
-                probe_lazy!(opendal, reader_seek_ok, c_path.as_ptr(), n);
-                Ok(n)
-            }
-            Err(e) => {
-                probe_lazy!(opendal, reader_seek_error, c_path.as_ptr());
-                Err(e)
-            }
-        }
-    }
 }

 impl<R: oio::BlockingRead> oio::BlockingRead for DtraceLayerWrapper<R> {
-    fn read(&mut self, limit: usize) -> 
Result { let c_path = CString::new(self.path.clone()).unwrap(); probe_lazy!(opendal, blocking_reader_read_start, c_path.as_ptr()); self.inner - .read(limit) + .read_at(offset, limit) .map(|bs| { - probe_lazy!(opendal, blocking_reader_read_ok, c_path.as_ptr(), bs.len()); + probe_lazy!( + opendal, + blocking_reader_read_ok, + c_path.as_ptr(), + bs.remaining() + ); bs }) .map_err(|e| { @@ -390,21 +379,6 @@ impl oio::BlockingRead for DtraceLayerWrapper { e }) } - - fn seek(&mut self, pos: io::SeekFrom) -> Result { - let c_path = CString::new(self.path.clone()).unwrap(); - probe_lazy!(opendal, blocking_reader_seek_start, c_path.as_ptr()); - self.inner - .seek(pos) - .map(|res| { - probe_lazy!(opendal, blocking_reader_seek_ok, c_path.as_ptr(), res); - res - }) - .map_err(|e| { - probe_lazy!(opendal, blocking_reader_seek_error, c_path.as_ptr()); - e - }) - } } impl oio::Write for DtraceLayerWrapper { diff --git a/core/src/layers/error_context.rs b/core/src/layers/error_context.rs index de37bc690b28..f57383c91296 100644 --- a/core/src/layers/error_context.rs +++ b/core/src/layers/error_context.rs @@ -18,8 +18,6 @@ use std::fmt::Debug; use std::fmt::Formatter; -use std::io::SeekFrom; - use async_trait::async_trait; use bytes::Bytes; use futures::TryFutureExt; @@ -93,8 +91,6 @@ impl LayeredAccessor for ErrorContextAccessor { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let br = args.range(); - self.inner .read(path, args) .map_ok(|(rp, r)| { @@ -111,7 +107,6 @@ impl LayeredAccessor for ErrorContextAccessor { err.with_operation(Operation::Read) .with_context("service", self.meta.scheme()) .with_context("path", path) - .with_context("range", br.to_string()) }) .await } @@ -348,41 +343,27 @@ pub struct ErrorContextWrapper { } impl oio::Read for ErrorContextWrapper { - async fn read(&mut self, limit: usize) -> Result { - self.inner.read(limit).await.map_err(|err| { + async fn read_at(&self, offset: u64, limit: usize) -> Result { + self.inner.read_at(offset, limit).await.map_err(|err| { err.with_operation(ReadOperation::Read) .with_context("service", self.scheme) .with_context("path", &self.path) - }) - } - - async fn seek(&mut self, pos: SeekFrom) -> Result { - self.inner.seek(pos).await.map_err(|err| { - err.with_operation(ReadOperation::Seek) - .with_context("service", self.scheme) - .with_context("path", &self.path) + .with_context("offset", offset.to_string()) + .with_context("limit", limit.to_string()) }) } } impl oio::BlockingRead for ErrorContextWrapper { - fn read(&mut self, limit: usize) -> Result { - self.inner.read(limit).map_err(|err| { + fn read_at(&self, offset: u64, limit: usize) -> Result { + self.inner.read_at(offset, limit).map_err(|err| { err.with_operation(ReadOperation::BlockingRead) .with_context("service", self.scheme) .with_context("path", &self.path) + .with_context("offset", offset.to_string()) .with_context("limit", limit.to_string()) }) } - - fn seek(&mut self, pos: SeekFrom) -> Result { - self.inner.seek(pos).map_err(|err| { - err.with_operation(ReadOperation::BlockingSeek) - .with_context("service", self.scheme) - .with_context("path", &self.path) - .with_context("seek", format!("{pos:?}")) - }) - } } impl oio::Write for ErrorContextWrapper { diff --git a/core/src/layers/immutable_index.rs b/core/src/layers/immutable_index.rs index a6eb054063ae..1b64e820ca3b 100644 --- a/core/src/layers/immutable_index.rs +++ b/core/src/layers/immutable_index.rs @@ -17,7 +17,6 @@ use std::collections::HashSet; use std::fmt::Debug; - use 
std::vec::IntoIter; use async_trait::async_trait; @@ -244,6 +243,7 @@ impl oio::BlockingList for ImmutableDir { } #[cfg(test)] +#[cfg(feature = "services-http")] mod tests { use std::collections::HashMap; use std::collections::HashSet; diff --git a/core/src/layers/logging.rs b/core/src/layers/logging.rs index 2f9265dbae24..c38999db7625 100644 --- a/core/src/layers/logging.rs +++ b/core/src/layers/logging.rs @@ -16,10 +16,11 @@ // under the License. use std::fmt::Debug; - -use std::io; +use std::sync::atomic::AtomicU64; +use std::sync::atomic::Ordering; use async_trait::async_trait; +use bytes::Buf; use bytes::Bytes; use futures::FutureExt; use futures::TryFutureExt; @@ -284,26 +285,22 @@ impl LayeredAccessor for LoggingAccessor { async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { debug!( target: LOGGING_TARGET, - "service={} operation={} path={} range={} -> started", + "service={} operation={} path={} -> started", self.ctx.scheme, Operation::Read, path, - args.range() ); - let range = args.range(); - self.inner .read(path, args) .await .map(|(rp, r)| { debug!( target: LOGGING_TARGET, - "service={} operation={} path={} range={} -> got reader", + "service={} operation={} path={} -> got reader", self.ctx.scheme, Operation::Read, path, - range ); ( rp, @@ -315,11 +312,10 @@ impl LayeredAccessor for LoggingAccessor { log!( target: LOGGING_TARGET, lvl, - "service={} operation={} path={} range={} -> {}", + "service={} operation={} path={} -> {}", self.ctx.scheme, Operation::Read, path, - range, self.ctx.error_print(&err) ) } @@ -683,11 +679,10 @@ impl LayeredAccessor for LoggingAccessor { fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { debug!( target: LOGGING_TARGET, - "service={} operation={} path={} range={} -> started", + "service={} operation={} path={} -> started", self.ctx.scheme, Operation::BlockingRead, path, - args.range(), ); self.inner @@ -695,11 +690,10 @@ impl LayeredAccessor for LoggingAccessor { .map(|(rp, r)| { debug!( target: LOGGING_TARGET, - "service={} operation={} path={} range={} -> got reader", + "service={} operation={} path={} -> got reader", self.ctx.scheme, Operation::BlockingRead, path, - args.range(), ); let r = LoggingReader::new(self.ctx.clone(), Operation::BlockingRead, path, r); (rp, r) @@ -709,11 +703,10 @@ impl LayeredAccessor for LoggingAccessor { log!( target: LOGGING_TARGET, lvl, - "service={} operation={} path={} range={} -> {}", + "service={} operation={} path={} -> {}", self.ctx.scheme, Operation::BlockingRead, path, - args.range(), self.ctx.error_print(&err) ); } @@ -958,7 +951,7 @@ pub struct LoggingReader { path: String, op: Operation, - read: u64, + read: AtomicU64, inner: R, } @@ -969,7 +962,7 @@ impl LoggingReader { op, path: path.to_string(), - read: 0, + read: AtomicU64::new(0), inner: reader, } } @@ -983,24 +976,25 @@ impl Drop for LoggingReader { self.ctx.scheme, self.op, self.path, - self.read + self.read.load(Ordering::Relaxed) ); } } impl oio::Read for LoggingReader { - async fn read(&mut self, limit: usize) -> Result { - match self.inner.read(limit).await { + async fn read_at(&self, offset: u64, limit: usize) -> Result { + match self.inner.read_at(offset, limit).await { Ok(bs) => { - self.read += bs.len() as u64; + self.read + .fetch_add(bs.remaining() as u64, Ordering::Relaxed); trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> next returns {}B", + "service={} operation={} path={} read={} -> read returns {}B", self.ctx.scheme, 
ReadOperation::Read, self.path, - self.read, - bs.len() + self.read.load(Ordering::Relaxed), + bs.remaining() ); Ok(bs) } @@ -1009,42 +1003,11 @@ impl oio::Read for LoggingReader { log!( target: LOGGING_TARGET, lvl, - "service={} operation={} path={} read={} -> next failed: {}", + "service={} operation={} path={} read={} -> read failed: {}", self.ctx.scheme, ReadOperation::Read, self.path, - self.read, - self.ctx.error_print(&err), - ) - } - Err(err) - } - } - } - - async fn seek(&mut self, pos: io::SeekFrom) -> Result { - match self.inner.seek(pos).await { - Ok(n) => { - trace!( - target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> seek to {pos:?}, current offset {n}", - self.ctx.scheme, - ReadOperation::Seek, - self.path, - self.read, - ); - Ok(n) - } - Err(err) => { - if let Some(lvl) = self.ctx.error_level(&err) { - log!( - target: LOGGING_TARGET, - lvl, - "service={} operation={} path={} read={} -> seek to {pos:?} failed: {}", - self.ctx.scheme, - ReadOperation::Seek, - self.path, - self.read, + self.read.load(Ordering::Relaxed), self.ctx.error_print(&err), ) } @@ -1055,18 +1018,19 @@ impl oio::Read for LoggingReader { } impl oio::BlockingRead for LoggingReader { - fn read(&mut self, limit: usize) -> Result { - match self.inner.read(limit) { + fn read_at(&self, offset: u64, limit: usize) -> Result { + match self.inner.read_at(offset, limit) { Ok(bs) => { - self.read += bs.len() as u64; + self.read + .fetch_add(bs.remaining() as u64, Ordering::Relaxed); trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> data read {}B", + "service={} operation={} path={} read={} -> read returns {}B", self.ctx.scheme, ReadOperation::BlockingRead, self.path, - self.read, - bs.len() + self.read.load(Ordering::Relaxed), + bs.remaining() ); Ok(bs) } @@ -1075,43 +1039,11 @@ impl oio::BlockingRead for LoggingReader { log!( target: LOGGING_TARGET, lvl, - "service={} operation={} path={} read={} -> data read failed: {}", + "service={} operation={} path={} read={} -> read failed: {}", self.ctx.scheme, ReadOperation::BlockingRead, self.path, - self.read, - self.ctx.error_print(&err), - ); - } - Err(err) - } - } - } - - #[inline] - fn seek(&mut self, pos: io::SeekFrom) -> Result { - match self.inner.seek(pos) { - Ok(n) => { - trace!( - target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> data seek to offset {n}", - self.ctx.scheme, - ReadOperation::BlockingSeek, - self.path, - self.read, - ); - Ok(n) - } - Err(err) => { - if let Some(lvl) = self.ctx.error_level(&err) { - log!( - target: LOGGING_TARGET, - lvl, - "service={} operation={} path={} read={} -> data read failed: {}", - self.ctx.scheme, - ReadOperation::BlockingSeek, - self.path, - self.read, + self.read.load(Ordering::Relaxed), self.ctx.error_print(&err), ); } diff --git a/core/src/layers/madsim.rs b/core/src/layers/madsim.rs index 4af7c1d84d1e..b48580a10bfa 100644 --- a/core/src/layers/madsim.rs +++ b/core/src/layers/madsim.rs @@ -264,21 +264,14 @@ pub struct MadsimReader { } impl oio::Read for MadsimReader { - async fn read(&mut self, size: usize) -> crate::Result { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { if let Some(ref data) = self.data { - let size = min(size, data.len()); - Ok(data.clone().split_to(size)) + let size = min(limit, data.len()); + Ok(data.clone().split_to(size).into()) } else { - Ok(Bytes::new()) + Ok(oio::Buffer::new()) } } - - async fn seek(&mut self, _: SeekFrom) -> crate::Result { - Err(Error::new( - ErrorKind::Unsupported, - "will be 
supported in the future", - )) - } } pub struct MadsimWriter { diff --git a/core/src/layers/metrics.rs b/core/src/layers/metrics.rs index a4abc41bdfc5..b1e1c615c817 100644 --- a/core/src/layers/metrics.rs +++ b/core/src/layers/metrics.rs @@ -17,13 +17,11 @@ use std::fmt::Debug; use std::fmt::Formatter; -use std::future::Future; -use std::io; use std::sync::Arc; - use std::time::Instant; use async_trait::async_trait; +use bytes::Buf; use bytes::Bytes; use futures::FutureExt; use futures::TryFutureExt; @@ -456,7 +454,7 @@ impl LayeredAccessor for MetricsAccessor { async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { self.handle.requests_total_read.increment(1); - let start = Instant::now(); + let _start = Instant::now(); self.inner .read(path, args) @@ -470,7 +468,6 @@ impl LayeredAccessor for MetricsAccessor { self.handle.clone(), self.handle.bytes_total_read.clone(), self.handle.requests_duration_seconds_read.clone(), - Some(start), ), ) }) @@ -486,7 +483,7 @@ impl LayeredAccessor for MetricsAccessor { async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { self.handle.requests_total_write.increment(1); - let start = Instant::now(); + let _start = Instant::now(); self.inner .write(path, args) @@ -499,7 +496,6 @@ impl LayeredAccessor for MetricsAccessor { self.handle.clone(), self.handle.bytes_total_write.clone(), self.handle.requests_duration_seconds_write.clone(), - Some(start), ), ) }) @@ -620,7 +616,7 @@ impl LayeredAccessor for MetricsAccessor { fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { self.handle.requests_total_blocking_read.increment(1); - let start = Instant::now(); + let _start = Instant::now(); let result = self.inner.blocking_read(path, args).map(|(rp, r)| { ( rp, @@ -630,7 +626,6 @@ impl LayeredAccessor for MetricsAccessor { self.handle.clone(), self.handle.bytes_total_blocking_read.clone(), self.handle.requests_duration_seconds_blocking_read.clone(), - Some(start), ), ) }); @@ -663,7 +658,6 @@ impl LayeredAccessor for MetricsAccessor { self.handle.clone(), self.handle.bytes_total_write.clone(), self.handle.requests_duration_seconds_write.clone(), - Some(start), ), ) }) @@ -736,9 +730,6 @@ pub struct MetricWrapper { bytes_counter: Counter, requests_duration_seconds: Histogram, handle: Arc, - - start: Option, - bytes: u64, } impl MetricWrapper { @@ -748,7 +739,6 @@ impl MetricWrapper { handle: Arc, bytes_counter: Counter, requests_duration_seconds: Histogram, - start: Option, ) -> Self { Self { inner, @@ -756,39 +746,21 @@ impl MetricWrapper { handle, bytes_counter, requests_duration_seconds, - start, - bytes: 0, - } - } -} - -impl Drop for MetricWrapper { - fn drop(&mut self) { - self.bytes_counter.increment(self.bytes); - if let Some(instant) = self.start { - let dur = instant.elapsed().as_secs_f64(); - self.requests_duration_seconds.record(dur); } } } impl oio::Read for MetricWrapper { - async fn read(&mut self, limit: usize) -> Result { - match self.inner.read(limit).await { - Ok(bytes) => { - self.bytes += bytes.len() as u64; - Ok(bytes) - } - Err(e) => { - self.handle.increment_errors_total(self.op, e.kind()); - Err(e) - } - } - } + async fn read_at(&self, offset: u64, limit: usize) -> Result { + let start = Instant::now(); - async fn seek(&mut self, pos: io::SeekFrom) -> Result { - match self.inner.seek(pos).await { - Ok(n) => Ok(n), + match self.inner.read_at(offset, limit).await { + Ok(bs) => { + self.bytes_counter.increment(bs.remaining() as u64); + 
self.requests_duration_seconds + .record(start.elapsed().as_secs_f64()); + Ok(bs) + } Err(e) => { self.handle.increment_errors_total(self.op, e.kind()); Err(e) @@ -798,11 +770,15 @@ impl oio::Read for MetricWrapper { } impl oio::BlockingRead for MetricWrapper { - fn read(&mut self, limit: usize) -> Result { + fn read_at(&self, offset: u64, limit: usize) -> Result { + let start = Instant::now(); + self.inner - .read(limit) + .read_at(offset, limit) .map(|bs| { - self.bytes += bs.len() as u64; + self.bytes_counter.increment(bs.remaining() as u64); + self.requests_duration_seconds + .record(start.elapsed().as_secs_f64()); bs }) .map_err(|e| { @@ -810,21 +786,19 @@ impl oio::BlockingRead for MetricWrapper { e }) } - - fn seek(&mut self, pos: io::SeekFrom) -> Result { - self.inner.seek(pos).map_err(|err| { - self.handle.increment_errors_total(self.op, err.kind()); - err - }) - } } impl oio::Write for MetricWrapper { - fn write(&mut self, bs: Bytes) -> impl Future> + Send { + async fn write(&mut self, bs: Bytes) -> Result { + let start = Instant::now(); + self.inner .write(bs) - .map_ok(|n| { - self.bytes += n as u64; + .await + .map(|n| { + self.bytes_counter.increment(n as u64); + self.requests_duration_seconds + .record(start.elapsed().as_secs_f64()); n }) .map_err(|err| { @@ -833,15 +807,15 @@ impl oio::Write for MetricWrapper { }) } - fn abort(&mut self) -> impl Future> + Send { - self.inner.abort().map_err(|err| { + async fn abort(&mut self) -> Result<()> { + self.inner.abort().await.map_err(|err| { self.handle.increment_errors_total(self.op, err.kind()); err }) } - fn close(&mut self) -> impl Future> + Send { - self.inner.close().map_err(|err| { + async fn close(&mut self) -> Result<()> { + self.inner.close().await.map_err(|err| { self.handle.increment_errors_total(self.op, err.kind()); err }) @@ -853,7 +827,7 @@ impl oio::BlockingWrite for MetricWrapper { self.inner .write(bs) .map(|n| { - self.bytes += n as u64; + self.bytes_counter.increment(n as u64); n }) .map_err(|err| { diff --git a/core/src/layers/minitrace.rs b/core/src/layers/minitrace.rs index 8dc720fe57ec..254fd11128a2 100644 --- a/core/src/layers/minitrace.rs +++ b/core/src/layers/minitrace.rs @@ -18,8 +18,6 @@ use std::fmt::Debug; use std::future::Future; -use std::io; - use async_trait::async_trait; use bytes::Bytes; use futures::FutureExt; @@ -298,27 +296,16 @@ impl MinitraceWrapper { impl oio::Read for MinitraceWrapper { #[trace(enter_on_poll = true)] - async fn read(&mut self, limit: usize) -> Result { - self.inner.read(limit).await - } - - #[trace(enter_on_poll = true)] - async fn seek(&mut self, pos: io::SeekFrom) -> Result { - self.inner.seek(pos).await + async fn read_at(&self, offset: u64, limit: usize) -> Result { + self.inner.read_at(offset, limit).await } } impl oio::BlockingRead for MinitraceWrapper { - fn read(&mut self, limit: usize) -> Result { + fn read_at(&self, offset: u64, limit: usize) -> Result { let _g = self.span.set_local_parent(); let _span = LocalSpan::enter_with_local_parent(ReadOperation::BlockingRead.into_static()); - self.inner.read(limit) - } - - fn seek(&mut self, pos: io::SeekFrom) -> Result { - let _g = self.span.set_local_parent(); - let _span = LocalSpan::enter_with_local_parent(ReadOperation::BlockingSeek.into_static()); - self.inner.seek(pos) + self.inner.read_at(offset, limit) } } diff --git a/core/src/layers/oteltrace.rs b/core/src/layers/oteltrace.rs index 138811bb6fcd..574eb58c004c 100644 --- a/core/src/layers/oteltrace.rs +++ b/core/src/layers/oteltrace.rs @@ -16,7 +16,6 @@ // 
under the License. use std::future::Future; -use std::io; use async_trait::async_trait; use bytes::Bytes; @@ -277,22 +276,14 @@ impl OtelTraceWrapper { } impl oio::Read for OtelTraceWrapper { - async fn read(&mut self, limit: usize) -> Result { - self.inner.read(limit).await - } - - async fn seek(&mut self, pos: io::SeekFrom) -> Result { - self.inner.seek(pos).await + async fn read_at(&self, offset: u64, limit: usize) -> Result { + self.inner.read_at(offset, limit).await } } impl oio::BlockingRead for OtelTraceWrapper { - fn read(&mut self, limit: usize) -> Result { - self.inner.read(limit) - } - - fn seek(&mut self, pos: io::SeekFrom) -> Result { - self.inner.seek(pos) + fn read_at(&self, offset: u64, limit: usize) -> Result { + self.inner.read_at(offset, limit) } } diff --git a/core/src/layers/prometheus.rs b/core/src/layers/prometheus.rs index b04f8ddb6397..224db75c9814 100644 --- a/core/src/layers/prometheus.rs +++ b/core/src/layers/prometheus.rs @@ -17,11 +17,10 @@ use std::fmt::Debug; use std::fmt::Formatter; - -use std::io; use std::sync::Arc; use async_trait::async_trait; +use bytes::Buf; use bytes::Bytes; use futures::FutureExt; use futures::TryFutureExt; @@ -685,18 +684,18 @@ impl PrometheusMetricWrapper { } impl oio::Read for PrometheusMetricWrapper { - async fn read(&mut self, limit: usize) -> Result { + async fn read_at(&self, offset: u64, limit: usize) -> Result { let labels = self.stats.generate_metric_label( self.scheme.into_static(), Operation::Read.into_static(), &self.path, ); - match self.inner.read(limit).await { + match self.inner.read_at(offset, limit).await { Ok(bytes) => { self.stats .bytes_total .with_label_values(&labels) - .observe(bytes.len() as f64); + .observe(bytes.remaining() as f64); Ok(bytes) } Err(e) => { @@ -705,32 +704,22 @@ impl oio::Read for PrometheusMetricWrapper { } } } - - async fn seek(&mut self, pos: io::SeekFrom) -> Result { - match self.inner.seek(pos).await { - Ok(n) => Ok(n), - Err(e) => { - self.stats.increment_errors_total(self.op, e.kind()); - Err(e) - } - } - } } impl oio::BlockingRead for PrometheusMetricWrapper { - fn read(&mut self, limit: usize) -> Result { + fn read_at(&self, offset: u64, limit: usize) -> Result { let labels = self.stats.generate_metric_label( self.scheme.into_static(), Operation::BlockingRead.into_static(), &self.path, ); self.inner - .read(limit) + .read_at(offset, limit) .map(|bs| { self.stats .bytes_total .with_label_values(&labels) - .observe(bs.len() as f64); + .observe(bs.remaining() as f64); bs }) .map_err(|e| { @@ -738,13 +727,6 @@ impl oio::BlockingRead for PrometheusMetricWrapper { e }) } - - fn seek(&mut self, pos: io::SeekFrom) -> Result { - self.inner.seek(pos).map_err(|err| { - self.stats.increment_errors_total(self.op, err.kind()); - err - }) - } } impl oio::Write for PrometheusMetricWrapper { diff --git a/core/src/layers/prometheus_client.rs b/core/src/layers/prometheus_client.rs index e29050571c32..967cb8800142 100644 --- a/core/src/layers/prometheus_client.rs +++ b/core/src/layers/prometheus_client.rs @@ -17,14 +17,12 @@ use std::fmt::Debug; use std::fmt::Formatter; -use std::future::Future; -use std::io; use std::sync::Arc; - use std::time::Duration; use std::time::Instant; use async_trait::async_trait; +use bytes::Buf; use bytes::Bytes; use futures::FutureExt; use futures::TryFutureExt; @@ -539,23 +537,17 @@ impl PrometheusMetricWrapper { } impl oio::Read for PrometheusMetricWrapper { - async fn read(&mut self, limit: usize) -> Result { - match self.inner.read(limit).await { - Ok(bytes) => { - 
self.bytes_total += bytes.len(); - Ok(bytes) - } - Err(e) => { + async fn read_at(&self, offset: u64, limit: usize) -> Result { + let start = Instant::now(); + + match self.inner.read_at(offset, limit).await { + Ok(bs) => { self.metrics - .increment_errors_total(self.scheme, self.op, e.kind()); - Err(e) + .observe_bytes_total(self.scheme, self.op, bs.remaining()); + self.metrics + .observe_request_duration(self.scheme, self.op, start.elapsed()); + Ok(bs) } - } - } - - async fn seek(&mut self, pos: io::SeekFrom) -> Result { - match self.inner.seek(pos).await { - Ok(n) => Ok(n), Err(e) => { self.metrics .increment_errors_total(self.scheme, self.op, e.kind()); @@ -566,11 +558,15 @@ impl oio::Read for PrometheusMetricWrapper { } impl oio::BlockingRead for PrometheusMetricWrapper { - fn read(&mut self, limit: usize) -> Result { + fn read_at(&self, offset: u64, limit: usize) -> Result { + let start = Instant::now(); self.inner - .read(limit) + .read_at(offset, limit) .map(|bs| { - self.bytes_total += bs.len(); + self.metrics + .observe_bytes_total(self.scheme, self.op, bs.remaining()); + self.metrics + .observe_request_duration(self.scheme, self.op, start.elapsed()); bs }) .map_err(|e| { @@ -579,22 +575,19 @@ impl oio::BlockingRead for PrometheusMetricWrapper { e }) } - - fn seek(&mut self, pos: io::SeekFrom) -> Result { - self.inner.seek(pos).map_err(|err| { - self.metrics - .increment_errors_total(self.scheme, self.op, err.kind()); - err - }) - } } impl oio::Write for PrometheusMetricWrapper { - fn write(&mut self, bs: Bytes) -> impl Future> + Send { + async fn write(&mut self, bs: Bytes) -> Result { + let start = Instant::now(); + self.inner .write(bs) - .map_ok(|n| { - self.bytes_total += n; + .await + .map(|n| { + self.metrics.observe_bytes_total(self.scheme, self.op, n); + self.metrics + .observe_request_duration(self.scheme, self.op, start.elapsed()); n }) .map_err(|err| { @@ -604,16 +597,16 @@ impl oio::Write for PrometheusMetricWrapper { }) } - fn abort(&mut self) -> impl Future> + Send { - self.inner.abort().map_err(|err| { + async fn abort(&mut self) -> Result<()> { + self.inner.abort().await.map_err(|err| { self.metrics .increment_errors_total(self.scheme, self.op, err.kind()); err }) } - fn close(&mut self) -> impl Future> + Send { - self.inner.close().map_err(|err| { + async fn close(&mut self) -> Result<()> { + self.inner.close().await.map_err(|err| { self.metrics .increment_errors_total(self.scheme, self.op, err.kind()); err diff --git a/core/src/layers/retry.rs b/core/src/layers/retry.rs index d8bfbbb26d92..43ed81f8671a 100644 --- a/core/src/layers/retry.rs +++ b/core/src/layers/retry.rs @@ -17,22 +17,18 @@ use std::fmt::Debug; use std::fmt::Formatter; - -use std::io; - use std::sync::Arc; - use std::time::Duration; use async_trait::async_trait; use backon::BlockingRetryable; - use backon::ExponentialBuilder; use backon::Retryable; use bytes::Bytes; use futures::FutureExt; use log::warn; +use crate::raw::oio::Buffer; use crate::raw::oio::ListOperation; use crate::raw::oio::ReadOperation; use crate::raw::oio::WriteOperation; @@ -665,20 +661,16 @@ impl RetryWrapper { } impl oio::Read for RetryWrapper { - async fn seek(&mut self, pos: io::SeekFrom) -> Result { - use backon::RetryableWithContext; - - let inner = self.inner.take().expect("inner must be valid"); - - let (inner, res) = { - |mut r: R| async move { - let res = r.seek(pos).await; - - (r, res) + async fn read_at(&self, offset: u64, limit: usize) -> Result { + { + || { + self.inner + .as_ref() + .expect("inner must be 
valid") + .read_at(offset, limit) } } .retry(&self.builder) - .context(inner) .when(|e| e.is_temporary()) .notify(|err, dur| { self.notify.intercept( @@ -690,49 +682,14 @@ impl oio::Read for RetryWrapper { ], ) }) - .map(|(r, res)| (r, res.map_err(|err| err.set_persistent()))) - .await; - - self.inner = Some(inner); - res - } - - async fn read(&mut self, limit: usize) -> Result { - use backon::RetryableWithContext; - - let inner = self.inner.take().expect("inner must be valid"); - - let (inner, res) = { - |mut r: R| async move { - let res = r.read(limit).await; - - (r, res) - } - } - .retry(&self.builder) - .when(|e| e.is_temporary()) - .context(inner) - .notify(|err, dur| { - self.notify.intercept( - err, - dur, - &[ - ("operation", ReadOperation::Read.into_static()), - ("path", &self.path), - ], - ) - }) - .map(|(r, res)| (r, res.map_err(|err| err.set_persistent()))) - .await; - - self.inner = Some(inner); - res + .await + .map_err(|e| e.set_persistent()) } } impl oio::BlockingRead for RetryWrapper { - fn read(&mut self, limit: usize) -> Result { - { || self.inner.as_mut().unwrap().read(limit) } + fn read_at(&self, offset: u64, limit: usize) -> Result { + { || self.inner.as_ref().unwrap().read_at(offset, limit) } .retry(&self.builder) .when(|e| e.is_temporary()) .notify(|err, dur| { @@ -748,24 +705,6 @@ impl oio::BlockingRead for RetryWrapp .call() .map_err(|e| e.set_persistent()) } - - fn seek(&mut self, pos: io::SeekFrom) -> Result { - { || self.inner.as_mut().unwrap().seek(pos) } - .retry(&self.builder) - .when(|e| e.is_temporary()) - .notify(|err, dur| { - self.notify.intercept( - err, - dur, - &[ - ("operation", ReadOperation::BlockingSeek.into_static()), - ("path", &self.path), - ], - ); - }) - .call() - .map_err(|e| e.set_persistent()) - } } impl oio::Write for RetryWrapper { @@ -961,7 +900,6 @@ impl oio::BlockingList for RetryWrapp #[cfg(test)] mod tests { use std::collections::HashMap; - use std::io; use std::sync::Arc; use std::sync::Mutex; @@ -1024,7 +962,6 @@ mod tests { RpRead::new(), MockReader { attempt: self.attempt.clone(), - pos: 0, }, )) } @@ -1095,21 +1032,10 @@ mod tests { #[derive(Debug, Clone, Default)] struct MockReader { attempt: Arc>, - pos: u64, } impl oio::Read for MockReader { - async fn seek(&mut self, pos: io::SeekFrom) -> Result { - self.pos = match pos { - io::SeekFrom::Current(n) => (self.pos as i64 + n) as u64, - io::SeekFrom::Start(n) => n, - io::SeekFrom::End(n) => (13 + n) as u64, - }; - - Ok(self.pos) - } - - async fn read(&mut self, _: usize) -> Result { + async fn read_at(&self, _: u64, _: usize) -> Result { let mut attempt = self.attempt.lock().unwrap(); *attempt += 1; @@ -1118,19 +1044,11 @@ mod tests { Error::new(ErrorKind::Unexpected, "retryable_error from reader") .set_temporary(), ), - 2 => { - self.pos += 7; - Ok(Bytes::copy_from_slice("Hello, ".as_bytes())) - } - 3 => Err( + 2 => Err( Error::new(ErrorKind::Unexpected, "retryable_error from reader") .set_temporary(), ), - 4 => { - self.pos += 6; - Ok(Bytes::copy_from_slice("World!".as_bytes())) - } - 5 => Ok(Bytes::new()), + 3 => Ok(Bytes::copy_from_slice("Hello, World!".as_bytes()).into()), _ => unreachable!(), } } @@ -1188,7 +1106,7 @@ mod tests { .layer(RetryLayer::new()) .finish(); - let mut r = op.reader("retryable_error").await.unwrap(); + let r = op.reader("retryable_error").await.unwrap(); let mut content = Vec::new(); let size = r .read_to_end(&mut content) @@ -1196,8 +1114,8 @@ mod tests { .expect("read must succeed"); assert_eq!(size, 13); assert_eq!(content, "Hello, 
World!".as_bytes()); - // The error is retryable, we should request it 1 + 10 times. - assert_eq!(*builder.attempt.lock().unwrap(), 5); + // The error is retryable, we should request it 3 times. + assert_eq!(*builder.attempt.lock().unwrap(), 3); } #[tokio::test] diff --git a/core/src/layers/throttle.rs b/core/src/layers/throttle.rs index 11f7e9913fbd..a21b2d94f1c2 100644 --- a/core/src/layers/throttle.rs +++ b/core/src/layers/throttle.rs @@ -15,10 +15,8 @@ // specific language governing permissions and limitations // under the License. -use std::io::SeekFrom; use std::num::NonZeroU32; use std::sync::Arc; - use std::thread; use async_trait::async_trait; @@ -185,24 +183,16 @@ impl ThrottleWrapper { } impl oio::Read for ThrottleWrapper { - async fn read(&mut self, limit: usize) -> Result { + async fn read_at(&self, offset: u64, limit: usize) -> Result { // TODO: How can we handle buffer reads with a limiter? - self.inner.read(limit).await - } - - async fn seek(&mut self, pos: SeekFrom) -> Result { - self.inner.seek(pos).await + self.inner.read_at(offset, limit).await } } impl oio::BlockingRead for ThrottleWrapper { - fn read(&mut self, limit: usize) -> Result { + fn read_at(&self, offset: u64, limit: usize) -> Result { // TODO: How can we handle buffer reads with a limiter? - self.inner.read(limit) - } - - fn seek(&mut self, pos: SeekFrom) -> Result { - self.inner.seek(pos) + self.inner.read_at(offset, limit) } } diff --git a/core/src/layers/timeout.rs b/core/src/layers/timeout.rs index 1679361564d3..fdd7faf8af51 100644 --- a/core/src/layers/timeout.rs +++ b/core/src/layers/timeout.rs @@ -16,8 +16,6 @@ // under the License. use std::future::Future; -use std::io::SeekFrom; - use std::time::Duration; use async_trait::async_trait; @@ -294,15 +292,10 @@ impl TimeoutWrapper { } impl oio::Read for TimeoutWrapper { - async fn read(&mut self, limit: usize) -> Result { - let fut = self.inner.read(limit); + async fn read_at(&self, offset: u64, limit: usize) -> Result { + let fut = self.inner.read_at(offset, limit); Self::io_timeout(self.timeout, ReadOperation::Read.into_static(), fut).await } - - async fn seek(&mut self, pos: SeekFrom) -> Result { - let fut = self.inner.seek(pos); - Self::io_timeout(self.timeout, ReadOperation::Seek.into_static(), fut).await - } } impl oio::Write for TimeoutWrapper { @@ -331,14 +324,12 @@ impl oio::List for TimeoutWrapper { #[cfg(test)] mod tests { - use std::future::{pending, Future}; - use std::io::SeekFrom; + use std::future::pending; + use std::future::Future; use std::sync::Arc; - use std::time::Duration; use async_trait::async_trait; - use bytes::Bytes; use futures::StreamExt; use tokio::time::sleep; use tokio::time::timeout; @@ -393,11 +384,7 @@ mod tests { struct MockReader; impl oio::Read for MockReader { - fn seek(&mut self, _: SeekFrom) -> impl Future> { - pending() - } - - fn read(&mut self, _: usize) -> impl Future> { + fn read_at(&self, _: u64, _: usize) -> impl Future> { pending() } } @@ -436,9 +423,9 @@ mod tests { let op = Operator::from_inner(acc) .layer(TimeoutLayer::new().with_io_timeout(Duration::from_secs(1))); - let mut reader = op.reader("test").await.unwrap(); + let reader = op.reader("test").await.unwrap(); - let res = reader.read(4).await; + let res = reader.read(&mut Vec::default(), 0, 4).await; assert!(res.is_err()); let err = res.unwrap_err(); assert_eq!(err.kind(), ErrorKind::Unexpected); diff --git a/core/src/layers/tracing.rs b/core/src/layers/tracing.rs index dcc0a79f4c91..bbdc6b204b40 100644 --- a/core/src/layers/tracing.rs +++ 
b/core/src/layers/tracing.rs @@ -18,8 +18,6 @@ use std::fmt::Debug; use std::future::Future; -use std::io; - use async_trait::async_trait; use bytes::Bytes; use futures::FutureExt; @@ -272,16 +270,8 @@ impl oio::Read for TracingWrapper { parent = &self.span, level = "trace", skip_all)] - async fn read(&mut self, limit: usize) -> Result { - self.inner.read(limit).await - } - - #[tracing::instrument( - parent = &self.span, - level = "trace", - skip_all)] - async fn seek(&mut self, pos: io::SeekFrom) -> Result { - self.inner.seek(pos).await + async fn read_at(&self, offset: u64, limit: usize) -> Result { + self.inner.read_at(offset, limit).await } } @@ -290,16 +280,8 @@ impl oio::BlockingRead for TracingWrapper { parent = &self.span, level = "trace", skip_all)] - fn read(&mut self, limit: usize) -> Result { - self.inner.read(limit) - } - - #[tracing::instrument( - parent = &self.span, - level = "trace", - skip_all)] - fn seek(&mut self, pos: io::SeekFrom) -> Result { - self.inner.seek(pos) + fn read_at(&self, offset: u64, limit: usize) -> Result { + self.inner.read_at(offset, limit) } } diff --git a/core/src/raw/adapters/kv/backend.rs b/core/src/raw/adapters/kv/backend.rs index 08447eeb1e99..72ebf9ff68f2 100644 --- a/core/src/raw/adapters/kv/backend.rs +++ b/core/src/raw/adapters/kv/backend.rs @@ -16,7 +16,6 @@ // under the License. use std::sync::Arc; - use std::vec::IntoIter; use async_trait::async_trait; @@ -63,8 +62,8 @@ where #[cfg_attr(not(target_arch = "wasm32"), async_trait)] #[cfg_attr(target_arch = "wasm32", async_trait(?Send))] impl Accessor for Backend { - type Reader = oio::Cursor; - type BlockingReader = oio::Cursor; + type Reader = Bytes; + type BlockingReader = Bytes; type Writer = KvWriter; type BlockingWriter = KvWriter; type Lister = HierarchyLister; @@ -76,9 +75,6 @@ impl Accessor for Backend { let mut cap = am.native_capability(); if cap.read { - cap.read_can_seek = true; - cap.read_can_next = true; - cap.read_with_range = true; cap.stat = true; } @@ -96,7 +92,7 @@ impl Accessor for Backend { am } - async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + async fn read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { let p = build_abs_path(&self.root, path); let bs = match self.kv.get(&p).await? { @@ -104,12 +100,12 @@ impl Accessor for Backend { None => return Err(Error::new(ErrorKind::NotFound, "kv doesn't have this path")), }; - let bs = self.apply_range(bs, args.range()); + let bs = Bytes::from(bs); - Ok((RpRead::new(), oio::Cursor::from(bs))) + Ok((RpRead::new(), bs)) } - fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { + fn blocking_read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::BlockingReader)> { let p = build_abs_path(&self.root, path); let bs = match self.kv.blocking_get(&p)? 
{ @@ -117,8 +113,8 @@ impl Accessor for Backend { None => return Err(Error::new(ErrorKind::NotFound, "kv doesn't have this path")), }; - let bs = self.apply_range(bs, args.range()); - Ok((RpRead::new(), oio::Cursor::from(bs))) + let bs = Bytes::from(bs); + Ok((RpRead::new(), bs)) } async fn write(&self, path: &str, _: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -198,26 +194,6 @@ impl Accessor for Backend { } } -impl Backend -where - S: Adapter, -{ - fn apply_range(&self, mut bs: Vec, br: BytesRange) -> Vec { - match (br.offset(), br.size()) { - (Some(offset), Some(size)) => { - let mut bs = bs.split_off(offset as usize); - if (size as usize) < bs.len() { - let _ = bs.split_off(size as usize); - } - bs - } - (Some(offset), None) => bs.split_off(offset as usize), - (None, Some(size)) => bs.split_off(bs.len() - size as usize), - (None, None) => bs, - } - } -} - pub struct KvLister { root: String, inner: IntoIter, diff --git a/core/src/raw/adapters/typed_kv/backend.rs b/core/src/raw/adapters/typed_kv/backend.rs index c33acfecdc84..a6910ead2c01 100644 --- a/core/src/raw/adapters/typed_kv/backend.rs +++ b/core/src/raw/adapters/typed_kv/backend.rs @@ -16,7 +16,6 @@ // under the License. use std::sync::Arc; - use std::vec::IntoIter; use async_trait::async_trait; @@ -57,8 +56,8 @@ where #[cfg_attr(not(target_arch = "wasm32"), async_trait)] #[cfg_attr(target_arch = "wasm32", async_trait(?Send))] impl Accessor for Backend { - type Reader = oio::Cursor; - type BlockingReader = oio::Cursor; + type Reader = Bytes; + type BlockingReader = Bytes; type Writer = KvWriter; type BlockingWriter = KvWriter; type Lister = HierarchyLister; @@ -75,9 +74,6 @@ impl Accessor for Backend { let mut cap = Capability::default(); if kv_cap.get { cap.read = true; - cap.read_can_seek = true; - cap.read_can_next = true; - cap.read_with_range = true; cap.stat = true; } @@ -102,7 +98,7 @@ impl Accessor for Backend { am } - async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + async fn read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { let p = build_abs_path(&self.root, path); let bs = match self.kv.get(&p).await? { @@ -111,12 +107,10 @@ impl Accessor for Backend { None => return Err(Error::new(ErrorKind::NotFound, "kv doesn't have this path")), }; - let bs = self.apply_range(bs, args.range()); - - Ok((RpRead::new(), oio::Cursor::from(bs))) + Ok((RpRead::new(), bs)) } - fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { + fn blocking_read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::BlockingReader)> { let p = build_abs_path(&self.root, path); let bs = match self.kv.blocking_get(&p)? 
{ @@ -125,8 +119,7 @@ impl Accessor for Backend { None => return Err(Error::new(ErrorKind::NotFound, "kv doesn't have this path")), }; - let bs = self.apply_range(bs, args.range()); - Ok((RpRead::new(), oio::Cursor::from(bs))) + Ok((RpRead::new(), bs)) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -202,26 +195,6 @@ impl Accessor for Backend { } } -impl Backend -where - S: Adapter, -{ - fn apply_range(&self, mut bs: Bytes, br: BytesRange) -> Bytes { - match (br.offset(), br.size()) { - (Some(offset), Some(size)) => { - let mut bs = bs.split_off(offset as usize); - if (size as usize) < bs.len() { - let _ = bs.split_off(size as usize); - } - bs - } - (Some(offset), None) => bs.split_off(offset as usize), - (None, Some(size)) => bs.split_off(bs.len() - size as usize), - (None, None) => bs, - } - } -} - pub struct KvLister { root: String, inner: IntoIter, diff --git a/core/src/raw/enum_utils.rs b/core/src/raw/enum_utils.rs index be39f0df5bbf..475cbe98f2e4 100644 --- a/core/src/raw/enum_utils.rs +++ b/core/src/raw/enum_utils.rs @@ -38,10 +38,9 @@ //! This module is used to provide some enums for the above code. We should remove this module once //! type_alias_impl_trait has been stabilized. -use std::io::SeekFrom; - use bytes::Bytes; +use crate::raw::oio::Buffer; use crate::raw::*; use crate::*; @@ -56,33 +55,19 @@ pub enum TwoWays { } impl oio::Read for TwoWays { - async fn seek(&mut self, pos: SeekFrom) -> Result { + async fn read_at(&self, offset: u64, limit: usize) -> Result { match self { - Self::One(v) => v.seek(pos).await, - Self::Two(v) => v.seek(pos).await, - } - } - - async fn read(&mut self, limit: usize) -> Result { - match self { - Self::One(v) => v.read(limit).await, - Self::Two(v) => v.read(limit).await, + TwoWays::One(v) => v.read_at(offset, limit).await, + TwoWays::Two(v) => v.read_at(offset, limit).await, } } } impl oio::BlockingRead for TwoWays { - fn read(&mut self, limit: usize) -> Result { + fn read_at(&self, offset: u64, limit: usize) -> Result { match self { - Self::One(v) => v.read(limit), - Self::Two(v) => v.read(limit), - } - } - - fn seek(&mut self, pos: SeekFrom) -> Result { - match self { - Self::One(v) => v.seek(pos), - Self::Two(v) => v.seek(pos), + Self::One(v) => v.read_at(offset, limit), + Self::Two(v) => v.read_at(offset, limit), } } } @@ -123,19 +108,11 @@ pub enum ThreeWays { } impl oio::Read for ThreeWays { - async fn seek(&mut self, pos: SeekFrom) -> Result { - match self { - Self::One(v) => v.seek(pos).await, - Self::Two(v) => v.seek(pos).await, - Self::Three(v) => v.seek(pos).await, - } - } - - async fn read(&mut self, limit: usize) -> Result { + async fn read_at(&self, offset: u64, limit: usize) -> Result { match self { - Self::One(v) => v.read(limit).await, - Self::Two(v) => v.read(limit).await, - Self::Three(v) => v.read(limit).await, + ThreeWays::One(v) => v.read_at(offset, limit).await, + ThreeWays::Two(v) => v.read_at(offset, limit).await, + ThreeWays::Three(v) => v.read_at(offset, limit).await, } } } @@ -143,19 +120,11 @@ impl oio::Read for ThreeWays oio::BlockingRead for ThreeWays { - fn read(&mut self, limit: usize) -> Result { + fn read_at(&self, offset: u64, limit: usize) -> Result { match self { - Self::One(v) => v.read(limit), - Self::Two(v) => v.read(limit), - Self::Three(v) => v.read(limit), - } - } - - fn seek(&mut self, pos: SeekFrom) -> Result { - match self { - Self::One(v) => v.seek(pos), - Self::Two(v) => v.seek(pos), - Self::Three(v) => v.seek(pos), + Self::One(v) => v.read_at(offset, 
limit), + Self::Two(v) => v.read_at(offset, limit), + Self::Three(v) => v.read_at(offset, limit), } } } @@ -209,21 +178,12 @@ where THREE: oio::Read, FOUR: oio::Read, { - async fn seek(&mut self, pos: SeekFrom) -> Result { - match self { - Self::One(v) => v.seek(pos).await, - Self::Two(v) => v.seek(pos).await, - Self::Three(v) => v.seek(pos).await, - Self::Four(v) => v.seek(pos).await, - } - } - - async fn read(&mut self, limit: usize) -> Result { + async fn read_at(&self, offset: u64, limit: usize) -> Result { match self { - Self::One(v) => v.read(limit).await, - Self::Two(v) => v.read(limit).await, - Self::Three(v) => v.read(limit).await, - Self::Four(v) => v.read(limit).await, + FourWays::One(v) => v.read_at(offset, limit).await, + FourWays::Two(v) => v.read_at(offset, limit).await, + FourWays::Three(v) => v.read_at(offset, limit).await, + FourWays::Four(v) => v.read_at(offset, limit).await, } } } @@ -235,21 +195,12 @@ where THREE: oio::BlockingRead, FOUR: oio::BlockingRead, { - fn read(&mut self, limit: usize) -> Result { - match self { - Self::One(v) => v.read(limit), - Self::Two(v) => v.read(limit), - Self::Three(v) => v.read(limit), - Self::Four(v) => v.read(limit), - } - } - - fn seek(&mut self, pos: SeekFrom) -> Result { + fn read_at(&self, offset: u64, limit: usize) -> Result { match self { - Self::One(v) => v.seek(pos), - Self::Two(v) => v.seek(pos), - Self::Three(v) => v.seek(pos), - Self::Four(v) => v.seek(pos), + Self::One(v) => v.read_at(offset, limit), + Self::Two(v) => v.read_at(offset, limit), + Self::Three(v) => v.read_at(offset, limit), + Self::Four(v) => v.read_at(offset, limit), } } } diff --git a/core/src/raw/http_util/body.rs b/core/src/raw/http_util/body.rs index e32dcb46d449..917fc53b1f77 100644 --- a/core/src/raw/http_util/body.rs +++ b/core/src/raw/http_util/body.rs @@ -15,17 +15,9 @@ // specific language governing permissions and limitations // under the License. -use std::cmp::min; -use std::cmp::Ordering; -use std::io; - -use bytes::Buf; -use bytes::BufMut; use bytes::Bytes; -use futures::StreamExt; use crate::raw::*; -use crate::*; /// Body used in async HTTP requests. #[derive(Default)] @@ -36,160 +28,7 @@ pub enum AsyncBody { /// Body with bytes. Bytes(Bytes), /// Body with stream. - Stream(oio::Streamer), -} - -/// IncomingAsyncBody carries the content returned by remote servers. -/// -/// # Notes -/// -/// Client SHOULD NEVER construct this body. -pub struct IncomingAsyncBody { - /// # TODO - /// - /// hyper returns `impl Stream>` but we can't - /// write the types in stable. So we will box here. - /// - /// After [TAIT](https://rust-lang.github.io/rfcs/2515-type_alias_impl_trait.html) - /// has been stable, we can change `IncomingAsyncBody` into `IncomingAsyncBody`. - inner: oio::Streamer, - size: Option, - consumed: u64, - chunk: Bytes, -} - -impl IncomingAsyncBody { - /// Construct a new incoming async body - pub fn new(s: oio::Streamer, size: Option) -> Self { - Self { - inner: s, - size, - consumed: 0, - chunk: Bytes::new(), - } - } - - /// Create an empty IncomingAsyncBody. - #[allow(dead_code)] - pub(crate) fn empty() -> Self { - Self { - inner: Box::new(()), - size: Some(0), - consumed: 0, - chunk: Bytes::new(), - } - } - - /// Consume the entire body. 
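Since `read_at` now takes `&self`, the `TwoWays`/`ThreeWays`/`FourWays` enums above forward calls without any state juggling. A minimal sketch of how a backend might use one; `HttpReader` is a hypothetical stand-in for a real remote reader, not a type from this patch:

    fn into_reader(cached: Option<Bytes>, remote: HttpReader) -> TwoWays<Bytes, HttpReader> {
        match cached {
            // `Bytes` implements `oio::Read` as of this patch, so a fully
            // buffered payload can serve as a reader directly.
            Some(bs) => TwoWays::One(bs),
            None => TwoWays::Two(remote),
        }
    }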
- pub async fn consume(mut self) -> Result<()> { - use oio::Read; - - loop { - let buf = self.read(4 * 1024 * 1024).await.map_err(|err| { - Error::new(ErrorKind::Unexpected, "fetch bytes from stream") - .with_operation("http_util::IncomingAsyncBody::consume") - .set_source(err) - })?; - if buf.is_empty() { - break; - } - } - - Ok(()) - } - - /// Consume the response to bytes. /// - /// This code is inspired from hyper's [`to_bytes`](https://docs.rs/hyper/0.14.23/hyper/body/fn.to_bytes.html). - pub async fn bytes(mut self) -> Result { - use oio::Read; - - // If there's only 1 chunk, we can just return Buf::to_bytes() - let first = self.read(4 * 1024 * 1024).await?; - if first.is_empty() { - return Ok(first); - } - - let second = self.read(4 * 1024 * 1024).await?; - if second.is_empty() { - return Ok(first); - } - - // With more than 1 buf, we gotta flatten into a Vec first. - let cap = if let Some(size) = self.size { - // The convert from u64 to usize could fail, but it's unlikely. - // Let's just make it overflow. - size as usize - } else { - // It's highly possible that we have more data to read. - // Add extra 16K buffer to avoid another allocation. - first.remaining() + second.remaining() + 16 * 1024 - }; - let mut vec = Vec::with_capacity(cap); - vec.put(first); - vec.put(second); - - // TODO: we can tune the io size here. - loop { - let buf = self.read(4 * 1024 * 1024).await?; - if buf.is_empty() { - break; - } - vec.put(buf); - } - - Ok(vec.into()) - } - - #[inline] - fn check(expect: u64, actual: u64) -> Result<()> { - match actual.cmp(&expect) { - Ordering::Equal => Ok(()), - Ordering::Less => Err(Error::new( - ErrorKind::ContentIncomplete, - &format!("reader got too little data, expect: {expect}, actual: {actual}"), - ) - .set_temporary()), - Ordering::Greater => Err(Error::new( - ErrorKind::ContentTruncated, - &format!("reader got too much data, expect: {expect}, actual: {actual}"), - ) - .set_temporary()), - } - } -} - -impl oio::Read for IncomingAsyncBody { - async fn read(&mut self, limit: usize) -> Result { - if self.size == Some(0) { - return Ok(Bytes::new()); - } - - if self.chunk.is_empty() { - self.chunk = match self.inner.next().await.transpose()? { - Some(bs) => bs, - None => { - if let Some(size) = self.size { - Self::check(size, self.consumed)? - } - - return Ok(Bytes::new()); - } - }; - } - - let size = min(limit, self.chunk.len()); - self.consumed += size as u64; - let bs = self.chunk.split_to(size); - Ok(bs) - } - - async fn seek(&mut self, pos: io::SeekFrom) -> Result { - let _ = pos; - - Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support seeking", - )) - } + /// TODO: remove this variant once by adopting oio::Buffer in writing. + Stream(oio::Streamer), } diff --git a/core/src/raw/http_util/bytes_range.rs b/core/src/raw/http_util/bytes_range.rs index 1cf3466b4823..b2bde391311a 100644 --- a/core/src/raw/http_util/bytes_range.rs +++ b/core/src/raw/http_util/bytes_range.rs @@ -22,8 +22,6 @@ use std::ops::Bound; use std::ops::RangeBounds; use std::str::FromStr; -use bytes::Bytes; - use crate::Error; use crate::ErrorKind; use crate::Result; @@ -37,22 +35,15 @@ use crate::Result; /// ```text /// Range: bytes=- /// Range: bytes=- -/// Range: bytes=- /// ``` /// /// # Notes /// -/// BytesRange support construct via rust native range syntax like `..`, `1024..`, `..2048`. -/// But it's has different semantic on `RangeTo`: `..`. 
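The length check removed here moves into `http_util::client` further down in this patch; its contract, sketched with the error kinds it maps to:

    // expect = advertised length, actual = bytes actually observed
    assert!(check(13, 13).is_ok());
    assert_eq!(check(13, 7).unwrap_err().kind(), ErrorKind::ContentIncomplete);
    assert_eq!(check(13, 20).unwrap_err().kind(), ErrorKind::ContentTruncated);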
-/// In rust, `..` means all items that `< end`, but in BytesRange, `..` means the -/// tailing part of content, a.k.a, the last `` bytes of content. -/// -/// - `0..1024` will be converted to header `range: bytes=0-1024` -/// - `..1024` will be converted to header `range: bytes=-1024` +/// We don't support tailing read like `Range: bytes=-` #[derive(Default, Debug, Clone, Copy, Eq, PartialEq)] pub struct BytesRange( /// Offset of the range. - Option, + u64, /// Size of the range. Option, ); @@ -68,12 +59,12 @@ impl BytesRange { /// /// - offset=None => `bytes=-`, read `` bytes from end. /// - offset=Some(0) => `bytes=0-`, read `` bytes from start. - pub fn new(offset: Option, size: Option) -> Self { + pub fn new(offset: u64, size: Option) -> Self { BytesRange(offset, size) } /// Get offset of BytesRange. - pub fn offset(&self) -> Option { + pub fn offset(&self) -> u64 { self.0 } @@ -86,15 +77,10 @@ impl BytesRange { /// /// If this range is full, we don't need to specify it in http request. pub fn is_full(&self) -> bool { - self.0.unwrap_or_default() == 0 && self.1.is_none() + self.0 == 0 && self.1.is_none() } /// Convert bytes range into Range header. - /// - /// # NOTE - /// - /// - `bytes=-1023` means get the suffix of the file. - /// - `bytes=0-1023` means get the first 1024 bytes, we must set the end to 1023. pub fn to_header(&self) -> String { format!("bytes={self}") } @@ -102,56 +88,20 @@ impl BytesRange { /// Convert bytes range into rust range. pub fn to_range(&self) -> impl RangeBounds { ( - match self.0 { - Some(offset) => Bound::Included(offset), - None => Bound::Unbounded, - }, + Bound::Included(self.0), match self.1 { - Some(size) => Bound::Excluded(self.0.unwrap_or_default() + size), + Some(size) => Bound::Excluded(self.0 + size), None => Bound::Unbounded, }, ) } - - /// Complete range with total size. - pub fn complete(&self, total_size: u64) -> Self { - match (self.offset(), self.size()) { - (Some(_), Some(_)) => *self, - (Some(offset), None) => Self(Some(offset), Some(total_size - offset)), - (None, Some(size)) => Self(Some(total_size - size), Some(size)), - (None, None) => Self(Some(0), Some(total_size)), - } - } - - /// apply_on_bytes will apply range on bytes. - pub fn apply_on_bytes(&self, mut bs: Bytes) -> Bytes { - match (self.0, self.1) { - (None, None) => bs, - (None, Some(size)) => { - if size as usize >= bs.len() { - return bs; - } - bs.split_off(bs.len() - size as usize) - } - (Some(offset), None) => bs.split_off(offset as usize), - (Some(offset), Some(size)) => { - let mut bs = bs.split_off(offset as usize); - if (size as usize) < bs.len() { - let _ = bs.split_off(size as usize); - } - bs - } - } - } } impl Display for BytesRange { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match (self.0, self.1) { - (Some(offset), None) => write!(f, "{offset}-"), - (None, Some(size)) => write!(f, "-{size}"), - (Some(offset), Some(size)) => write!(f, "{}-{}", offset, offset + size - 1), - (None, None) => write!(f, "0-"), + match self.1 { + None => write!(f, "{}-", self.0), + Some(size) => write!(f, "{}-{}", self.0, self.0 + size - 1), } } } @@ -189,20 +139,22 @@ impl FromStr for BytesRange { if v[1].is_empty() { // - Ok(BytesRange::new( - Some(v[0].parse().map_err(parse_int_error)?), + v[0].parse().map_err(parse_int_error)?, None, )) } else if v[0].is_empty() { // - - Ok(BytesRange::new( - None, - Some(v[1].parse::().map_err(parse_int_error)? 
+ 1), - )) + Err(Error::new( + ErrorKind::Unexpected, + "header range with tailing is not supported", + ) + .with_operation("BytesRange::from_str") + .with_context("value", value)) } else { // - let start: u64 = v[0].parse().map_err(parse_int_error)?; let end: u64 = v[1].parse().map_err(parse_int_error)?; - Ok(BytesRange::new(Some(start), Some(end - start + 1))) + Ok(BytesRange::new(start, Some(end - start + 1))) } } } @@ -213,13 +165,13 @@ where { fn from(range: T) -> Self { let offset = match range.start_bound().cloned() { - Bound::Included(n) => Some(n), - Bound::Excluded(n) => Some(n + 1), - Bound::Unbounded => None, + Bound::Included(n) => n, + Bound::Excluded(n) => n + 1, + Bound::Unbounded => 0, }; let size = match range.end_bound().cloned() { - Bound::Included(n) => Some(n + 1 - offset.unwrap_or_default()), - Bound::Excluded(n) => Some(n - offset.unwrap_or_default()), + Bound::Included(n) => Some(n + 1 - offset), + Bound::Excluded(n) => Some(n - offset), Bound::Unbounded => None, }; @@ -233,69 +185,48 @@ mod tests { #[test] fn test_bytes_range_to_string() { - let h = BytesRange::new(None, Some(1024)); - assert_eq!(h.to_string(), "-1024"); - - let h = BytesRange::new(Some(0), Some(1024)); + let h = BytesRange::new(0, Some(1024)); assert_eq!(h.to_string(), "0-1023"); - let h = BytesRange::new(Some(1024), None); + let h = BytesRange::new(1024, None); assert_eq!(h.to_string(), "1024-"); - let h = BytesRange::new(Some(1024), Some(1024)); + let h = BytesRange::new(1024, Some(1024)); assert_eq!(h.to_string(), "1024-2047"); } #[test] fn test_bytes_range_to_header() { - let h = BytesRange::new(None, Some(1024)); - assert_eq!(h.to_header(), "bytes=-1024"); - - let h = BytesRange::new(Some(0), Some(1024)); + let h = BytesRange::new(0, Some(1024)); assert_eq!(h.to_header(), "bytes=0-1023"); - let h = BytesRange::new(Some(1024), None); + let h = BytesRange::new(1024, None); assert_eq!(h.to_header(), "bytes=1024-"); - let h = BytesRange::new(Some(1024), Some(1024)); + let h = BytesRange::new(1024, Some(1024)); assert_eq!(h.to_header(), "bytes=1024-2047"); } #[test] fn test_bytes_range_from_range_bounds() { - assert_eq!(BytesRange::new(None, None), BytesRange::from(..)); - assert_eq!(BytesRange::new(Some(10), None), BytesRange::from(10..)); - assert_eq!(BytesRange::new(None, Some(11)), BytesRange::from(..=10)); - assert_eq!(BytesRange::new(None, Some(10)), BytesRange::from(..10)); - assert_eq!( - BytesRange::new(Some(10), Some(10)), - BytesRange::from(10..20) - ); - assert_eq!( - BytesRange::new(Some(10), Some(11)), - BytesRange::from(10..=20) - ); + assert_eq!(BytesRange::new(0, None), BytesRange::from(..)); + assert_eq!(BytesRange::new(10, None), BytesRange::from(10..)); + assert_eq!(BytesRange::new(0, Some(11)), BytesRange::from(..=10)); + assert_eq!(BytesRange::new(0, Some(10)), BytesRange::from(..10)); + assert_eq!(BytesRange::new(10, Some(10)), BytesRange::from(10..20)); + assert_eq!(BytesRange::new(10, Some(11)), BytesRange::from(10..=20)); } #[test] fn test_bytes_range_from_str() -> Result<()> { let cases = vec![ - ( - "range-start", - "bytes=123-", - BytesRange::new(Some(123), None), - ), - ("suffix", "bytes=-123", BytesRange::new(None, Some(124))), - ( - "range", - "bytes=123-124", - BytesRange::new(Some(123), Some(2)), - ), - ("one byte", "bytes=0-0", BytesRange::new(Some(0), Some(1))), + ("range-start", "bytes=123-", BytesRange::new(123, None)), + ("range", "bytes=123-124", BytesRange::new(123, Some(2))), + ("one byte", "bytes=0-0", BytesRange::new(0, Some(1))), ( "lower case header", 
"bytes=0-0", - BytesRange::new(Some(0), Some(1)), + BytesRange::new(0, Some(1)), ), ]; @@ -307,32 +238,4 @@ mod tests { Ok(()) } - - #[test] - fn test_apply_on_bytes() -> Result<()> { - let bs = Bytes::from_static("Hello, World!".as_bytes()); - - let cases = vec![ - ("full", (None, None), "Hello, World!"), - ("with_offset", (Some(1), None), "ello, World!"), - ("with_size", (None, Some(1)), "!"), - ("with_larger_size", (None, Some(100)), "Hello, World!"), - ("with_offset_and_size", (Some(1), Some(1)), "e"), - ( - "with_offset_and_larger_size", - (Some(1), Some(100)), - "ello, World!", - ), - ("with_empty_offset", (Some(0), Some(100)), "Hello, World!"), - ]; - - for (name, input, expected) in cases { - let actual = BytesRange(input.0, input.1).apply_on_bytes(bs.clone()); - let actual = String::from_utf8_lossy(&actual); - - assert_eq!(expected, &actual, "{name}"); - } - - Ok(()) - } } diff --git a/core/src/raw/http_util/client.rs b/core/src/raw/http_util/client.rs index 3f2f50d67571..6c78130c6cbc 100644 --- a/core/src/raw/http_util/client.rs +++ b/core/src/raw/http_util/client.rs @@ -15,16 +15,18 @@ // specific language governing permissions and limitations // under the License. +use std::cmp::Ordering; use std::fmt::Debug; use std::fmt::Formatter; -use std::mem; use std::str::FromStr; +use std::{future, mem}; +use bytes::Buf; +use bytes::Bytes; use futures::TryStreamExt; use http::Request; use http::Response; -use super::body::IncomingAsyncBody; use super::parse_content_encoding; use super::parse_content_length; use super::AsyncBody; @@ -78,7 +80,7 @@ impl HttpClient { } /// Send a request in async way. - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { // Uri stores all string alike data in `Bytes` which means // the clone here is cheap. let uri = req.uri().clone(); @@ -166,19 +168,42 @@ impl HttpClient { // Swap headers directly instead of copy the entire map. mem::swap(hr.headers_mut().unwrap(), resp.headers_mut()); - let stream = resp.bytes_stream().map_err(move |err| { - // If stream returns a body related error, we can convert - // it to interrupt so we can retry it. 
- Error::new(ErrorKind::Unexpected, "read data from http stream") - .map(|v| if err.is_body() { v.set_temporary() } else { v }) - .with_context("url", uri.to_string()) - .set_source(err) - }); - - let body = IncomingAsyncBody::new(Box::new(oio::into_stream(stream)), content_length); + let bs: Vec = resp + .bytes_stream() + .try_filter(|v| future::ready(!v.is_empty())) + .try_collect() + .await + .map_err(|err| { + Error::new(ErrorKind::Unexpected, "read data from http response") + .with_context("url", uri.to_string()) + .set_source(err) + })?; + + let buffer = oio::Buffer::from(bs); + + if let Some(expect) = content_length { + check(expect, buffer.remaining() as u64)?; + } - let resp = hr.body(body).expect("response must build succeed"); + let resp = hr.body(buffer).expect("response must build succeed"); Ok(resp) } } + +#[inline] +fn check(expect: u64, actual: u64) -> Result<()> { + match actual.cmp(&expect) { + Ordering::Equal => Ok(()), + Ordering::Less => Err(Error::new( + ErrorKind::ContentIncomplete, + &format!("reader got too little data, expect: {expect}, actual: {actual}"), + ) + .set_temporary()), + Ordering::Greater => Err(Error::new( + ErrorKind::ContentTruncated, + &format!("reader got too much data, expect: {expect}, actual: {actual}"), + ) + .set_temporary()), + } +} diff --git a/core/src/raw/http_util/mod.rs b/core/src/raw/http_util/mod.rs index 5a5f375e8dba..70ddcc16d1ef 100644 --- a/core/src/raw/http_util/mod.rs +++ b/core/src/raw/http_util/mod.rs @@ -27,7 +27,6 @@ pub use client::HttpClient; mod body; pub use body::AsyncBody; -pub use body::IncomingAsyncBody; mod header; pub use header::build_header_value; diff --git a/core/src/raw/http_util/multipart.rs b/core/src/raw/http_util/multipart.rs index 4d40b01a6128..aefaedf9b3ca 100644 --- a/core/src/raw/http_util/multipart.rs +++ b/core/src/raw/http_util/multipart.rs @@ -22,9 +22,8 @@ use std::task::ready; use std::task::Context; use std::task::Poll; -use bytes::Bytes; use bytes::BytesMut; -use futures::stream; +use bytes::{Buf, Bytes}; use http::header::CONTENT_DISPOSITION; use http::header::CONTENT_LENGTH; use http::header::CONTENT_TYPE; @@ -41,10 +40,8 @@ use http::Version; use super::new_request_build_error; use super::AsyncBody; -use super::IncomingAsyncBody; use crate::raw::oio; use crate::raw::oio::Stream; -use crate::raw::oio::Streamer; use crate::*; /// Multipart is a builder for multipart/form-data. @@ -225,8 +222,7 @@ pub trait Part: Sized + 'static { pub struct FormDataPart { headers: HeaderMap, - content_length: u64, - content: Streamer, + content: Bytes, } impl FormDataPart { @@ -245,8 +241,7 @@ impl FormDataPart { Self { headers, - content_length: 0, - content: Box::new(oio::Cursor::new()), + content: Bytes::new(), } } @@ -258,17 +253,7 @@ impl FormDataPart { /// Set the content for this part. pub fn content(mut self, content: impl Into) -> Self { - let content = content.into(); - - self.content_length = content.len() as u64; - self.content = Box::new(oio::Cursor::from(content)); - self - } - - /// Set the stream content for this part. 
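The response handling in `http_util::client` above reduces to "collect, wrap, verify"; a self-contained sketch of that shape, assuming the `check` helper defined in the same file:

    use bytes::{Buf, Bytes};
    use futures::{Stream, TryStreamExt};

    async fn collect_body(
        body: impl Stream<Item = Result<Bytes>>,
        content_length: Option<u64>,
    ) -> Result<oio::Buffer> {
        // Gather every frame; Buffer wraps the Vec<Bytes> without copying.
        let frames: Vec<Bytes> = body.try_collect().await?;
        let buf = oio::Buffer::from(frames);
        if let Some(expect) = content_length {
            check(expect, buf.remaining() as u64)?;
        }
        Ok(buf)
    }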
- pub fn stream(mut self, size: u64, content: Streamer) -> Self { - self.content_length = size; - self.content = content; + self.content = content.into(); self } } @@ -303,7 +288,7 @@ impl Part for FormDataPart { let bs = bs.freeze(); // pre-content + content + post-content (b`\r\n`) - let total_size = bs.len() as u64 + self.content_length + 2; + let total_size = bs.len() as u64 + self.content.len() as u64 + 2; ( total_size, @@ -325,22 +310,21 @@ impl Part for FormDataPart { pub struct FormDataPartStream { /// Including headers and the first `b\r\n` pre_content: Option, - content: Option, + content: Option, } impl Stream for FormDataPartStream { - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + fn poll_next(&mut self, _: &mut Context<'_>) -> Poll>> { if let Some(pre_content) = self.pre_content.take() { return Poll::Ready(Some(Ok(pre_content))); } - if let Some(stream) = self.content.as_mut() { - return match ready!(stream.poll_next(cx)) { - None => { - self.content = None; - Poll::Ready(Some(Ok(Bytes::from_static(b"\r\n")))) - } - Some(v) => Poll::Ready(Some(v)), + if let Some(bs) = self.content.as_mut() { + if bs.has_remaining() { + return Poll::Ready(Some(Ok(bs.copy_to_bytes(bs.remaining())))); + } else { + self.content = None; + return Poll::Ready(Some(Ok(Bytes::from_static(b"\r\n")))); }; } @@ -355,8 +339,7 @@ pub struct MixedPart { /// Common version: Version, headers: HeaderMap, - content_length: u64, - content: Option, + content: Option, /// Request only method: Option, @@ -380,7 +363,6 @@ impl MixedPart { version: Version::HTTP_11, headers: HeaderMap::new(), - content_length: 0, content: None, uri: Some(uri), @@ -398,20 +380,11 @@ impl MixedPart { let (parts, body) = req.into_parts(); - let (content_length, content) = match body { - AsyncBody::Empty => (0, None), - AsyncBody::Bytes(bs) => ( - bs.len() as u64, - Some(Box::new(oio::Cursor::from(bs)) as Streamer), - ), - AsyncBody::Stream(stream) => { - let len = parts - .headers - .get(CONTENT_LENGTH) - .and_then(|v| v.to_str().ok()) - .and_then(|v| v.parse::().ok()) - .expect("the content length of a mixed part must be valid"); - (len, Some(stream)) + let content = match body { + AsyncBody::Empty => None, + AsyncBody::Bytes(bs) => Some(bs), + AsyncBody::Stream(_) => { + unimplemented!("multipart upload does not support streaming body") } }; @@ -429,7 +402,6 @@ impl MixedPart { ), version: parts.version, headers: parts.headers, - content_length, content, method: Some(parts.method), @@ -438,7 +410,7 @@ impl MixedPart { } /// Consume a mixed part to build a response. - pub fn into_response(mut self) -> Response { + pub fn into_response(mut self) -> Response { let mut builder = Response::builder(); builder = builder.status(self.status_code.unwrap_or(StatusCode::OK)); @@ -446,10 +418,9 @@ impl MixedPart { // Swap headers directly instead of copy the entire map. mem::swap(builder.headers_mut().unwrap(), &mut self.headers); - let body = if let Some(stream) = self.content { - IncomingAsyncBody::new(stream, Some(self.content_length)) - } else { - IncomingAsyncBody::new(Box::new(oio::into_stream(stream::empty())), Some(0)) + let body = match self.content { + None => oio::Buffer::new(), + Some(bs) => oio::Buffer::from(bs), }; builder @@ -483,17 +454,7 @@ impl MixedPart { /// Set the content for this part. 
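With multipart parts now holding `Bytes` instead of a `Streamer`, building one is fully synchronous. A sketch, assuming `FormDataPart::new` still takes the field name as in the existing API:

    let part = FormDataPart::new("field").content(Bytes::from("hello world"));
    // total size = serialized headers + payload + the trailing b"\r\n",
    // matching the size computation above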
pub fn content(mut self, content: impl Into) -> Self { - let content = content.into(); - - self.content_length = content.len() as u64; - self.content = Some(Box::new(oio::Cursor::from(content))); - self - } - - /// Set the stream content for this part. - pub fn stream(mut self, size: u64, content: Streamer) -> Self { - self.content_length = size; - self.content = Some(content); + self.content = Some(content.into()); self } } @@ -566,8 +527,8 @@ impl Part for MixedPart { // pre-content + content + post-content; let mut total_size = bs.len() as u64; - if self.content.is_some() { - total_size += self.content_length + 2; + if let Some(bs) = &self.content { + total_size += bs.len() as u64 + 2; } ( @@ -650,8 +611,7 @@ impl Part for MixedPart { part_headers, version: Version::HTTP_11, headers, - content_length: body_bytes.len() as u64, - content: Some(Box::new(oio::Cursor::from(body_bytes))), + content: Some(body_bytes), method: None, uri: None, @@ -670,22 +630,21 @@ impl Part for MixedPart { pub struct MixedPartStream { /// Including headers and the first `b\r\n` pre_content: Option, - content: Option, + content: Option, } impl Stream for MixedPartStream { - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + fn poll_next(&mut self, _: &mut Context<'_>) -> Poll>> { if let Some(pre_content) = self.pre_content.take() { return Poll::Ready(Some(Ok(pre_content))); } - if let Some(stream) = self.content.as_mut() { - return match ready!(stream.poll_next(cx)) { - None => { - self.content = None; - Poll::Ready(Some(Ok(Bytes::from_static(b"\r\n")))) - } - Some(v) => Poll::Ready(Some(v)), + if let Some(bs) = self.content.as_mut() { + if bs.has_remaining() { + return Poll::Ready(Some(Ok(bs.copy_to_bytes(bs.remaining())))); + } else { + self.content = None; + return Poll::Ready(Some(Ok(Bytes::from_static(b"\r\n")))); }; } @@ -1120,7 +1079,10 @@ Content-Length: 846 h }); - assert_eq!(multipart.parts[0].content_length, part0_bs.len() as u64); + assert_eq!( + multipart.parts[0].content.as_ref().unwrap().len(), + part0_bs.len() + ); assert_eq!(multipart.parts[0].uri, None); assert_eq!(multipart.parts[0].method, None); assert_eq!( @@ -1160,7 +1122,10 @@ Content-Length: 846 h }); - assert_eq!(multipart.parts[1].content_length, part1_bs.len() as u64); + assert_eq!( + multipart.parts[1].content.as_ref().unwrap().len(), + part1_bs.len() + ); assert_eq!(multipart.parts[1].uri, None); assert_eq!(multipart.parts[1].method, None); assert_eq!( @@ -1200,7 +1165,10 @@ Content-Length: 846 h }); - assert_eq!(multipart.parts[2].content_length, part2_bs.len() as u64); + assert_eq!( + multipart.parts[2].content.as_ref().unwrap().len(), + part2_bs.len() + ); assert_eq!(multipart.parts[2].uri, None); assert_eq!(multipart.parts[2].method, None); assert_eq!( diff --git a/core/src/raw/oio/buf/buffer.rs b/core/src/raw/oio/buf/buffer.rs new file mode 100644 index 000000000000..ea68425a8648 --- /dev/null +++ b/core/src/raw/oio/buf/buffer.rs @@ -0,0 +1,193 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::collections::VecDeque; +use std::sync::Arc; + +use bytes::Buf; +use bytes::Bytes; + +/// Buffer is a wrapper of contiguous `Bytes` and non contiguous `[Bytes]`. +/// +/// We designed buffer to allow underlying storage to return non-contiguous bytes. +/// +/// For example, http based storage like s3 could generate non-contiguous bytes by stream. +#[derive(Clone)] +pub struct Buffer(Inner); + +#[derive(Clone)] +enum Inner { + Contiguous(Bytes), + NonContiguous { + parts: Arc<[Bytes]>, + idx: usize, + offset: usize, + }, +} + +impl Buffer { + /// Create a new empty buffer. + /// + /// This operation is const and no allocation will be performed. + #[inline] + pub const fn new() -> Self { + Self(Inner::Contiguous(Bytes::new())) + } + + /// Clone internal bytes to a new `Bytes`. + #[inline] + pub fn to_bytes(&self) -> Bytes { + let mut bs = self.clone(); + bs.copy_to_bytes(bs.remaining()) + } +} + +impl From> for Buffer { + fn from(bs: Vec) -> Self { + Self(Inner::Contiguous(bs.into())) + } +} + +impl From for Buffer { + fn from(bs: Bytes) -> Self { + Self(Inner::Contiguous(bs)) + } +} + +/// Transform `VecDeque` to `Arc<[Bytes]>`. +impl From> for Buffer { + fn from(bs: VecDeque) -> Self { + Self(Inner::NonContiguous { + parts: Vec::from(bs).into(), + idx: 0, + offset: 0, + }) + } +} + +/// Transform `Vec` to `Arc<[Bytes]>`. +impl From> for Buffer { + fn from(bs: Vec) -> Self { + Self(Inner::NonContiguous { + parts: bs.into(), + idx: 0, + offset: 0, + }) + } +} + +impl Buf for Buffer { + #[inline] + fn remaining(&self) -> usize { + match &self.0 { + Inner::Contiguous(b) => b.remaining(), + Inner::NonContiguous { parts, idx, offset } => { + if *idx >= parts.len() { + return 0; + } + + parts[*idx..].iter().map(|p| p.len()).sum::() - offset + } + } + } + + #[inline] + fn chunk(&self) -> &[u8] { + match &self.0 { + Inner::Contiguous(b) => b.chunk(), + Inner::NonContiguous { parts, idx, offset } => { + if *idx >= parts.len() { + return &[]; + } + + &parts[*idx][*offset..] 
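+                // Invariant: `idx` is the first part with unread bytes and
+                // `offset` is the cursor within it, so the live chunk is
+                // always `parts[idx][offset..]`.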
+ } + } + } + + #[inline] + fn advance(&mut self, cnt: usize) { + match &mut self.0 { + Inner::Contiguous(b) => b.advance(cnt), + Inner::NonContiguous { parts, idx, offset } => { + let mut new_cnt = cnt; + let mut new_idx = *idx; + let mut new_offset = *offset; + + while new_cnt > 0 { + let remaining = parts[new_idx].len() - new_offset; + if new_cnt < remaining { + new_offset += new_cnt; + new_cnt = 0; + break; + } else { + new_cnt -= remaining; + new_idx += 1; + new_offset = 0; + if new_idx > parts.len() { + break; + } + } + } + + if new_cnt == 0 { + *idx = new_idx; + *offset = new_offset; + } else { + panic!("cannot advance past {cnt} bytes") + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + const EMPTY_SLICE: &[u8] = &[]; + + #[test] + fn test_contiguous_buffer() { + let buf = Buffer::new(); + + assert_eq!(buf.remaining(), 0); + assert_eq!(buf.chunk(), EMPTY_SLICE); + } + + #[test] + fn test_empty_non_contiguous_buffer() { + let buf = Buffer::from(vec![Bytes::new()]); + + assert_eq!(buf.remaining(), 0); + assert_eq!(buf.chunk(), EMPTY_SLICE); + } + + #[test] + fn test_non_contiguous_buffer_with_empty_chunks() { + let mut buf = Buffer::from(vec![Bytes::from("a")]); + + assert_eq!(buf.remaining(), 1); + assert_eq!(buf.chunk(), b"a"); + + buf.advance(1); + + assert_eq!(buf.remaining(), 0); + assert_eq!(buf.chunk(), EMPTY_SLICE); + } +} diff --git a/core/src/raw/oio/buf/mod.rs b/core/src/raw/oio/buf/mod.rs index 9c4881ec83ac..3009e803e2a1 100644 --- a/core/src/raw/oio/buf/mod.rs +++ b/core/src/raw/oio/buf/mod.rs @@ -17,3 +17,6 @@ mod adaptive; pub use adaptive::AdaptiveBuf; + +mod buffer; +pub use buffer::Buffer; diff --git a/core/src/raw/oio/cursor.rs b/core/src/raw/oio/cursor.rs deleted file mode 100644 index 75a034f1023e..000000000000 --- a/core/src/raw/oio/cursor.rs +++ /dev/null @@ -1,151 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::cmp::min; -use std::io::SeekFrom; -use std::task::Context; -use std::task::Poll; - -use bytes::Bytes; - -use crate::raw::*; -use crate::*; - -/// Cursor is the cursor for [`Bytes`] that implements [`oio::Read`] -#[derive(Default)] -pub struct Cursor { - inner: Bytes, - pos: u64, -} - -impl Cursor { - /// Create a new empty cursor. - pub fn new() -> Self { - Self::default() - } - - /// Returns `true` if the remaining slice is empty. - pub fn is_empty(&self) -> bool { - self.pos as usize >= self.inner.len() - } - - /// Returns the remaining slice. - pub fn remaining_slice(&self) -> &[u8] { - let len = self.pos.min(self.inner.len() as u64) as usize; - &self.inner.as_ref()[len..] - } - - /// Return the length of remaining slice. 
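The new `oio::Buffer` behaves like any other `bytes::Buf`; a sketch of the non-contiguous case, following the `remaining`/`chunk`/`advance` implementations above:

    use bytes::{Buf, Bytes};

    let mut buf = oio::Buffer::from(vec![Bytes::from("Hello, "), Bytes::from("World!")]);
    assert_eq!(buf.remaining(), 13);
    assert_eq!(buf.chunk(), b"Hello, ");    // chunk() exposes one part at a time
    buf.advance(9);                         // advancing may cross a part boundary
    assert_eq!(buf.chunk(), b"rld!");
    assert_eq!(buf.to_bytes(), Bytes::from("rld!")); // copies only what remains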
- pub fn len(&self) -> usize { - self.inner.len() - self.pos as usize - } -} - -impl From for Cursor { - fn from(v: Bytes) -> Self { - Cursor { inner: v, pos: 0 } - } -} - -impl From> for Cursor { - fn from(v: Vec) -> Self { - Cursor { - inner: Bytes::from(v), - pos: 0, - } - } -} - -impl oio::Read for Cursor { - async fn read(&mut self, limit: usize) -> Result { - if self.is_empty() { - Ok(Bytes::new()) - } else { - // The clone here is required as we don't want to change it. - let mut bs = self.inner.clone().split_off(self.pos as usize); - let bs = bs.split_to(min(bs.len(), limit)); - self.pos += bs.len() as u64; - Ok(bs) - } - } - - async fn seek(&mut self, pos: SeekFrom) -> Result { - let (base, amt) = match pos { - SeekFrom::Start(n) => (0, n as i64), - SeekFrom::End(n) => (self.inner.len() as i64, n), - SeekFrom::Current(n) => (self.pos as i64, n), - }; - - let n = match base.checked_add(amt) { - Some(n) if n >= 0 => n as u64, - _ => { - return Err(Error::new( - ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - )) - } - }; - self.pos = n; - Ok(n) - } -} - -impl oio::BlockingRead for Cursor { - fn read(&mut self, limit: usize) -> Result { - if self.is_empty() { - Ok(Bytes::new()) - } else { - // The clone here is required as we don't want to change it. - let mut bs = self.inner.clone().split_off(self.pos as usize); - let bs = bs.split_to(min(bs.len(), limit)); - self.pos += bs.len() as u64; - Ok(bs) - } - } - - fn seek(&mut self, pos: SeekFrom) -> Result { - let (base, amt) = match pos { - SeekFrom::Start(n) => (0, n as i64), - SeekFrom::End(n) => (self.inner.len() as i64, n), - SeekFrom::Current(n) => (self.pos as i64, n), - }; - - let n = match base.checked_add(amt) { - Some(n) if n >= 0 => n as u64, - _ => { - return Err(Error::new( - ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - )) - } - }; - self.pos = n; - Ok(n) - } -} - -impl oio::Stream for Cursor { - fn poll_next(&mut self, _: &mut Context<'_>) -> Poll>> { - if self.is_empty() { - return Poll::Ready(None); - } - - let bs = self.inner.clone(); - self.pos += bs.len() as u64; - Poll::Ready(Some(Ok(bs))) - } -} diff --git a/core/src/raw/oio/list/api.rs b/core/src/raw/oio/list/api.rs index e5c62054cd00..48368c7a93a4 100644 --- a/core/src/raw/oio/list/api.rs +++ b/core/src/raw/oio/list/api.rs @@ -18,7 +18,6 @@ use std::fmt::Display; use std::fmt::Formatter; use std::future::Future; - use std::ops::DerefMut; use crate::raw::oio::Entry; diff --git a/core/src/raw/oio/mod.rs b/core/src/raw/oio/mod.rs index 7a10a8331c33..11e363f80895 100644 --- a/core/src/raw/oio/mod.rs +++ b/core/src/raw/oio/mod.rs @@ -34,9 +34,6 @@ pub use stream::*; mod list; pub use list::*; -mod cursor; -pub use cursor::Cursor; - mod entry; pub use entry::Entry; diff --git a/core/src/raw/oio/read/api.rs b/core/src/raw/oio/read/api.rs index d8ed05c4ece0..435148348980 100644 --- a/core/src/raw/oio/read/api.rs +++ b/core/src/raw/oio/read/api.rs @@ -17,13 +17,12 @@ use std::fmt::Display; use std::fmt::Formatter; -use std::io; -use std::ops::DerefMut; +use std::ops::Deref; use bytes::Bytes; use futures::Future; -use crate::raw::BoxedFuture; +use crate::raw::*; use crate::*; /// PageOperation is the name for APIs of lister. @@ -87,63 +86,52 @@ pub type Reader = Box; /// an additional layer of indirection and an extra allocation. Ideally, `ReadDyn` should occur only /// once, at the outermost level of our API. pub trait Read: Unpin + Send + Sync { - /// Fetch more bytes from underlying reader. 
+ /// Read at the given offset with the given limit. /// - /// `limit` is used to hint the data that user want to read at most. Implementer - /// MUST NOT return more than `limit` bytes. However, implementer can decide - /// whether to split or merge the read requests underground. + /// # Notes /// - /// Returning `bytes`'s `length == 0` means: - /// - /// - This reader has reached its “end of file” and will likely no longer be able to produce bytes. - /// - The `limit` specified was `0`. - #[cfg(not(target_arch = "wasm32"))] - fn read(&mut self, limit: usize) -> impl Future> + Send; - #[cfg(target_arch = "wasm32")] - fn read(&mut self, size: usize) -> impl Future>; - - /// Seek asynchronously. - /// - /// Returns `Unsupported` error if underlying reader doesn't support seek. + /// Storage services should try to read as much as possible, only return bytes less than the + /// limit while reaching the end of the file. #[cfg(not(target_arch = "wasm32"))] - fn seek(&mut self, pos: io::SeekFrom) -> impl Future> + Send; + fn read_at( + &self, + offset: u64, + limit: usize, + ) -> impl Future> + Send; #[cfg(target_arch = "wasm32")] - fn seek(&mut self, pos: io::SeekFrom) -> impl Future>; + fn read_at(&self, offset: u64, limit: usize) -> impl Future>; } impl Read for () { - async fn read(&mut self, limit: usize) -> Result { - let _ = limit; + async fn read_at(&self, offset: u64, limit: usize) -> Result { + let (_, _) = (offset, limit); Err(Error::new( ErrorKind::Unsupported, "output reader doesn't support streaming", )) } +} - async fn seek(&mut self, pos: io::SeekFrom) -> Result { - let _ = pos; - - Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support seeking", - )) +impl Read for Bytes { + /// TODO: we can check if the offset is out of range. + async fn read_at(&self, offset: u64, limit: usize) -> Result { + if offset >= self.len() as u64 { + return Ok(oio::Buffer::new()); + } + let offset = offset as usize; + let limit = limit.min(self.len() - offset); + Ok(oio::Buffer::from(self.slice(offset..offset + limit))) } } pub trait ReadDyn: Unpin + Send + Sync { - fn read_dyn(&mut self, limit: usize) -> BoxedFuture>; - - fn seek_dyn(&mut self, pos: io::SeekFrom) -> BoxedFuture>; + fn read_at_dyn(&self, offset: u64, limit: usize) -> BoxedFuture>; } impl ReadDyn for T { - fn read_dyn(&mut self, limit: usize) -> BoxedFuture> { - Box::pin(self.read(limit)) - } - - fn seek_dyn(&mut self, pos: io::SeekFrom) -> BoxedFuture> { - Box::pin(self.seek(pos)) + fn read_at_dyn(&self, offset: u64, limit: usize) -> BoxedFuture> { + Box::pin(self.read_at(offset, limit)) } } @@ -152,12 +140,8 @@ impl ReadDyn for T { /// Take care about the `deref_mut()` here. This makes sure that we are calling functions /// upon `&mut T` instead of `&mut Box`. The later could result in infinite recursion. impl Read for Box { - async fn read(&mut self, limit: usize) -> Result { - self.deref_mut().read_dyn(limit).await - } - - async fn seek(&mut self, pos: io::SeekFrom) -> Result { - self.deref_mut().seek_dyn(pos).await + async fn read_at(&self, offset: u64, limit: usize) -> Result { + self.deref().read_at_dyn(offset, limit).await } } @@ -165,48 +149,40 @@ impl Read for Box { pub type BlockingReader = Box; /// Read is the trait that OpenDAL returns to callers. -/// -/// Read is compose of the following trait -/// -/// - `Read` -/// - `Seek` -/// - `Iterator>` -/// -/// `Read` is required to be implemented, `Seek` and `Iterator` -/// is optional. We use `Read` to make users life easier. 
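The move from a stateful `read(limit)` plus `seek` to a single positional `read_at(&self, offset, limit)` is the core of this patch: the reader no longer carries a cursor, which is why the method can take `&self`. A standalone sketch of the clamping behavior shown in the `Bytes` impl above; `InMemory` is a hypothetical stand-in that returns owned bytes instead of `oio::Buffer`:

struct InMemory(Vec<u8>);

impl InMemory {
    fn read_at(&self, offset: u64, limit: usize) -> Vec<u8> {
        // Reads past the end yield an empty buffer rather than an error,
        // mirroring the clamping in the `Bytes` impl above.
        if offset >= self.0.len() as u64 {
            return Vec::new();
        }
        let offset = offset as usize;
        let limit = limit.min(self.0.len() - offset);
        self.0[offset..offset + limit].to_vec()
    }
}

fn main() {
    let r = InMemory(b"hello world".to_vec());
    assert_eq!(r.read_at(6, 5), b"world");
    assert_eq!(r.read_at(6, 100), b"world"); // limit clamped at EOF
    assert!(r.read_at(100, 5).is_empty());   // offset past EOF
}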
pub trait BlockingRead: Send + Sync { - /// Read synchronously. - fn read(&mut self, limit: usize) -> Result; - - /// Seek synchronously. - fn seek(&mut self, pos: io::SeekFrom) -> Result; + /// Read data from the reader at the given offset with the given limit. + /// + /// # Notes + /// + /// Storage services should try to read as much as possible, only return bytes less than the + /// limit while reaching the end of the file. + fn read_at(&self, offset: u64, limit: usize) -> Result; } impl BlockingRead for () { - fn read(&mut self, limit: usize) -> Result { - let _ = limit; + fn read_at(&self, offset: u64, limit: usize) -> Result { + let _ = (offset, limit); unimplemented!("read is required to be implemented for oio::BlockingRead") } +} - fn seek(&mut self, pos: io::SeekFrom) -> Result { - let _ = pos; - - Err(Error::new( - ErrorKind::Unsupported, - "output blocking reader doesn't support seeking", - )) +impl BlockingRead for Bytes { + /// TODO: we can check if the offset is out of range. + fn read_at(&self, offset: u64, limit: usize) -> Result { + if offset >= self.len() as u64 { + return Ok(oio::Buffer::new()); + } + let offset = offset as usize; + let limit = limit.min(self.len() - offset); + Ok(oio::Buffer::from(self.slice(offset..offset + limit))) } } /// `Box` won't implement `BlockingRead` automatically. /// To make BlockingReader work as expected, we must add this impl. impl BlockingRead for Box { - fn read(&mut self, limit: usize) -> Result { - (**self).read(limit) - } - - fn seek(&mut self, pos: io::SeekFrom) -> Result { - (**self).seek(pos) + fn read_at(&self, offset: u64, limit: usize) -> Result { + (**self).read_at(offset, limit) } } diff --git a/core/src/raw/oio/read/buffer_reader.rs b/core/src/raw/oio/read/buffer_reader.rs deleted file mode 100644 index 733878d1cce0..000000000000 --- a/core/src/raw/oio/read/buffer_reader.rs +++ /dev/null @@ -1,802 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::cmp::min; -use std::io::SeekFrom; - -use bytes::Bytes; -use tokio::io::ReadBuf; - -use super::BlockingRead; -use crate::raw::*; -use crate::*; - -/// [BufferReader] allows the underlying reader to fetch data at the buffer's size -/// and is used to amortize the IO's overhead. -pub struct BufferReader { - r: R, - cur: u64, - - /// TODO: maybe we can use chunked bytes here? - buf: Vec, - filled: usize, - pos: usize, -} - -impl BufferReader { - /// Create a new [`oio::Reader`] with a buffer. - pub fn new(r: R, cap: usize) -> BufferReader { - BufferReader { - r, - cur: 0, - - buf: Vec::with_capacity(cap), - filled: 0, - pos: 0, - } - } - - /// Invalidates all data in the internal buffer. 
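Under this contract a short read is the end-of-file signal, so draining a whole object is a single loop with no separate length query. A sketch of that loop, with the positional reader modeled as a closure (hypothetical, but the same shape as `read_at` above):

fn read_full(read_at: impl Fn(u64, usize) -> Vec<u8>, chunk: usize) -> Vec<u8> {
    let mut out = Vec::new();
    let mut offset = 0u64;
    loop {
        let bs = read_at(offset, chunk);
        offset += bs.len() as u64;
        let done = bs.len() < chunk; // short read: reached end of file
        out.extend_from_slice(&bs);
        if done {
            return out;
        }
    }
}

fn main() {
    let data = b"positional reads compose nicely".to_vec();
    let d = data.clone();
    let read_at = move |off: u64, lim: usize| {
        let off = (off as usize).min(d.len());
        let end = (off + lim).min(d.len());
        d[off..end].to_vec()
    };
    assert_eq!(read_full(read_at, 7), data);
}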
- #[inline] - fn discard_buffer(&mut self) { - self.buf.clear(); - self.pos = 0; - self.filled = 0; - } - - /// Returns the capacity of the internal buffer. - fn capacity(&self) -> usize { - self.buf.capacity() - } - - fn consume(&mut self, amt: usize) { - let new_pos = min(self.pos + amt, self.filled); - let amt = new_pos - self.pos; - - self.pos = new_pos; - self.cur += amt as u64; - } - - fn seek_relative(&mut self, offset: i64) -> Option { - let pos = self.pos as u64; - - if let (Some(new_pos), Some(new_cur)) = ( - pos.checked_add_signed(offset), - self.cur.checked_add_signed(offset), - ) { - if new_pos <= self.filled as u64 { - self.cur = new_cur; - self.pos = new_pos as usize; - return Some(self.cur); - } - } - - None - } - - fn unconsumed_buffer_len(&self) -> i64 { - (self.filled as i64) - (self.pos as i64) - } -} - -impl BufferReader -where - R: oio::Read, -{ - async fn fill_buf(&mut self) -> Result<&[u8]> { - // If we've reached the end of our internal buffer then we need to fetch - // some more data from the underlying reader. - // Branch using `>=` instead of the more correct `==` - // to tell the compiler that the pos..cap slice is always valid. - if self.pos >= self.filled { - debug_assert!(self.pos == self.filled); - - let cap = self.capacity(); - self.buf.clear(); - let dst = self.buf.spare_capacity_mut(); - let mut buf = ReadBuf::uninit(dst); - unsafe { buf.assume_init(cap) }; - - let bs = self.r.read(cap).await?; - buf.put_slice(&bs); - unsafe { self.buf.set_len(bs.len()) } - - self.pos = 0; - self.filled = bs.len(); - } - - Ok(&self.buf[self.pos..self.filled]) - } - - async fn inner_seek(&mut self, pos: SeekFrom) -> Result { - let cur = self.r.seek(pos).await?; - self.discard_buffer(); - self.cur = cur; - - Ok(cur) - } -} - -impl oio::Read for BufferReader -where - R: oio::Read, -{ - async fn read(&mut self, limit: usize) -> Result { - if limit == 0 { - return Ok(Bytes::new()); - } - - // If we don't have any buffered data and we're doing a massive read - // (larger than our internal buffer), bypass our internal buffer - // entirely. - if self.pos == self.filled && limit >= self.capacity() { - let res = self.r.read(limit).await; - self.discard_buffer(); - return match res { - Ok(bs) => { - self.cur += bs.len() as u64; - Ok(bs) - } - Err(err) => Err(err), - }; - } - - let bytes = self.fill_buf().await?; - - if bytes.is_empty() { - return Ok(Bytes::new()); - } - let size = min(bytes.len(), limit); - let bytes = Bytes::copy_from_slice(&bytes[..size]); - self.consume(bytes.len()); - Ok(bytes) - } - - async fn seek(&mut self, pos: SeekFrom) -> Result { - match pos { - SeekFrom::Start(new_pos) => { - // TODO(weny): Check the overflowing. - let Some(offset) = (new_pos as i64).checked_sub(self.cur as i64) else { - return self.inner_seek(pos).await; - }; - - match self.seek_relative(offset) { - Some(cur) => Ok(cur), - None => self.inner_seek(pos).await, - } - } - SeekFrom::Current(offset) => match self.seek_relative(offset) { - Some(cur) => Ok(cur), - None => { - self.inner_seek(SeekFrom::Current(offset - self.unconsumed_buffer_len())) - .await - } - }, - SeekFrom::End(_) => self.inner_seek(pos).await, - } - } -} - -impl BufferReader -where - R: BlockingRead, -{ - fn blocking_fill_buf(&mut self) -> Result<&[u8]> { - // If we've reached the end of our internal buffer then we need to fetch - // some more data from the underlying reader. - // Branch using `>=` instead of the more correct `==` - // to tell the compiler that the pos..cap slice is always valid. 
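The `BufferReader` being deleted here plays the same role as `std::io::BufReader`: small reads are answered out of an internal buffer, and a relative seek that stays inside the buffered window (`seek_relative` above) costs nothing on the inner reader. The std types demonstrate the same behavior:

use std::io::{BufReader, Cursor, Read};

fn main() -> std::io::Result<()> {
    let mut r = BufReader::with_capacity(10, Cursor::new(b"Hello, World!".to_vec()));

    let mut buf = [0u8; 5];
    r.read_exact(&mut buf)?; // one fill of up to 10 bytes serves this
    assert_eq!(&buf, b"Hello");

    // Target is still inside the buffered window: no inner I/O needed.
    r.seek_relative(-3)?;
    r.read_exact(&mut buf)?;
    assert_eq!(&buf, b"llo, ");
    Ok(())
}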
- if self.pos >= self.filled { - debug_assert!(self.pos == self.filled); - - let cap = self.capacity(); - self.buf.clear(); - let dst = self.buf.spare_capacity_mut(); - let mut buf = ReadBuf::uninit(dst); - unsafe { buf.assume_init(cap) }; - - let bs = self.r.read(cap)?; - buf.put_slice(&bs); - unsafe { self.buf.set_len(bs.len()) } - - self.pos = 0; - self.filled = bs.len(); - } - - Ok(&self.buf[self.pos..self.filled]) - } - - fn blocking_inner_seek(&mut self, pos: SeekFrom) -> Result { - let cur = self.r.seek(pos)?; - self.discard_buffer(); - self.cur = cur; - - Ok(cur) - } -} - -impl BlockingRead for BufferReader -where - R: BlockingRead, -{ - fn read(&mut self, limit: usize) -> Result { - if limit == 0 { - return Ok(Bytes::new()); - } - - // If we don't have any buffered data and we're doing a massive read - // (larger than our internal buffer), bypass our internal buffer - // entirely. - if self.pos == self.filled && limit >= self.capacity() { - let res = self.r.read(limit); - self.discard_buffer(); - return match res { - Ok(bs) => { - self.cur += bs.len() as u64; - Ok(bs) - } - Err(err) => Err(err), - }; - } - - let bytes = self.blocking_fill_buf()?; - - if bytes.is_empty() { - return Ok(Bytes::new()); - } - let size = min(bytes.len(), limit); - let bytes = Bytes::copy_from_slice(&bytes[..size]); - self.consume(bytes.len()); - Ok(bytes) - } - - fn seek(&mut self, pos: SeekFrom) -> Result { - match pos { - SeekFrom::Start(new_pos) => { - // TODO(weny): Check the overflowing. - let Some(offset) = (new_pos as i64).checked_sub(self.cur as i64) else { - return self.blocking_inner_seek(pos); - }; - - match self.seek_relative(offset) { - Some(cur) => Ok(cur), - None => self.blocking_inner_seek(pos), - } - } - SeekFrom::Current(offset) => match self.seek_relative(offset) { - Some(cur) => Ok(cur), - None => self - .blocking_inner_seek(SeekFrom::Current(offset - self.unconsumed_buffer_len())), - }, - SeekFrom::End(_) => self.blocking_inner_seek(pos), - } - } -} - -#[cfg(test)] -mod tests { - use std::io::SeekFrom; - use std::sync::Arc; - - use async_trait::async_trait; - use bytes::Bytes; - use rand::prelude::*; - use sha2::Digest; - use sha2::Sha256; - - use super::*; - use crate::raw::oio::RangeReader; - - // Generate bytes between [4MiB, 16MiB) - fn gen_bytes() -> (Bytes, usize) { - let mut rng = thread_rng(); - - let size = rng.gen_range(4 * 1024 * 1024..16 * 1024 * 1024); - let mut content = vec![0; size]; - rng.fill_bytes(&mut content); - - (Bytes::from(content), size) - } - - #[derive(Debug, Clone, Default)] - struct MockReadService { - data: Bytes, - } - - impl MockReadService { - fn new(data: Bytes) -> Self { - Self { data } - } - } - - #[async_trait] - impl Accessor for MockReadService { - type Reader = MockReader; - type Writer = (); - type Lister = (); - type BlockingReader = MockReader; - type BlockingWriter = (); - type BlockingLister = (); - - fn info(&self) -> AccessorInfo { - let mut am = AccessorInfo::default(); - am.set_native_capability(Capability { - read: true, - ..Default::default() - }); - - am - } - - async fn read(&self, _: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let bs = args.range().apply_on_bytes(self.data.clone()); - - Ok(( - RpRead::new(), - MockReader { - inner: oio::Cursor::from(bs), - }, - )) - } - - fn blocking_read(&self, _: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { - let bs = args.range().apply_on_bytes(self.data.clone()); - - Ok(( - RpRead::new(), - MockReader { - inner: oio::Cursor::from(bs), - }, - )) - } - } - - 
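The bypass branch above (a "massive read" larger than the buffer capacity) is worth seeing in isolation: buffered data is always served first, a refill only happens for small requests, and a big request on an empty buffer goes straight to the inner reader. A simplified sketch over `std::io::Read`, written as a free function rather than a struct for brevity:

use std::io::Read;

fn buffered_read(
    inner: &mut impl Read,
    buf: &mut Vec<u8>, // leftovers from earlier fills
    cap: usize,
    limit: usize,
) -> std::io::Result<Vec<u8>> {
    if buf.is_empty() && limit >= cap {
        // Bypass: read directly into a caller-sized buffer.
        let mut out = vec![0; limit];
        let n = inner.read(&mut out)?;
        out.truncate(n);
        return Ok(out);
    }
    if buf.is_empty() {
        // Refill the internal buffer with up to `cap` bytes.
        let mut fill = vec![0; cap];
        let n = inner.read(&mut fill)?;
        fill.truncate(n);
        *buf = fill;
    }
    let n = limit.min(buf.len());
    Ok(buf.drain(..n).collect())
}

fn main() -> std::io::Result<()> {
    let mut src = std::io::Cursor::new(b"Hello, World!".to_vec());
    let mut buf = Vec::new();
    assert_eq!(buffered_read(&mut src, &mut buf, 4, 2)?, b"He"); // refill, then serve
    assert_eq!(buffered_read(&mut src, &mut buf, 4, 8)?, b"ll"); // leftovers first
    assert_eq!(buffered_read(&mut src, &mut buf, 4, 8)?, b"o, World"); // bypass
    Ok(())
}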
struct MockReader { - inner: oio::Cursor, - } - - impl oio::Read for MockReader { - async fn seek(&mut self, pos: SeekFrom) -> Result { - let _ = pos; - - Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support seeking", - )) - } - - async fn read(&mut self, limit: usize) -> Result { - oio::Read::read(&mut self.inner, limit).await - } - } - - impl BlockingRead for MockReader { - fn read(&mut self, limit: usize) -> Result { - self.inner.read(limit) - } - - fn seek(&mut self, _pos: SeekFrom) -> Result { - Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support seeking", - )) - } - } - - #[tokio::test] - async fn test_read_from_buf() -> anyhow::Result<()> { - let bs = Bytes::copy_from_slice(&b"Hello, World!"[..]); - - let acc = Arc::new(MockReadService::new(bs.clone())); - let r = Box::new(RangeReader::new(acc, "x", OpRead::default())) as oio::Reader; - - let buf_cap = 10; - let r = Box::new(BufferReader::new(r, buf_cap)) as oio::Reader; - let mut r = Reader::new(r); - - let bs = r.read(5).await?; - assert_eq!(bs.len(), 5); - assert_eq!(bs.as_ref(), b"Hello"); - - let bs = r.read(5).await?; - assert_eq!(bs.len(), 5); - assert_eq!(bs.as_ref(), b", Wor"); - - let bs = r.read(3).await?; - assert_eq!(bs.len(), 3); - assert_eq!(bs.as_ref(), b"ld!"); - - Ok(()) - } - - #[tokio::test] - async fn test_seek() -> anyhow::Result<()> { - let bs = Bytes::copy_from_slice(&b"Hello, World!"[..]); - let acc = Arc::new(MockReadService::new(bs.clone())); - let r = Box::new(RangeReader::new(acc, "x", OpRead::default())) as oio::Reader; - - let buf_cap = 10; - let r = Box::new(BufferReader::new(r, buf_cap)) as oio::Reader; - let mut r = Reader::new(r); - - // The underlying reader buffers the b"Hello, Wor". - let buf = r.read(5).await?; - assert_eq!(buf.len(), 5); - assert_eq!(buf.as_ref(), b"Hello"); - - let pos = r.seek(SeekFrom::Start(7)).await?; - assert_eq!(pos, 7); - let buf = r.read(5).await?; - assert_eq!(&buf, &bs[7..10]); - assert_eq!(buf.len(), 3); - - // Should perform a relative seek. - let pos = r.seek(SeekFrom::Start(0)).await?; - assert_eq!(pos, 0); - let buf = r.read(9).await?; - assert_eq!(&buf, &bs[0..9]); - assert_eq!(buf.len(), 9); - - // Should perform a non-relative seek. - let pos = r.seek(SeekFrom::Start(11)).await?; - assert_eq!(pos, 11); - let buf = r.read(9).await?; - assert_eq!(&buf, &bs[11..13]); - assert_eq!(buf.len(), 2); - - Ok(()) - } - - #[tokio::test] - async fn test_read_all() -> anyhow::Result<()> { - let (bs, _) = gen_bytes(); - let acc = Arc::new(MockReadService::new(bs.clone())); - - let r = Box::new(RangeReader::new( - acc, - "x", - OpRead::default().with_range(BytesRange::from(..)), - )) as oio::Reader; - - let r = Box::new(BufferReader::new(r, 4096 * 1024)) as oio::Reader; - let mut r = Reader::new(r); - - let mut buf = Vec::new(); - r.read_to_end(&mut buf).await?; - assert_eq!(bs.len(), buf.len(), "read size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!("{:x}", Sha256::digest(&buf)), - "read content" - ); - - let n = r.seek(SeekFrom::Start(0)).await?; - assert_eq!(n, 0, "seek position must be 0"); - - let mut buf = Vec::new(); - r.read_to_end(&mut buf).await?; - assert_eq!(bs.len(), buf.len(), "read twice size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!("{:x}", Sha256::digest(&buf)), - "read twice content" - ); - - Ok(()) - } - - #[tokio::test] - async fn test_bypass_seek_relative() -> anyhow::Result<()> { - let bs = Bytes::copy_from_slice( - &b"Hello, World! 
I'm going to tests a seek relative related bug!"[..], - ); - let acc = Arc::new(MockReadService::new(bs.clone())); - let r = Box::new(RangeReader::new( - acc, - "x", - OpRead::default().with_range(BytesRange::from(..)), - )) as oio::Reader; - let r = Box::new(BufferReader::new(r, 10)) as oio::Reader; - let mut r = Reader::new(r); - - let mut cur = 0; - for _ in 0..3 { - let bs = r.read(5).await?; - assert_eq!(bs.len(), 5); - cur += 5; - } - - let ret_cur = r.seek(SeekFrom::Current(-15)).await?; - assert_eq!(cur - 15, ret_cur); - - Ok(()) - } - - #[tokio::test] - async fn test_bypass_read_and_seek_relative() -> anyhow::Result<()> { - let bs = Bytes::copy_from_slice( - &b"Hello, World! I'm going to tests a seek relative related bug!"[..], - ); - let acc = Arc::new(MockReadService::new(bs.clone())); - let r = Box::new(RangeReader::new( - acc, - "x", - OpRead::default().with_range(BytesRange::from(..)), - )) as oio::Reader; - let r = Box::new(BufferReader::new(r, 5)) as oio::Reader; - let mut r = Reader::new(r); - - let mut cur = 0; - for _ in 0..3 { - let bs = r.read(6).await?; - assert_eq!(bs.len(), 6); - cur += 6; - } - - let ret_cur = r.seek(SeekFrom::Current(6)).await?; - assert_eq!(cur + 6, ret_cur); - - Ok(()) - } - - #[tokio::test] - async fn test_read_part() -> anyhow::Result<()> { - let (bs, _) = gen_bytes(); - let acc = Arc::new(MockReadService::new(bs.clone())); - - let r = Box::new(RangeReader::new( - acc, - "x", - OpRead::default().with_range(BytesRange::from(4096..4096 + 4096)), - )) as oio::Reader; - let r = Box::new(BufferReader::new(r, 4096 * 1024)) as oio::Reader; - let mut r = Reader::new(r); - - let mut buf = Vec::new(); - r.read_to_end(&mut buf).await?; - assert_eq!(4096, buf.len(), "read size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096..4096 + 4096])), - format!("{:x}", Sha256::digest(&buf)), - "read content" - ); - - let n = r.seek(SeekFrom::Start(0)).await?; - assert_eq!(n, 0, "seek position must be 0"); - - let mut buf = Vec::new(); - r.read_to_end(&mut buf).await?; - assert_eq!(4096, buf.len(), "read twice size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096..4096 + 4096])), - format!("{:x}", Sha256::digest(&buf)), - "read twice content" - ); - - let n = r.seek(SeekFrom::Start(1024)).await?; - assert_eq!(1024, n, "seek to 1024"); - - let buf = r.read_exact(1024).await?; - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096 + 1024..4096 + 2048])), - format!("{:x}", Sha256::digest(&buf)), - "read after seek 1024" - ); - - let n = r.seek(SeekFrom::Current(1024)).await?; - assert_eq!(3072, n, "seek to 3072"); - - let buf = r.read_exact(1024).await?; - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096 + 3072..4096 + 3072 + 1024])), - format!("{:x}", Sha256::digest(&buf)), - "read after seek to 3072" - ); - - Ok(()) - } - - #[tokio::test] - async fn test_blocking_read_from_buf() -> anyhow::Result<()> { - let bs = Bytes::copy_from_slice(&b"Hello, World!"[..]); - let r = Box::new(oio::Cursor::from(bs.clone())) as oio::BlockingReader; - let buf_cap = 10; - let r = Box::new(BufferReader::new(r, buf_cap)) as oio::BlockingReader; - let mut r = BlockingReader::new(r); - - let buf = r.read(5)?; - assert_eq!(buf.len(), 5); - assert_eq!(buf.as_ref(), b"Hello"); - - let buf = r.read(5)?; - assert_eq!(buf.len(), 5); - assert_eq!(buf.as_ref(), b", Wor"); - - let buf = r.read(3)?; - assert_eq!(buf.len(), 3); - assert_eq!(buf.as_ref(), b"ld!"); - - Ok(()) - } - - #[tokio::test] - async fn test_blocking_seek() -> anyhow::Result<()> { - let bs = 
Bytes::copy_from_slice(&b"Hello, World!"[..]); - let r = Box::new(oio::Cursor::from(bs.clone())) as oio::BlockingReader; - let buf_cap = 10; - let r = Box::new(BufferReader::new(r, buf_cap)) as oio::BlockingReader; - let mut r = BlockingReader::new(r); - - // The underlying reader buffers the b"Hello, Wor". - let buf = r.read(5)?; - assert_eq!(buf.len(), 5); - assert_eq!(buf.as_ref(), b"Hello"); - - let pos = r.seek(SeekFrom::Start(7))?; - assert_eq!(pos, 7); - let buf = r.read(5)?; - assert_eq!(&buf[..], &bs[7..10]); - assert_eq!(buf.len(), 3); - - // Should perform a relative seek. - let pos = r.seek(SeekFrom::Start(0))?; - assert_eq!(pos, 0); - let buf = r.read(9)?; - assert_eq!(&buf[..], &bs[0..9]); - assert_eq!(buf.len(), 9); - - // Should perform a non-relative seek. - let pos = r.seek(SeekFrom::Start(11))?; - assert_eq!(pos, 11); - let buf = r.read(9)?; - assert_eq!(&buf[..], &bs[11..13]); - assert_eq!(buf.len(), 2); - - Ok(()) - } - - #[tokio::test] - async fn test_blocking_read_all() -> anyhow::Result<()> { - let (bs, _) = gen_bytes(); - let r = Box::new(oio::Cursor::from(bs.clone())) as oio::BlockingReader; - let r = Box::new(BufferReader::new(r, 4096 * 1024)) as oio::BlockingReader; - let mut r = BlockingReader::new(r); - - let mut buf = Vec::new(); - r.read_to_end(&mut buf)?; - assert_eq!(bs.len(), buf.len(), "read size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!("{:x}", Sha256::digest(&buf)), - "read content" - ); - - let n = r.seek(SeekFrom::Start(0))?; - assert_eq!(n, 0, "seek position must be 0"); - - let mut buf = Vec::new(); - r.read_to_end(&mut buf)?; - assert_eq!(bs.len(), buf.len(), "read twice size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!("{:x}", Sha256::digest(&buf)), - "read twice content" - ); - - Ok(()) - } - - #[tokio::test] - async fn test_blocking_bypass_seek_relative() -> anyhow::Result<()> { - let bs = Bytes::copy_from_slice( - &b"Hello, World! I'm going to tests a seek relative related bug!"[..], - ); - let r = Box::new(oio::Cursor::from(bs.clone())) as oio::BlockingReader; - let r = Box::new(BufferReader::new(r, 10)) as oio::BlockingReader; - let mut r = BlockingReader::new(r); - - let mut cur = 0; - for _ in 0..3 { - let bs = r.read(5)?; - assert_eq!(bs.len(), 5); - cur += 5; - } - - let ret_cur = r.seek(SeekFrom::Current(-15))?; - assert_eq!(cur - 15, ret_cur); - - Ok(()) - } - - #[tokio::test] - async fn test_blocking_bypass_read_and_seek_relative() -> anyhow::Result<()> { - let bs = Bytes::copy_from_slice( - &b"Hello, World! 
I'm going to tests a seek relative related bug!"[..], - ); - let r = Box::new(oio::Cursor::from(bs.clone())) as oio::BlockingReader; - let r = Box::new(BufferReader::new(r, 5)) as oio::BlockingReader; - let mut r = BlockingReader::new(r); - - let mut cur = 0; - for _ in 0..3 { - let bs = r.read(6)?; - assert_eq!(bs.len(), 6); - cur += 6; - } - - let ret_cur = r.seek(SeekFrom::Current(6))?; - assert_eq!(cur + 6, ret_cur); - - Ok(()) - } - - #[tokio::test] - async fn test_blocking_read_part() -> anyhow::Result<()> { - let (bs, _) = gen_bytes(); - let acc = Arc::new(MockReadService::new(bs.clone())); - let r = Box::new(RangeReader::new( - acc, - "x", - OpRead::default().with_range(BytesRange::from(4096..4096 + 4096)), - )) as oio::BlockingReader; - let r = Box::new(BufferReader::new(r, 4096 * 1024)) as oio::BlockingReader; - let mut r = BlockingReader::new(r); - - let mut buf = Vec::new(); - r.read_to_end(&mut buf)?; - assert_eq!(4096, buf.len(), "read size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096..4096 + 4096])), - format!("{:x}", Sha256::digest(&buf)), - "read content" - ); - - let n = r.seek(SeekFrom::Start(0))?; - assert_eq!(n, 0, "seek position must be 0"); - - let mut buf = Vec::new(); - r.read_to_end(&mut buf)?; - assert_eq!(4096, buf.len(), "read twice size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096..4096 + 4096])), - format!("{:x}", Sha256::digest(&buf)), - "read twice content" - ); - - let n = r.seek(SeekFrom::Start(1024))?; - assert_eq!(1024, n, "seek to 1024"); - - let buf = r.read_exact(1024)?; - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096 + 1024..4096 + 2048])), - format!("{:x}", Sha256::digest(&buf)), - "read after seek 1024" - ); - - let n = r.seek(SeekFrom::Current(1024))?; - assert_eq!(3072, n, "seek to 3072"); - - let buf = r.read_exact(1024)?; - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096 + 3072..4096 + 3072 + 1024])), - format!("{:x}", Sha256::digest(&buf)), - "read after seek to 3072" - ); - - Ok(()) - } -} diff --git a/core/src/raw/oio/read/file_read.rs b/core/src/raw/oio/read/file_read.rs deleted file mode 100644 index 64596c754a0b..000000000000 --- a/core/src/raw/oio/read/file_read.rs +++ /dev/null @@ -1,319 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io::SeekFrom; -use std::sync::Arc; - -use bytes::Bytes; - -use crate::raw::*; -use crate::*; - -/// FileReader that implement range read and streamable read on seekable reader. -/// -/// `oio::Reader` requires the underlying reader to handle range correctly and have streamable support. -/// But some services like `fs`, `hdfs` only have seek support. FileReader implements range and stream -/// support based on `seek`. 
We will maintain the correct range for give file and implement streamable -/// operations based on [`oio::AdaptiveBuf`]. -pub struct FileReader { - acc: Arc, - path: Arc, - op: OpRead, - - offset: Option, - size: Option, - cur: u64, - - reader: Option, - /// Do we need to reset our cursor? - seek_dirty: bool, -} - -impl FileReader -where - A: Accessor, -{ - /// Create a new FileReader. - /// - /// # Notes - /// - /// It's required that input reader's cursor is at the input `start` of the file. - pub fn new(acc: Arc, path: &str, op: OpRead) -> FileReader { - FileReader { - acc, - path: Arc::new(path.to_string()), - op, - - offset: None, - size: None, - cur: 0, - reader: None, - seek_dirty: false, - } - } -} - -impl FileReader -where - A: Accessor, - R: oio::Read, -{ - /// calculate_offset will make sure that the offset has been set. - async fn offset(r: &mut R, range: BytesRange) -> Result<(Option, Option)> { - let (offset, size) = match (range.offset(), range.size()) { - (None, None) => (0, None), - (None, Some(size)) => { - let start = r.seek(SeekFrom::End(-(size as i64))).await?; - (start, Some(size)) - } - (Some(offset), None) => { - let start = r.seek(SeekFrom::Start(offset)).await?; - (start, None) - } - (Some(offset), Some(size)) => { - let start = r.seek(SeekFrom::Start(offset)).await?; - (start, Some(size)) - } - }; - - Ok((Some(offset), size)) - } - - async fn seek_inner( - r: &mut R, - offset: Option, - size: Option, - cur: u64, - pos: SeekFrom, - ) -> Result { - let offset = offset.expect("offset should be set for calculate_position"); - - match pos { - SeekFrom::Start(n) => { - // It's valid for user to seek outsides end of the file. - r.seek(SeekFrom::Start(offset + n)).await - } - SeekFrom::End(n) => { - let size = - size.expect("size should be set for calculate_position when seek with end"); - if size as i64 + n < 0 { - return Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative position is invalid", - ) - .with_context("position", format!("{pos:?}"))); - } - // size is known, we can convert SeekFrom::End into SeekFrom::Start. - let pos = SeekFrom::Start(offset + (size as i64 + n) as u64); - r.seek(pos).await - } - SeekFrom::Current(n) => { - if cur as i64 + n < 0 { - return Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative position is invalid", - ) - .with_context("position", format!("{pos:?}"))); - } - let pos = SeekFrom::Start(offset + (cur as i64 + n) as u64); - r.seek(pos).await - } - } - } -} - -impl FileReader -where - A: Accessor, - R: oio::BlockingRead, -{ - /// calculate_offset will make sure that the offset has been set. - fn calculate_offset(r: &mut R, range: BytesRange) -> Result<(Option, Option)> { - let (offset, size) = match (range.offset(), range.size()) { - (None, None) => (0, None), - (None, Some(size)) => { - let start = r.seek(SeekFrom::End(-(size as i64)))?; - (start, Some(size)) - } - (Some(offset), None) => { - let start = r.seek(SeekFrom::Start(offset))?; - (start, None) - } - (Some(offset), Some(size)) => { - let start = r.seek(SeekFrom::Start(offset))?; - (start, Some(size)) - } - }; - - Ok((Some(offset), size)) - } - - fn blocking_seek_inner( - r: &mut R, - offset: Option, - size: Option, - cur: u64, - pos: SeekFrom, - ) -> Result { - let offset = offset.expect("offset should be set for calculate_position"); - - match pos { - SeekFrom::Start(n) => { - // It's valid for user to seek outsides end of the file. 
- r.seek(SeekFrom::Start(offset + n)) - } - SeekFrom::End(n) => { - let size = - size.expect("size should be set for calculate_position when seek with end"); - if size as i64 + n < 0 { - return Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative position is invalid", - ) - .with_context("position", format!("{pos:?}"))); - } - // size is known, we can convert SeekFrom::End into SeekFrom::Start. - let pos = SeekFrom::Start(offset + (size as i64 + n) as u64); - r.seek(pos) - } - SeekFrom::Current(n) => { - if cur as i64 + n < 0 { - return Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative position is invalid", - ) - .with_context("position", format!("{pos:?}"))); - } - let pos = SeekFrom::Start(offset + (cur as i64 + n) as u64); - r.seek(pos) - } - } - } -} - -impl oio::Read for FileReader -where - A: Accessor, - R: oio::Read, -{ - async fn read(&mut self, limit: usize) -> Result { - if self.reader.is_none() { - // FileReader doesn't support range, we will always use full range to open a file. - let op = self.op.clone().with_range(BytesRange::from(..)); - let (_, r) = self.acc.read(&self.path, op).await?; - self.reader = Some(r); - } - - let r = self.reader.as_mut().expect("reader must be valid"); - - // We should know where to start read the data. - if self.offset.is_none() { - (self.offset, self.size) = Self::offset(r, self.op.range()).await?; - } - - r.read(limit).await - } - - async fn seek(&mut self, pos: SeekFrom) -> Result { - if self.reader.is_none() { - // FileReader doesn't support range, we will always use full range to open a file. - let op = self.op.clone().with_range(BytesRange::from(..)); - let (_, r) = self.acc.read(&self.path, op).await?; - self.reader = Some(r); - } - - let r = self.reader.as_mut().expect("reader must be valid"); - - // We should know where to start read the data. - if self.offset.is_none() { - (self.offset, self.size) = Self::offset(r, self.op.range()).await?; - } - - // Fetch size when seek end. - let current_offset = self.offset.unwrap() + self.cur; - if matches!(pos, SeekFrom::End(_)) && self.size.is_none() { - let size = r.seek(SeekFrom::End(0)).await?; - self.size = Some(size - self.offset.unwrap()); - self.seek_dirty = true; - } - if self.seek_dirty { - // Reset cursor. - r.seek(SeekFrom::Start(current_offset)).await?; - self.seek_dirty = false; - } - - let pos = Self::seek_inner(r, self.offset, self.size, self.cur, pos).await?; - self.cur = pos - self.offset.unwrap(); - Ok(self.cur) - } -} - -impl oio::BlockingRead for FileReader -where - A: Accessor, - R: oio::BlockingRead, -{ - fn read(&mut self, limit: usize) -> Result { - if self.reader.is_none() { - // FileReader doesn't support range, we will always use full range to open a file. - let op = self.op.clone().with_range(BytesRange::from(..)); - let (_, r) = self.acc.blocking_read(&self.path, op)?; - self.reader = Some(r); - } - - let r = self.reader.as_mut().expect("reader must be valid"); - - // We should know where to start read the data. - if self.offset.is_none() { - (self.offset, self.size) = Self::calculate_offset(r, self.op.range())?; - } - - r.read(limit) - } - - fn seek(&mut self, pos: SeekFrom) -> Result { - if self.reader.is_none() { - // FileReader doesn't support range, we will always use full range to open a file. 
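Everything `FileReader` had to emulate reduces, for a genuinely seekable source, to "seek to the range start, then cap the bytes read at the range size". A sketch with std I/O, shown on an in-memory cursor (the same code works unchanged for `std::fs::File`):

use std::io::{Cursor, Read, Seek, SeekFrom};

fn read_range<R: Read + Seek>(r: &mut R, offset: u64, size: u64) -> std::io::Result<Vec<u8>> {
    r.seek(SeekFrom::Start(offset))?;
    let mut buf = Vec::new();
    // `take` caps the read at `size` bytes, clamping at end of file.
    r.take(size).read_to_end(&mut buf)?;
    Ok(buf)
}

fn main() -> std::io::Result<()> {
    let mut file = Cursor::new(b"Hello, World!".to_vec());
    assert_eq!(read_range(&mut file, 7, 5)?, b"World");
    Ok(())
}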
- let op = self.op.clone().with_range(BytesRange::from(..)); - let (_, r) = self.acc.blocking_read(&self.path, op)?; - self.reader = Some(r); - } - - let r = self.reader.as_mut().expect("reader must be valid"); - - // We should know where to start read the data. - if self.offset.is_none() { - (self.offset, self.size) = Self::calculate_offset(r, self.op.range())?; - } - // Fetch size when seek end. - let current_offset = self.offset.unwrap() + self.cur; - if matches!(pos, SeekFrom::End(_)) && self.size.is_none() { - let size = r.seek(SeekFrom::End(0))?; - self.size = Some(size - self.offset.unwrap()); - self.seek_dirty = true; - } - if self.seek_dirty { - // Reset cursor. - r.seek(SeekFrom::Start(current_offset))?; - self.seek_dirty = false; - } - - let pos = Self::blocking_seek_inner(r, self.offset, self.size, self.cur, pos)?; - self.cur = pos - self.offset.unwrap(); - Ok(self.cur) - } -} diff --git a/core/src/raw/oio/read/futures_read.rs b/core/src/raw/oio/read/futures_read.rs deleted file mode 100644 index 7b1bd2e6aa8b..000000000000 --- a/core/src/raw/oio/read/futures_read.rs +++ /dev/null @@ -1,84 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io::SeekFrom; - -use bytes::Bytes; -use futures::AsyncRead; -use futures::AsyncReadExt; -use futures::AsyncSeek; -use futures::AsyncSeekExt; -use tokio::io::ReadBuf; - -use crate::raw::*; -use crate::*; - -/// FuturesReader implements [`oio::Read`] via [`AsyncRead`] + [`AsyncSeek`]. -pub struct FuturesReader { - inner: R, - buf: Vec, -} - -impl FuturesReader { - /// Create a new futures reader. - pub fn new(inner: R) -> Self { - Self { - inner, - buf: Vec::with_capacity(64 * 1024), - } - } -} - -impl oio::Read for FuturesReader -where - R: AsyncRead + AsyncSeek + Unpin + Send + Sync, -{ - async fn seek(&mut self, pos: SeekFrom) -> Result { - self.inner.seek(pos).await.map_err(|err| { - new_std_io_error(err) - .with_operation(oio::ReadOperation::Seek) - .with_context("source", "FuturesReader") - }) - } - - async fn read(&mut self, limit: usize) -> Result { - // Make sure buf has enough space. - if self.buf.capacity() < limit { - self.buf.reserve(limit); - } - let buf = self.buf.spare_capacity_mut(); - let mut read_buf: ReadBuf = ReadBuf::uninit(buf); - - // SAFETY: Read at most `size` bytes into `read_buf`. 
- unsafe { - read_buf.assume_init(limit); - } - - let n = self - .inner - .read(read_buf.initialized_mut()) - .await - .map_err(|err| { - new_std_io_error(err) - .with_operation(oio::ReadOperation::Read) - .with_context("source", "FuturesReader") - })?; - read_buf.set_filled(n); - - Ok(Bytes::copy_from_slice(read_buf.filled())) - } -} diff --git a/core/src/raw/oio/read/into_read_from_stream.rs b/core/src/raw/oio/read/into_read_from_stream.rs deleted file mode 100644 index ca1c43e9b361..000000000000 --- a/core/src/raw/oio/read/into_read_from_stream.rs +++ /dev/null @@ -1,64 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::cmp::min; -use std::io::SeekFrom; - -use bytes::Bytes; -use futures::StreamExt; - -use crate::raw::*; -use crate::*; - -/// Convert given stream `futures::Stream>` into [`oio::Reader`]. -pub fn into_read_from_stream(stream: S) -> FromStreamReader { - FromStreamReader { - inner: stream, - buf: Bytes::new(), - } -} - -/// FromStreamReader will convert a `futures::Stream>` into `oio::Read` -pub struct FromStreamReader { - inner: S, - buf: Bytes, -} - -impl oio::Read for FromStreamReader -where - S: futures::Stream> + Send + Sync + Unpin + 'static, - T: Into, -{ - async fn seek(&mut self, _: SeekFrom) -> Result { - Err(Error::new( - ErrorKind::Unsupported, - "FromStreamReader can't support operation", - )) - } - - async fn read(&mut self, limit: usize) -> Result { - if self.buf.is_empty() { - self.buf = match self.inner.next().await.transpose()? { - Some(v) => v.into(), - None => return Ok(Bytes::new()), - }; - } - - let bs = self.buf.split_to(min(limit, self.buf.len())); - Ok(bs) - } -} diff --git a/core/src/raw/oio/read/into_streamable_read.rs b/core/src/raw/oio/read/into_streamable_read.rs deleted file mode 100644 index 6fe21a2c6077..000000000000 --- a/core/src/raw/oio/read/into_streamable_read.rs +++ /dev/null @@ -1,131 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
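The deleted `FromStreamReader` adapted a `Stream` of byte chunks into a pull-based reader: keep the leftover of the last chunk, and fetch the next chunk only when that runs dry. A minimal version using just the `futures` crate (a hypothetical type, not the OpenDAL one; chunks are `Vec<u8>` and error handling is left out):

use futures::{executor::block_on, Stream, StreamExt};

struct FromStream<S> {
    inner: S,
    buf: Vec<u8>, // leftover of the most recent chunk
}

impl<S: Stream<Item = Vec<u8>> + Unpin> FromStream<S> {
    async fn read(&mut self, limit: usize) -> Vec<u8> {
        if self.buf.is_empty() {
            match self.inner.next().await {
                Some(chunk) => self.buf = chunk,
                None => return Vec::new(), // stream finished: EOF
            }
        }
        // Serve at most one chunk per call, like the original.
        let n = limit.min(self.buf.len());
        self.buf.drain(..n).collect()
    }
}

fn main() {
    let s = futures::stream::iter(vec![b"ab".to_vec(), b"cdef".to_vec()]);
    let mut r = FromStream { inner: s, buf: Vec::new() };
    block_on(async {
        assert_eq!(r.read(3).await, b"ab"); // capped at the current chunk
        assert_eq!(r.read(3).await, b"cde");
        assert_eq!(r.read(3).await, b"f");
        assert!(r.read(3).await.is_empty());
    });
}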
- -use std::cmp::min; -use std::io::SeekFrom; - -use bytes::Bytes; -use tokio::io::ReadBuf; - -use crate::raw::*; -use crate::*; - -/// into_streamable is used to make [`oio::Read`] or [`oio::BlockingRead`] streamable. -pub fn into_streamable_read(r: R, capacity: usize) -> StreamableReader { - StreamableReader { - r, - buf: Vec::with_capacity(capacity), - } -} - -/// Make given read streamable. -pub struct StreamableReader { - r: R, - buf: Vec, -} - -impl oio::Read for StreamableReader { - async fn seek(&mut self, pos: SeekFrom) -> Result { - self.r.seek(pos).await - } - - async fn read(&mut self, limit: usize) -> Result { - let size = min(self.buf.capacity(), limit); - - let dst = self.buf.spare_capacity_mut(); - let mut buf = ReadBuf::uninit(dst); - - // SAFETY: Read at most `size` bytes into `read_buf`. - unsafe { buf.assume_init(size) }; - - let bs = self.r.read(size).await?; - buf.put_slice(&bs); - buf.set_filled(bs.len()); - - Ok(Bytes::from(buf.filled().to_vec())) - } -} - -impl oio::BlockingRead for StreamableReader { - fn read(&mut self, limit: usize) -> Result { - self.r.read(limit) - } - - fn seek(&mut self, pos: SeekFrom) -> Result { - self.r.seek(pos) - } -} - -#[cfg(test)] -mod tests { - use crate::raw::oio::Read; - use bytes::BufMut; - use bytes::BytesMut; - use rand::prelude::*; - - use super::*; - - #[tokio::test] - async fn test_into_stream() { - let mut rng = ThreadRng::default(); - // Generate size between 1B..16MB. - let size = rng.gen_range(1..16 * 1024 * 1024); - let mut content = vec![0; size]; - rng.fill_bytes(&mut content); - // Generate cap between 1B..1MB; - let cap = rng.gen_range(1..1024 * 1024); - - let r = oio::Cursor::from(content.clone()); - let mut s = into_streamable_read(Box::new(r) as oio::Reader, cap); - - let mut bs = BytesMut::new(); - loop { - let b = s.read(4 * 1024 * 1024).await.expect("read must success"); - if b.is_empty() { - break; - } - bs.put_slice(&b); - } - assert_eq!(bs.freeze().to_vec(), content) - } - - #[test] - fn test_into_stream_blocking() { - use oio::BlockingRead; - - let mut rng = ThreadRng::default(); - // Generate size between 1B..16MB. - let size = rng.gen_range(1..16 * 1024 * 1024); - let mut content = vec![0; size]; - rng.fill_bytes(&mut content); - // Generate cap between 1B..1MB; - let cap = rng.gen_range(1..1024 * 1024); - - let r = oio::Cursor::from(content.clone()); - let mut s = into_streamable_read(Box::new(r) as oio::BlockingReader, cap); - - let mut bs = BytesMut::with_capacity(size); - loop { - let buf = s.read(size).expect("read must success"); - if buf.is_empty() { - break; - } - bs.put_slice(&buf) - } - assert_eq!(bs.freeze().to_vec(), content) - } -} diff --git a/core/src/raw/oio/read/lazy_read.rs b/core/src/raw/oio/read/lazy_read.rs deleted file mode 100644 index 1fd1c71b59ff..000000000000 --- a/core/src/raw/oio/read/lazy_read.rs +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io::SeekFrom; -use std::sync::Arc; - -use bytes::Bytes; - -use crate::raw::*; -use crate::*; - -/// LazyReader implements [`oio::Read`] in a lazy way. -/// -/// The real requests are send when users calling read or seek. -pub struct LazyReader { - acc: Arc, - path: Arc, - op: OpRead, - reader: Option, -} - -impl LazyReader -where - A: Accessor, -{ - /// Create a new [`oio::Reader`] with lazy support. - pub fn new(acc: Arc, path: &str, op: OpRead) -> LazyReader { - LazyReader { - acc, - path: Arc::new(path.to_string()), - op, - - reader: None, - } - } -} - -impl LazyReader -where - A: Accessor, - R: oio::Read, -{ - async fn reader(&mut self) -> Result<&mut R> { - if self.reader.is_none() { - let (_, r) = self.acc.read(&self.path, self.op.clone()).await?; - self.reader = Some(r); - } - - Ok(self.reader.as_mut().expect("reader must be valid")) - } -} - -impl oio::Read for LazyReader -where - A: Accessor, - R: oio::Read, -{ - async fn seek(&mut self, pos: SeekFrom) -> Result { - self.reader().await?.seek(pos).await - } - - async fn read(&mut self, limit: usize) -> Result { - let r = self.reader().await?; - r.read(limit).await - } -} - -impl LazyReader -where - A: Accessor, - R: oio::BlockingRead, -{ - fn blocking_reader(&mut self) -> Result<&mut R> { - if self.reader.is_none() { - let (_, r) = self.acc.blocking_read(&self.path, self.op.clone())?; - self.reader = Some(r); - } - - Ok(self.reader.as_mut().expect("reader must be valid")) - } -} - -impl oio::BlockingRead for LazyReader -where - A: Accessor, - R: oio::BlockingRead, -{ - fn read(&mut self, limit: usize) -> Result { - self.blocking_reader()?.read(limit) - } - - fn seek(&mut self, pos: SeekFrom) -> Result { - self.blocking_reader()?.seek(pos) - } -} diff --git a/core/src/raw/oio/read/mod.rs b/core/src/raw/oio/read/mod.rs index d05b4593c8b1..3144ce874b3a 100644 --- a/core/src/raw/oio/read/mod.rs +++ b/core/src/raw/oio/read/mod.rs @@ -21,32 +21,3 @@ pub use api::BlockingReader; pub use api::Read; pub use api::ReadOperation; pub use api::Reader; - -mod into_streamable_read; -pub use into_streamable_read::into_streamable_read; -pub use into_streamable_read::StreamableReader; - -mod range_read; -pub use range_read::RangeReader; - -mod file_read; -pub use file_read::FileReader; - -mod into_read_from_stream; -pub use into_read_from_stream::into_read_from_stream; -pub use into_read_from_stream::FromStreamReader; - -mod futures_read; -pub use futures_read::FuturesReader; - -mod tokio_read; -pub use tokio_read::TokioReader; - -mod std_read; -pub use std_read::StdReader; - -mod lazy_read; -pub use lazy_read::LazyReader; - -mod buffer_reader; -pub use buffer_reader::BufferReader; diff --git a/core/src/raw/oio/read/range_read.rs b/core/src/raw/oio/read/range_read.rs deleted file mode 100644 index 56289d534129..000000000000 --- a/core/src/raw/oio/read/range_read.rs +++ /dev/null @@ -1,603 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
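`LazyReader`, deleted above, is nothing more than the open-on-first-use pattern: hold an `Option<R>`, materialize it on the first call, and reuse it afterwards. The pattern in isolation, with a hypothetical `open` closure standing in for the accessor call:

use std::io::Read;

struct Lazy<R, F: Fn() -> R> {
    open: F,
    reader: Option<R>,
}

impl<R, F: Fn() -> R> Lazy<R, F> {
    fn reader(&mut self) -> &mut R {
        if self.reader.is_none() {
            // Only the first use pays the cost of opening.
            self.reader = Some((self.open)());
        }
        self.reader.as_mut().expect("reader must be valid")
    }
}

fn main() {
    let mut lazy = Lazy {
        open: || std::io::Cursor::new(vec![1u8, 2, 3]),
        reader: None,
    };
    let mut buf = Vec::new();
    lazy.reader().read_to_end(&mut buf).unwrap();
    assert_eq!(buf, [1, 2, 3]);
}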
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io::SeekFrom; -use std::sync::Arc; - -use bytes::Bytes; - -use crate::raw::*; -use crate::*; - -/// RangeReader that can do seek on non-seekable reader. -/// -/// `oio::Reader` requires the underlying reader to be seekable, but some services like s3, gcs -/// doesn't support seek natively. RangeReader implement seek by read_with_range. We will start -/// a new read request with the correct range when seek is called. -/// -/// The `seek` operation on `RangeReader` is zero cost and purely in-memory. But calling `seek` -/// while there is a pending read request will cancel the request and start a new one. This could -/// add extra cost to the read operation. -pub struct RangeReader { - acc: Arc, - path: Arc, - op: OpRead, - - offset: Option, - size: Option, - cur: u64, - reader: Option, -} - -impl RangeReader -where - A: Accessor, -{ - /// Create a new [`oio::Reader`] by range support. - /// - /// # Input - /// - /// The input is an Accessor will may return a non-seekable reader. - /// - /// # Output - /// - /// The output is a reader that can be seek by range. - /// - /// # Notes - /// - /// This operation is not zero cost. If the accessor already returns a - /// seekable reader, please don't use this. - pub fn new(acc: Arc, path: &str, op: OpRead) -> RangeReader { - // Normalize range like `..` into `0..` to make sure offset is valid. - let (offset, size) = match (op.range().offset(), op.range().size()) { - (None, None) => (Some(0), None), - v => v, - }; - - RangeReader { - acc, - path: Arc::new(path.to_string()), - op, - - offset, - size, - cur: 0, - reader: None, - } - } - - /// Ensure current reader's offset is valid via total_size. - fn ensure_offset(&mut self, total_size: u64) -> Result<()> { - (self.offset, self.size) = match (self.offset, self.size) { - (None, Some(size)) => { - if size > total_size { - // If returns an error, we should reset - // state to Idle so that we can retry it. - self.reader = None; - return Err(Error::new( - ErrorKind::InvalidInput, - "read to a negative or overflowing position is invalid", - )); - } - - (Some(total_size - size), Some(size)) - } - (Some(offset), None) => { - // It's valid for reader to seek to a position that out of the content length. - // We should return `Ok(0)` instead of an error at this case to align fs behavior. - let size = total_size.checked_sub(offset).unwrap_or_default(); - - (Some(offset), Some(size)) - } - (Some(offset), Some(size)) => (Some(offset), Some(size)), - (None, None) => { - unreachable!("fill_range should not reach this case after normalization") - } - }; - - Ok(()) - } - - /// Ensure size will use the information returned by RpRead to calculate the correct size for reader. - /// - /// - If `RpRead` returns `range`, we can calculate the correct size by `range.size()`. - /// - If `RpRead` returns `size`, we can use it's as the returning body's size. 
- fn ensure_size(&mut self, total_size: Option, content_size: Option) { - if let Some(total_size) = total_size { - // It's valid for reader to seek to a position that out of the content length. - // We should return `Ok(0)` instead of an error at this case to align fs behavior. - let size = total_size - .checked_sub(self.offset.expect("reader offset must be valid")) - .unwrap_or_default(); - - // Ensure size when: - // - // - reader's size is unknown. - // - reader's size is larger than file's size. - if self.size.is_none() || Some(size) < self.size { - self.size = Some(size); - return; - } - } - - if let Some(content_size) = content_size { - if content_size == 0 { - // Skip size set if content size is 0 since it could be invalid. - // - // For example, users seek to `u64::MAX` and calling read. - return; - } - - let calculated_size = content_size + self.cur; - - // Ensure size when: - // - // - reader's size is unknown. - // - reader's size is larger than file's size. - if self.size.is_none() || Some(calculated_size) < self.size { - self.size = Some(calculated_size); - } - } - } - - /// Calculate the current range, maybe sent as next read request. - /// - /// # Panics - /// - /// Offset must be normalized before calling this function. - /// - /// - `..` should be transformed into `0..` - /// - `..size` should be transformed into `(total-size)..total`. - fn calculate_range(&self) -> BytesRange { - let offset = self - .offset - .expect("offset must be set before calculating range"); - - BytesRange::new(Some(offset + self.cur), self.size.map(|v| v - self.cur)) - } -} - -impl RangeReader -where - A: Accessor, - R: oio::Read, -{ - async fn read_future(&self) -> Result<(RpRead, R)> { - let mut op = self.op.clone(); - // cur != 0 means we have read some data out, we should convert - // the op into deterministic to avoid ETag changes. - if self.cur != 0 { - op = op.into_deterministic(); - } - // Alter OpRead with correct calculated range. - op = op.with_range(self.calculate_range()); - - self.acc.read(&self.path, op).await - } - - async fn stat_future(&self) -> Result { - // Handle if-match and if-none-match correctly. - let mut args = OpStat::default(); - // TODO: stat should support range to check if ETag matches. - if self.op.range().is_full() { - if let Some(v) = self.op.if_match() { - args = args.with_if_match(v); - } - if let Some(v) = self.op.if_none_match() { - args = args.with_if_none_match(v); - } - } - - self.acc.stat(&self.path, args).await - } -} - -impl RangeReader -where - A: Accessor, - R: oio::BlockingRead, -{ - fn read_action(&self) -> Result<(RpRead, R)> { - let acc = self.acc.clone(); - let path = self.path.clone(); - - let mut op = self.op.clone(); - // cur != 0 means we have read some data out, we should convert - // the op into deterministic to avoid ETag changes. - if self.cur != 0 { - op = op.into_deterministic(); - } - // Alter OpRead with correct calculated range. - op = op.with_range(self.calculate_range()); - - acc.blocking_read(&path, op) - } - - fn stat_action(&self) -> Result { - let acc = self.acc.clone(); - let path = self.path.clone(); - - // Handle if-match and if-none-match correctly. - let mut args = OpStat::default(); - // TODO: stat should support range to check if ETag matches. 
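`calculate_range` above is the whole trick behind resuming: the next wire request starts at the absolute position `offset + cur`, and whatever size is known shrinks by the bytes already consumed. The arithmetic in isolation:

fn calculate_range(offset: u64, cur: u64, size: Option<u64>) -> (u64, Option<u64>) {
    (offset + cur, size.map(|v| v - cur))
}

fn main() {
    // A reader over bytes 100..200 that has already yielded 30 bytes
    // resumes with a request for bytes 130..200.
    assert_eq!(calculate_range(100, 30, Some(100)), (130, Some(70)));
    // Total size unknown: resume from the absolute position, unbounded.
    assert_eq!(calculate_range(100, 30, None), (130, None));
}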
- if self.op.range().is_full() { - if let Some(v) = self.op.if_match() { - args = args.with_if_match(v); - } - if let Some(v) = self.op.if_none_match() { - args = args.with_if_none_match(v); - } - } - - acc.blocking_stat(&path, args) - } -} - -impl oio::Read for RangeReader -where - A: Accessor, - R: oio::Read, -{ - async fn read(&mut self, limit: usize) -> Result { - // Sanity check for normal cases. - if self.cur >= self.size.unwrap_or(u64::MAX) { - return Ok(Bytes::new()); - } - - if self.offset.is_none() { - let rp = match self.stat_future().await { - Ok(v) => v, - Err(err) => return Err(err), - }; - let length = rp.into_metadata().content_length(); - self.ensure_offset(length)? - } - if self.reader.is_none() { - let (rp, r) = match self.read_future().await { - Ok((rp, r)) => (rp, r), - Err(err) => return Err(err), - }; - - self.ensure_size(rp.range().unwrap_or_default().size(), rp.size()); - self.reader = Some(r); - } - - let r = self.reader.as_mut().expect("reader must be valid"); - match r.read(limit).await { - Ok(bs) => { - self.cur += bs.len() as u64; - Ok(bs) - } - Err(err) => { - self.reader = None; - Err(err) - } - } - } - - async fn seek(&mut self, pos: SeekFrom) -> Result { - // There is an optimization here that we can calculate if users trying to seek - // the same position, for example, `reader.seek(SeekFrom::Current(0))`. - // In this case, we can just return current position without dropping reader. - if pos == SeekFrom::Current(0) || pos == SeekFrom::Start(self.cur) { - return Ok(self.cur); - } - - // We are seeking to other places, let's drop existing reader. - self.reader = None; - - let (base, amt) = match pos { - SeekFrom::Start(n) => (0, n as i64), - SeekFrom::Current(n) => (self.cur as i64, n), - SeekFrom::End(n) => { - if let Some(size) = self.size { - (size as i64, n) - } else { - let rp = self.stat_future().await?; - let length = rp.into_metadata().content_length(); - self.ensure_offset(length)?; - - (length as i64, n) - } - } - }; - - let seek_pos = match base.checked_add(amt) { - Some(n) if n >= 0 => n as u64, - _ => { - return Err(Error::new( - ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - )) - } - }; - - self.cur = seek_pos; - Ok(self.cur) - } -} - -impl oio::BlockingRead for RangeReader -where - A: Accessor, - R: oio::BlockingRead, -{ - fn read(&mut self, limit: usize) -> Result { - // Sanity check for normal cases. - if self.cur >= self.size.unwrap_or(u64::MAX) { - return Ok(Bytes::new()); - } - - if self.offset.is_none() { - let rp = match self.stat_action() { - Ok(v) => v, - Err(err) => return Err(err), - }; - let length = rp.into_metadata().content_length(); - self.ensure_offset(length)? - } - if self.reader.is_none() { - let (rp, r) = match self.read_action() { - Ok((rp, r)) => (rp, r), - Err(err) => return Err(err), - }; - - self.ensure_size(rp.range().unwrap_or_default().size(), rp.size()); - self.reader = Some(r); - } - - let r = self.reader.as_mut().expect("reader must be valid"); - match r.read(limit) { - Ok(bs) => { - self.cur += bs.len() as u64; - Ok(bs) - } - Err(err) => { - self.reader = None; - Err(err) - } - } - } - - fn seek(&mut self, pos: SeekFrom) -> Result { - // There is an optimization here that we can calculate if users trying to seek - // the same position, for example, `reader.seek(SeekFrom::Current(0))`. - // In this case, we can just return current position without dropping reader. 
- if pos == SeekFrom::Current(0) || pos == SeekFrom::Start(self.cur) { - return Ok(self.cur); - } - - // We are seeking to other places, let's drop existing reader. - self.reader = None; - - let (base, amt) = match pos { - SeekFrom::Start(n) => (0, n as i64), - SeekFrom::Current(n) => (self.cur as i64, n), - SeekFrom::End(n) => { - if let Some(size) = self.size { - (size as i64, n) - } else { - let rp = self.stat_action()?; - let length = rp.into_metadata().content_length(); - self.ensure_offset(length)?; - - (length as i64, n) - } - } - }; - - let seek_pos = match base.checked_add(amt) { - Some(n) if n >= 0 => n as u64, - _ => { - return Err(Error::new( - ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - )) - } - }; - - self.cur = seek_pos; - Ok(self.cur) - } -} - -#[cfg(test)] -mod tests { - use std::io::SeekFrom; - - use async_trait::async_trait; - use bytes::Bytes; - use futures::AsyncReadExt; - use rand::prelude::*; - use sha2::Digest; - use sha2::Sha256; - - use super::*; - - // Generate bytes between [4MiB, 16MiB) - fn gen_bytes() -> (Bytes, usize) { - let mut rng = thread_rng(); - - let size = rng.gen_range(4 * 1024 * 1024..16 * 1024 * 1024); - let mut content = vec![0; size]; - rng.fill_bytes(&mut content); - - (Bytes::from(content), size) - } - - #[derive(Debug, Clone, Default)] - struct MockReadService { - data: Bytes, - } - - impl MockReadService { - fn new(data: Bytes) -> Self { - Self { data } - } - } - - #[async_trait] - impl Accessor for MockReadService { - type Reader = MockReader; - type Writer = (); - type Lister = (); - type BlockingReader = (); - type BlockingWriter = (); - type BlockingLister = (); - - fn info(&self) -> AccessorInfo { - let mut am = AccessorInfo::default(); - am.set_native_capability(Capability { - read: true, - ..Default::default() - }); - - am - } - - async fn read(&self, _: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let bs = args.range().apply_on_bytes(self.data.clone()); - - Ok(( - RpRead::new(), - MockReader { - inner: futures::io::Cursor::new(bs.into()), - }, - )) - } - } - - #[derive(Debug, Clone, Default)] - struct MockReader { - inner: futures::io::Cursor>, - } - - impl oio::Read for MockReader { - async fn seek(&mut self, pos: SeekFrom) -> Result { - let _ = pos; - - Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support seeking", - )) - } - - async fn read(&mut self, limit: usize) -> Result { - let mut bs = vec![0; limit]; - let n = self.inner.read(&mut bs).await.map_err(|err| { - Error::new(ErrorKind::Unexpected, "read data from mock").set_source(err) - })?; - if n == 0 { - Ok(Bytes::new()) - } else { - Ok(Bytes::from(bs[..n].to_vec())) - } - } - } - - #[tokio::test] - async fn test_read_all() -> anyhow::Result<()> { - let (bs, _) = gen_bytes(); - let acc = Arc::new(MockReadService::new(bs.clone())); - - let r = Box::new(RangeReader::new( - acc, - "x", - OpRead::default().with_range(BytesRange::from(..)), - )) as oio::Reader; - let mut r = Reader::new(r); - - let mut buf = Vec::new(); - r.read_to_end(&mut buf).await?; - assert_eq!(bs.len(), buf.len(), "read size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!("{:x}", Sha256::digest(&buf)), - "read content" - ); - - let n = r.seek(SeekFrom::Start(0)).await?; - assert_eq!(n, 0, "seek position must be 0"); - - let mut buf = Vec::new(); - r.read_to_end(&mut buf).await?; - assert_eq!(bs.len(), buf.len(), "read twice size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!("{:x}", 
Sha256::digest(&buf)), - "read twice content" - ); - - Ok(()) - } - - #[tokio::test] - async fn test_read_part() -> anyhow::Result<()> { - let (bs, _) = gen_bytes(); - let acc = Arc::new(MockReadService::new(bs.clone())); - - let r = Box::new(RangeReader::new( - acc, - "x", - OpRead::default().with_range(BytesRange::from(4096..4096 + 4096)), - )) as oio::Reader; - let mut r = Reader::new(r); - - let mut buf = Vec::new(); - r.read_to_end(&mut buf).await?; - assert_eq!(4096, buf.len(), "read size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096..4096 + 4096])), - format!("{:x}", Sha256::digest(&buf)), - "read content" - ); - - let n = r.seek(SeekFrom::Start(0)).await?; - assert_eq!(n, 0, "seek position must be 0"); - - let mut buf = Vec::new(); - r.read_to_end(&mut buf).await?; - assert_eq!(4096, buf.len(), "read twice size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096..4096 + 4096])), - format!("{:x}", Sha256::digest(&buf)), - "read twice content" - ); - - let n = r.seek(SeekFrom::Start(1024)).await?; - assert_eq!(1024, n, "seek to 1024"); - - let buf = r.read_exact(1024).await?; - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096 + 1024..4096 + 2048])), - format!("{:x}", Sha256::digest(&buf)), - "read after seek 1024" - ); - - let n = r.seek(SeekFrom::Current(1024)).await?; - assert_eq!(3072, n, "seek to 3072"); - - let buf = r.read_exact(1024).await?; - assert_eq!( - format!("{:x}", Sha256::digest(&bs[4096 + 3072..4096 + 3072 + 1024])), - format!("{:x}", Sha256::digest(&buf)), - "read after seek to 3072" - ); - - Ok(()) - } -} diff --git a/core/src/raw/oio/read/std_read.rs b/core/src/raw/oio/read/std_read.rs deleted file mode 100644 index 8726ebf2499a..000000000000 --- a/core/src/raw/oio/read/std_read.rs +++ /dev/null @@ -1,77 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use bytes::Bytes; -use std::io::Read; -use std::io::Seek; -use std::io::SeekFrom; -use tokio::io::ReadBuf; - -use crate::raw::*; -use crate::*; - -/// FuturesReader implements [`oio::BlockingRead`] via [`Read`] + [`Seek`]. -pub struct StdReader { - inner: R, - buf: Vec, -} - -impl StdReader { - /// Create a new std reader. - pub fn new(inner: R) -> Self { - Self { - inner, - buf: Vec::with_capacity(64 * 1024), - } - } -} - -impl oio::BlockingRead for StdReader -where - R: Read + Seek + Send + Sync, -{ - fn read(&mut self, limit: usize) -> Result { - // Make sure buf has enough space. - if self.buf.capacity() < limit { - self.buf.reserve(limit); - } - let buf = self.buf.spare_capacity_mut(); - let mut read_buf: ReadBuf = ReadBuf::uninit(buf); - - // SAFETY: Read at most `size` bytes into `read_buf`. 
- unsafe { - read_buf.assume_init(limit); - } - - let n = self.inner.read(read_buf.initialized_mut()).map_err(|err| { - new_std_io_error(err) - .with_operation(oio::ReadOperation::Read) - .with_context("source", "TokioReader") - })?; - read_buf.set_filled(n); - - Ok(Bytes::copy_from_slice(read_buf.filled())) - } - - fn seek(&mut self, pos: SeekFrom) -> Result { - self.inner.seek(pos).map_err(|err| { - new_std_io_error(err) - .with_operation(oio::ReadOperation::BlockingSeek) - .with_context("source", "StdReader") - }) - } -} diff --git a/core/src/raw/oio/read/tokio_read.rs b/core/src/raw/oio/read/tokio_read.rs deleted file mode 100644 index f89e2ec5f35d..000000000000 --- a/core/src/raw/oio/read/tokio_read.rs +++ /dev/null @@ -1,84 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io::SeekFrom; - -use bytes::Bytes; -use tokio::io::AsyncRead; -use tokio::io::AsyncReadExt; -use tokio::io::AsyncSeek; -use tokio::io::AsyncSeekExt; -use tokio::io::ReadBuf; - -use crate::raw::*; -use crate::*; - -/// FuturesReader implements [`oio::Read`] via [`AsyncRead`] + [`AsyncSeek`]. -pub struct TokioReader { - inner: R, - buf: Vec, -} - -impl TokioReader { - /// Create a new tokio reader. - pub fn new(inner: R) -> Self { - Self { - inner, - buf: Vec::with_capacity(64 * 1024), - } - } -} - -impl oio::Read for TokioReader -where - R: AsyncRead + AsyncSeek + Unpin + Send + Sync, -{ - async fn read(&mut self, limit: usize) -> Result { - // Make sure buf has enough space. - if self.buf.capacity() < limit { - self.buf.reserve(limit); - } - let buf = self.buf.spare_capacity_mut(); - let mut read_buf: ReadBuf = ReadBuf::uninit(buf); - - // SAFETY: Read at most `size` bytes into `read_buf`. 
- unsafe { - read_buf.assume_init(limit); - } - - let n = self - .inner - .read(read_buf.initialized_mut()) - .await - .map_err(|err| { - new_std_io_error(err) - .with_operation(oio::ReadOperation::Read) - .with_context("source", "TokioReader") - })?; - read_buf.set_filled(n); - - Ok(Bytes::copy_from_slice(read_buf.filled())) - } - - async fn seek(&mut self, pos: SeekFrom) -> Result { - self.inner.seek(pos).await.map_err(|err| { - new_std_io_error(err) - .with_operation(oio::ReadOperation::Seek) - .with_context("source", "TokioReader") - }) - } -} diff --git a/core/src/raw/oio/stream/api.rs b/core/src/raw/oio/stream/api.rs index 7a672d2ff2a0..4e912c303c1e 100644 --- a/core/src/raw/oio/stream/api.rs +++ b/core/src/raw/oio/stream/api.rs @@ -21,6 +21,7 @@ use std::task::ready; use std::task::Context; use std::task::Poll; +use bytes::Buf; use bytes::Bytes; use bytes::BytesMut; @@ -46,6 +47,16 @@ impl Stream for () { } } +impl Stream for Bytes { + fn poll_next(&mut self, _cx: &mut Context<'_>) -> Poll>> { + if self.has_remaining() { + Poll::Ready(Some(Ok(self.copy_to_bytes(self.remaining())))) + } else { + Poll::Ready(None) + } + } +} + /// `Box` won't implement `Stream` automatically. /// To make Streamer work as expected, we must add this impl. impl Stream for Box { @@ -54,12 +65,6 @@ impl Stream for Box { } } -// impl Stream for T { -// fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { -// raw::oio::Read::poll_next(self, cx) -// } -// } - impl futures::Stream for dyn Stream { type Item = Result; diff --git a/core/src/raw/oio/stream/into_stream.rs b/core/src/raw/oio/stream/into_stream.rs deleted file mode 100644 index 3e34bbf4bab5..000000000000 --- a/core/src/raw/oio/stream/into_stream.rs +++ /dev/null @@ -1,94 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#[cfg(not(target_arch = "wasm32"))] -pub use non_wasm32_impl::*; - -#[cfg(not(target_arch = "wasm32"))] -mod non_wasm32_impl { - use std::task::Context; - use std::task::Poll; - - use bytes::Bytes; - use futures::TryStreamExt; - - use crate::raw::oio; - - /// Convert given futures stream into [`oio::Stream`]. 
- pub fn into_stream(stream: S) -> IntoStream - where - S: futures::Stream> + Send + Sync + Unpin, - { - IntoStream { inner: stream } - } - - pub struct IntoStream { - inner: S, - } - - impl oio::Stream for IntoStream - where - S: futures::Stream> + Send + Sync + Unpin, - { - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - self.inner.try_poll_next_unpin(cx) - } - } -} - -#[cfg(target_arch = "wasm32")] -pub use wasm32_impl::*; -#[cfg(target_arch = "wasm32")] -mod wasm32_impl { - use std::task::Context; - use std::task::Poll; - - use bytes::Bytes; - use futures::TryStreamExt; - - use crate::raw::oio; - - /// Convert given futures stream into [`oio::Stream`]. - pub fn into_stream(stream: S) -> IntoStream - where - S: futures::Stream> + Unpin, - { - IntoStream { inner: stream } - } - - pub struct IntoStream { - inner: S, - } - - /// # Safety - /// - /// wasm32 is a special target that we only have one event-loop for this stream. - unsafe impl Send for IntoStream {} - /// # Safety - /// - /// IntoStream only has mutable references. - unsafe impl Sync for IntoStream {} - - impl oio::Stream for IntoStream - where - S: futures::Stream> + Unpin, - { - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - self.inner.try_poll_next_unpin(cx) - } - } -} diff --git a/core/src/raw/oio/stream/into_stream_from_reader.rs b/core/src/raw/oio/stream/into_stream_from_reader.rs deleted file mode 100644 index cead7d11d72d..000000000000 --- a/core/src/raw/oio/stream/into_stream_from_reader.rs +++ /dev/null @@ -1,92 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::pin::Pin; -use std::task::ready; -use std::task::Context; -use std::task::Poll; - -use bytes::BufMut; -use bytes::Bytes; -use bytes::BytesMut; -use futures::AsyncRead; -use tokio::io::ReadBuf; - -use crate::raw::*; -use crate::*; - -// TODO: 64KiB is picked based on experiences, should be configurable -const DEFAULT_CAPACITY: usize = 64 * 1024; - -/// Convert given futures reader into [`oio::Stream`]. 
-pub fn into_stream_from_reader(r: R) -> FromReaderStream -where - R: AsyncRead + Send + Sync + Unpin, -{ - FromReaderStream { - inner: Some(r), - buf: BytesMut::new(), - } -} - -pub struct FromReaderStream { - inner: Option, - buf: BytesMut, -} - -impl oio::Stream for FromReaderStream -where - S: AsyncRead + Send + Sync + Unpin, -{ - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let reader = match self.inner.as_mut() { - Some(r) => r, - None => return Poll::Ready(None), - }; - - if self.buf.capacity() == 0 { - self.buf.reserve(DEFAULT_CAPACITY); - } - - let dst = self.buf.spare_capacity_mut(); - let mut buf = ReadBuf::uninit(dst); - - // Safety: the buf must contains enough space for reading - unsafe { buf.assume_init(buf.capacity()) }; - - match ready!(Pin::new(reader).poll_read(cx, buf.initialized_mut())) { - Ok(0) => { - // Set inner to None while reaching EOF. - self.inner = None; - Poll::Ready(None) - } - Ok(n) => { - // Safety: read_exact makes sure this buffer has been filled. - unsafe { self.buf.advance_mut(n) } - - let chunk = self.buf.split(); - Poll::Ready(Some(Ok(chunk.freeze()))) - } - Err(err) => Poll::Ready(Some(Err(Error::new( - ErrorKind::Unexpected, - "read data from reader into stream", - ) - .set_temporary() - .set_source(err)))), - } - } -} diff --git a/core/src/raw/oio/stream/mod.rs b/core/src/raw/oio/stream/mod.rs index c71d243cea11..572724dab92f 100644 --- a/core/src/raw/oio/stream/mod.rs +++ b/core/src/raw/oio/stream/mod.rs @@ -19,9 +19,3 @@ mod api; pub use api::Stream; pub use api::StreamExt; pub use api::Streamer; - -mod into_stream_from_reader; -pub use into_stream_from_reader::into_stream_from_reader; - -mod into_stream; -pub use into_stream::into_stream; diff --git a/core/src/raw/oio/write/api.rs b/core/src/raw/oio/write/api.rs index cb6707df7314..9d250d777bb0 100644 --- a/core/src/raw/oio/write/api.rs +++ b/core/src/raw/oio/write/api.rs @@ -15,12 +15,13 @@ // specific language governing permissions and limitations // under the License. -use bytes::Bytes; use std::fmt::Display; use std::fmt::Formatter; use std::future::Future; use std::ops::DerefMut; +use bytes::Bytes; + use crate::raw::*; use crate::*; diff --git a/core/src/raw/oio/write/block_write.rs b/core/src/raw/oio/write/block_write.rs index 3b0266ffbdb6..1c867961a679 100644 --- a/core/src/raw/oio/write/block_write.rs +++ b/core/src/raw/oio/write/block_write.rs @@ -17,7 +17,6 @@ use std::pin::Pin; use std::sync::Arc; - use std::task::Context; use std::task::Poll; diff --git a/core/src/raw/oio/write/exact_buf_write.rs b/core/src/raw/oio/write/exact_buf_write.rs index fba3d8dbaa58..3947bcf03341 100644 --- a/core/src/raw/oio/write/exact_buf_write.rs +++ b/core/src/raw/oio/write/exact_buf_write.rs @@ -15,9 +15,12 @@ // specific language governing permissions and limitations // under the License. 
-use bytes::{Buf, BufMut, Bytes}; use std::mem; +use bytes::Buf; +use bytes::BufMut; +use bytes::Bytes; + use crate::raw::*; use crate::*; diff --git a/core/src/raw/oio/write/multipart_write.rs b/core/src/raw/oio/write/multipart_write.rs index 2f4339329000..726f9bd3bfc6 100644 --- a/core/src/raw/oio/write/multipart_write.rs +++ b/core/src/raw/oio/write/multipart_write.rs @@ -17,7 +17,6 @@ use std::pin::Pin; use std::sync::Arc; - use std::task::Context; use std::task::Poll; diff --git a/core/src/raw/oio/write/range_write.rs b/core/src/raw/oio/write/range_write.rs index a8de1366fe4e..c858f14e9337 100644 --- a/core/src/raw/oio/write/range_write.rs +++ b/core/src/raw/oio/write/range_write.rs @@ -17,7 +17,6 @@ use std::pin::Pin; use std::sync::Arc; - use std::task::Context; use std::task::Poll; diff --git a/core/src/raw/ops.rs b/core/src/raw/ops.rs index 955a5884210e..3bbef0eb26a7 100644 --- a/core/src/raw/ops.rs +++ b/core/src/raw/ops.rs @@ -300,16 +300,12 @@ impl BatchOperation { /// Args for `read` operation. #[derive(Debug, Clone, Default)] pub struct OpRead { - br: BytesRange, if_match: Option, if_none_match: Option, override_content_type: Option, override_cache_control: Option, override_content_disposition: Option, version: Option, - /// The maximum buffer capability. - /// `None` stand for disable buffer. - buffer: Option, } impl OpRead { @@ -318,34 +314,6 @@ impl OpRead { Self::default() } - /// The into_deterministic function transforms the OpRead into a deterministic version. - /// - /// This API is utilized because it allows for internal optimizations such as dividing read - /// ranges or retrying the read request from where it failed. In these scenarios, the expected - /// `ETag` value differs from what users specify in `If-Match` or `If-None-Match`.Therefore, - /// we need to eliminate these conditional headers to ensure that the read operation is - /// deterministic. - /// - /// This API is not intended to be used by users and should never be exposed. - pub(crate) fn into_deterministic(self) -> Self { - Self { - if_match: None, - if_none_match: None, - ..self - } - } - - /// Create a new OpRead with range. - pub fn with_range(mut self, range: BytesRange) -> Self { - self.br = range; - self - } - - /// Get range from OpRead. - pub fn range(&self) -> BytesRange { - self.br - } - /// Sets the content-disposition header that should be send back by the remote read operation. pub fn with_override_content_disposition(mut self, content_disposition: &str) -> Self { self.override_content_disposition = Some(content_disposition.into()); @@ -412,18 +380,6 @@ impl OpRead { pub fn version(&self) -> Option<&str> { self.version.as_deref() } - - /// Set the buffer capability. - pub fn with_buffer(mut self, buffer: usize) -> Self { - self.buffer = Some(buffer); - - self - } - - /// Get buffer from option. - pub fn buffer(&self) -> Option { - self.buffer - } } /// Args for `stat` operation. diff --git a/core/src/raw/std_io_util.rs b/core/src/raw/std_io_util.rs index 1efe5d98c095..9aac5444b220 100644 --- a/core/src/raw/std_io_util.rs +++ b/core/src/raw/std_io_util.rs @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. -use crate::*; use std::io; +use crate::*; + /// Parse std io error into opendal::Error. 
/// /// # TODO diff --git a/core/src/raw/tests/read.rs b/core/src/raw/tests/read.rs index b2503901a32b..326001a94fc0 100644 --- a/core/src/raw/tests/read.rs +++ b/core/src/raw/tests/read.rs @@ -15,42 +15,29 @@ // specific language governing permissions and limitations // under the License. -use std::io::SeekFrom; - use bytes::Bytes; use rand::thread_rng; use rand::RngCore; use sha2::Digest; use sha2::Sha256; -use crate::raw::*; use crate::*; /// ReadAction represents a read action. -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Debug, Clone, Copy, Eq, PartialEq)] pub enum ReadAction { /// Read represents a read action with given input buf size. /// /// # NOTE /// /// The size is the input buf size, it's possible that the actual read size is smaller. - Read(usize), - /// Seek represents a seek action with given seek position. - /// - /// # NOTE - /// - /// It's valid that seek outside of the file's end. - Seek(SeekFrom), + Read(usize, usize), } /// ReadChecker is used to check the correctness of the read process. pub struct ReadChecker { /// Raw Data is the data we write to the storage. raw_data: Bytes, - /// Ranged Data is the data that we read from the storage. - ranged_data: Bytes, - /// Cur is the current position of the read process. - cur: usize, } impl ReadChecker { @@ -58,20 +45,14 @@ impl ReadChecker { /// /// It's by design that we use a random generator to generate the raw data. The content of data /// is not important, we only care about the correctness of the read process. - pub fn new(size: usize, range: impl Into) -> Self { + pub fn new(size: usize) -> Self { let mut rng = thread_rng(); let mut data = vec![0; size]; rng.fill_bytes(&mut data); let raw_data = Bytes::from(data); - let ranged_data = range.into().apply_on_bytes(raw_data.clone()); - - Self { - raw_data, - ranged_data, - cur: 0, - } + Self { raw_data } } /// Return the raw data of this read checker. @@ -83,8 +64,8 @@ impl ReadChecker { /// /// - buf_size is the read action's buf size. /// - output is the output of this read action. - fn check_read(&mut self, input: usize, output: &[u8]) { - if input == 0 { + fn check_read(&self, offset: usize, size: usize, output: &[u8]) { + if size == 0 { assert_eq!( output.len(), 0, @@ -93,20 +74,20 @@ impl ReadChecker { return; } - if input > 0 && output.is_empty() { + if size > 0 && output.is_empty() { assert!( - self.cur >= self.ranged_data.len(), + offset >= self.raw_data.len(), "check read failed: no data read means cur must outsides of ranged_data", ); return; } assert!( - self.cur + output.len() <= self.ranged_data.len(), - "check read failed: cur + output length must be less than ranged_data length, cur: {}, output: {}, ranged_data: {}", self.cur, output.len(), self.ranged_data.len(), + offset + output.len() <= self.raw_data.len(), + "check read failed: cur + output length must be less than ranged_data length, offset: {}, output: {}, ranged_data: {}", offset, output.len(), self.raw_data.len(), ); - let expected = &self.ranged_data[self.cur..self.cur + output.len()]; + let expected = &self.raw_data[offset..offset + output.len()]; // Check the read result assert_eq!( @@ -114,59 +95,21 @@ impl ReadChecker { format!("{:x}", Sha256::digest(expected)), "check read failed: output bs is different with expected bs", ); - - // Update the current position - self.cur += output.len(); - } - - /// check_seek checks the correctness of the read process after a seek action. - /// - /// - input is the `SeekFrom` passed by SeekAction. - /// - output ts the result after the seek operation. 
- fn check_seek(&mut self, input: SeekFrom, output: Result) { - let expected = match input { - SeekFrom::Start(offset) => offset as i64, - SeekFrom::End(offset) => self.ranged_data.len() as i64 + offset, - SeekFrom::Current(offset) => self.cur as i64 + offset, - }; - - if expected < 0 { - let Err(err) = output else { - panic!("check seek failed: seek should fail with negative offset"); - }; - - assert_eq!( - err.kind(), - ErrorKind::InvalidInput, - "check seek failed: seek should fail with error InvalidInput with negative offset" - ); - return; - } - - assert_eq!( - output.unwrap(), - expected as u64, - "check seek failed: seek result is different with expected result", - ); - - // only update the current position when seek succeed - self.cur = expected as usize; } /// Check will check the correctness of the read process via given actions. /// /// Check will panic if any check failed. - pub async fn check(&mut self, mut r: Reader, actions: &[ReadAction]) { + pub async fn check(&mut self, r: Reader, actions: &[ReadAction]) { for action in actions { - match action { - ReadAction::Read(size) => { - let bs = r.read(*size).await.expect("read must success"); - self.check_read(*size, &bs); - } - - ReadAction::Seek(pos) => { - let res = r.seek(*pos).await; - self.check_seek(*pos, res); + match *action { + ReadAction::Read(offset, size) => { + let mut bs = Vec::with_capacity(size); + let n = r + .read(&mut bs, offset as u64, size) + .await + .expect("read must success"); + self.check_read(offset, size, &bs[..n]); } } } @@ -175,17 +118,15 @@ impl ReadChecker { /// Check will check the correctness of the read process via given actions. /// /// Check will panic if any check failed. - pub fn blocking_check(&mut self, mut r: BlockingReader, actions: &[ReadAction]) { + pub fn blocking_check(&mut self, r: BlockingReader, actions: &[ReadAction]) { for action in actions { - match action { - ReadAction::Read(size) => { - let bs = r.read(*size).expect("read must success"); - self.check_read(*size, &bs); - } - - ReadAction::Seek(pos) => { - let res = r.seek(*pos); - self.check_seek(*pos, res); + match *action { + ReadAction::Read(offset, size) => { + let mut bs = Vec::with_capacity(size); + let n = r + .read(&mut bs, offset as u64, size) + .expect("read must success"); + self.check_read(offset, size, &bs[..n]); } } } diff --git a/core/src/services/alluxio/backend.rs b/core/src/services/alluxio/backend.rs index d14d82f0856a..06ad2f6a4ae1 100644 --- a/core/src/services/alluxio/backend.rs +++ b/core/src/services/alluxio/backend.rs @@ -29,6 +29,7 @@ use super::lister::AlluxioLister; use super::writer::AlluxioWriter; use super::writer::AlluxioWriters; use crate::raw::*; +use crate::services::alluxio::reader::AlluxioReader; use crate::*; /// Config for alluxio services support. 
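With `OpRead` no longer carrying a range and seeks gone from the reader contract, the rewritten `ReadChecker` above validates every `ReadAction::Read(offset, size)` directly against the raw data at that offset, with no cursor state in between. A minimal standalone sketch of the same check, assuming only the `bytes` and `sha2` crates (this `check_read` mirrors the one in the diff, simplified for illustration):

    use bytes::Bytes;
    use sha2::{Digest, Sha256};

    /// Verify that `output` equals `raw[offset..offset + output.len()]`.
    fn check_read(raw: &Bytes, offset: usize, size: usize, output: &[u8]) {
        if size == 0 {
            // A zero-sized read must return no bytes.
            assert!(output.is_empty());
            return;
        }
        if output.is_empty() {
            // An empty result is only valid at or past the end of the data.
            assert!(offset >= raw.len());
            return;
        }
        assert!(offset + output.len() <= raw.len());
        let expected = &raw[offset..offset + output.len()];
        assert_eq!(
            format!("{:x}", Sha256::digest(expected)),
            format!("{:x}", Sha256::digest(output)),
        );
    }

Because each action carries its own absolute offset, a failed request can simply be retried with the same pair; there is no seek position to restore.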
@@ -181,7 +182,7 @@ pub struct AlluxioBackend #[async_trait] impl Accessor for AlluxioBackend { - type Reader = IncomingAsyncBody; + type Reader = AlluxioReader; type Writer = AlluxioWriters; type Lister = oio::PageLister<AlluxioLister>; type BlockingReader = (); @@ -195,7 +196,11 @@ impl Accessor for AlluxioBackend { .set_native_capability(Capability { stat: true, - read: true, + // FIXME: + // + // alluxio's read support is not implemented correctly + // We need to refactor by using [page_read](https://github.com/Alluxio/alluxio-py/blob/main/alluxio/const.py#L18) + read: false, write: true, write_can_multi: true, @@ -225,10 +230,8 @@ impl Accessor for AlluxioBackend { async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { let stream_id = self.core.open_file(path).await?; - let resp = self.core.read(stream_id, args.range()).await?; - - let size = parse_content_length(resp.headers())?; - Ok((RpRead::new().with_size(size), resp.into_body())) + let r = AlluxioReader::new(self.core.clone(), stream_id, args.clone()); + Ok((RpRead::new(), r)) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/services/alluxio/core.rs b/core/src/services/alluxio/core.rs index 111f35718631..eb3edd5bc145 100644 --- a/core/src/services/alluxio/core.rs +++ b/core/src/services/alluxio/core.rs @@ -18,7 +18,7 @@ use std::fmt::Debug; use std::fmt::Formatter; -use http::header::RANGE; +use bytes::Buf; use http::Request; use http::Response; use http::StatusCode; @@ -155,9 +155,9 @@ impl AlluxioCore { match status { StatusCode::OK => { - let body = resp.into_body().bytes().await?; + let body = resp.into_body(); let steam_id: u64 = - serde_json::from_slice(&body).map_err(new_json_serialize_error)?; + serde_json::from_reader(body.reader()).map_err(new_json_serialize_error)?; Ok(steam_id) } _ => Err(parse_error(resp).await?), @@ -181,9 +181,9 @@ impl AlluxioCore { match status { StatusCode::OK => { - let body = resp.into_body().bytes().await?; + let body = resp.into_body(); let steam_id: u64 = - serde_json::from_slice(&body).map_err(new_json_serialize_error)?; + serde_json::from_reader(body.reader()).map_err(new_json_serialize_error)?; Ok(steam_id) } _ => Err(parse_error(resp).await?), @@ -261,9 +261,9 @@ impl AlluxioCore { match status { StatusCode::OK => { - let body = resp.into_body().bytes().await?; + let body = resp.into_body(); let file_info: FileInfo = - serde_json::from_slice(&body).map_err(new_json_serialize_error)?; + serde_json::from_reader(body.reader()).map_err(new_json_serialize_error)?; Ok(file_info) } _ => Err(parse_error(resp).await?), @@ -289,37 +289,24 @@ impl AlluxioCore { match status { StatusCode::OK => { - let body = resp.into_body().bytes().await?; + let body = resp.into_body(); let file_infos: Vec<FileInfo> = - serde_json::from_slice(&body).map_err(new_json_deserialize_error)?; + serde_json::from_reader(body.reader()).map_err(new_json_deserialize_error)?; Ok(file_infos) } _ => Err(parse_error(resp).await?), } } - pub async fn read( - &self, - stream_id: u64, - range: BytesRange, - ) -> Result<Response<IncomingAsyncBody>> { - let mut req = Request::post(format!( + /// TODO: we should implement range support correctly.
+ /// + /// Please refer to [alluxio-py](https://github.com/Alluxio/alluxio-py/blob/main/alluxio/const.py#L18) + pub async fn read(&self, stream_id: u64, _: BytesRange) -> Result> { + let req = Request::post(format!( "{}/api/v1/streams/{}/read", - self.endpoint, stream_id + self.endpoint, stream_id, )); - if !range.is_full() { - // alluxio doesn't support read with suffix range. - if range.offset().is_none() && range.size().is_some() { - return Err(Error::new( - ErrorKind::Unsupported, - "azblob doesn't support read with suffix range", - )); - } - - req = req.header(RANGE, range.to_header()); - } - let req = req .body(AsyncBody::Empty) .map_err(new_request_build_error)?; @@ -340,9 +327,9 @@ impl AlluxioCore { match status { StatusCode::OK => { - let body = resp.into_body().bytes().await?; + let body = resp.into_body(); let size: usize = - serde_json::from_slice(&body).map_err(new_json_serialize_error)?; + serde_json::from_reader(body.reader()).map_err(new_json_serialize_error)?; Ok(size) } _ => Err(parse_error(resp).await?), diff --git a/core/src/services/alluxio/error.rs b/core/src/services/alluxio/error.rs index d07fece3103c..2b5c6b260fac 100644 --- a/core/src/services/alluxio/error.rs +++ b/core/src/services/alluxio/error.rs @@ -31,9 +31,9 @@ struct AlluxioError { message: String, } -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let mut kind = match parts.status.as_u16() { 500 => ErrorKind::Unexpected, @@ -62,7 +62,6 @@ pub async fn parse_error(resp: Response) -> Result { #[cfg(test)] mod tests { - use futures::stream; use http::StatusCode; use super::*; @@ -91,10 +90,7 @@ mod tests { for res in err_res { let bs = bytes::Bytes::from(res.0); - let body = IncomingAsyncBody::new( - Box::new(oio::into_stream(stream::iter(vec![Ok(bs.clone())]))), - None, - ); + let body = oio::Buffer::from(bs); let resp = Response::builder() .status(StatusCode::INTERNAL_SERVER_ERROR) .body(body) diff --git a/core/src/services/alluxio/mod.rs b/core/src/services/alluxio/mod.rs index 95e0216d34b0..226d0130353b 100644 --- a/core/src/services/alluxio/mod.rs +++ b/core/src/services/alluxio/mod.rs @@ -22,4 +22,5 @@ pub use backend::AlluxioConfig; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/alluxio/reader.rs b/core/src/services/alluxio/reader.rs new file mode 100644 index 000000000000..7462cd234672 --- /dev/null +++ b/core/src/services/alluxio/reader.rs @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use super::core::*; +use crate::raw::*; +use crate::services::alluxio::error::parse_error; +use crate::*; + +pub struct AlluxioReader { + core: Arc, + + stream_id: u64, + _op: OpRead, +} + +impl AlluxioReader { + pub fn new(core: Arc, stream_id: u64, op: OpRead) -> Self { + AlluxioReader { + core, + stream_id, + _op: op, + } + } +} + +impl oio::Read for AlluxioReader { + async fn read_at(&self, offset: u64, limit: usize) -> Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.read(self.stream_id, range).await?; + + if !resp.status().is_success() { + return Err(parse_error(resp).await?); + } + Ok(resp.into_body()) + } +} diff --git a/core/src/services/alluxio/writer.rs b/core/src/services/alluxio/writer.rs index eed45f2cab08..6232799eea74 100644 --- a/core/src/services/alluxio/writer.rs +++ b/core/src/services/alluxio/writer.rs @@ -20,7 +20,6 @@ use std::sync::Arc; use bytes::Bytes; use super::core::AlluxioCore; - use crate::raw::*; use crate::*; diff --git a/core/src/services/atomicserver/backend.rs b/core/src/services/atomicserver/backend.rs index 0742cd9301fd..bc8742f58719 100644 --- a/core/src/services/atomicserver/backend.rs +++ b/core/src/services/atomicserver/backend.rs @@ -23,6 +23,7 @@ use async_trait::async_trait; use atomic_lib::agents::Agent; use atomic_lib::client::get_authentication_headers; use atomic_lib::commit::sign_message; +use bytes::Buf; use bytes::Bytes; use http::header::CONTENT_DISPOSITION; use http::header::CONTENT_TYPE; @@ -375,9 +376,9 @@ impl Adapter { .body(AsyncBody::Empty) .map_err(new_request_build_error)?; let resp = self.client.send(req).await?; - let bytes_file = resp.into_body().bytes().await?; + let mut bytes_file = resp.into_body(); - Ok(bytes_file) + Ok(bytes_file.copy_to_bytes(bytes_file.remaining())) } } @@ -389,10 +390,9 @@ impl Adapter { for _i in 0..1000 { let req = self.atomic_get_object_request(path)?; let resp = self.client.send(req).await?; - let bytes = resp.into_body().bytes().await?; + let bytes = resp.into_body(); let query_result: QueryResultStruct = - serde_json::from_str(std::str::from_utf8(&bytes).unwrap()) - .map_err(new_json_deserialize_error)?; + serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; if !expect_exist && query_result.results.is_empty() { break; } @@ -424,11 +424,10 @@ impl kv::Adapter for Adapter { async fn get(&self, path: &str) -> Result>> { let req = self.atomic_get_object_request(path)?; let resp = self.client.send(req).await?; - let bytes = resp.into_body().bytes().await?; + let bytes = resp.into_body(); let query_result: QueryResultStruct = - serde_json::from_str(std::str::from_utf8(&bytes).unwrap()) - .map_err(new_json_deserialize_error)?; + serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; if query_result.results.is_empty() { return Err(Error::new( @@ -447,11 +446,10 @@ impl kv::Adapter for Adapter { async fn set(&self, path: &str, value: &[u8]) -> Result<()> { let req = self.atomic_get_object_request(path)?; let res = self.client.send(req).await?; - let bytes = res.into_body().bytes().await?; + let bytes = res.into_body(); let query_result: QueryResultStruct = - serde_json::from_str(std::str::from_utf8(&bytes).unwrap()) - .map_err(new_json_deserialize_error)?; + serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; for result in query_result.results { let req = self.atomic_delete_object_request(&result.id)?; @@ -470,11 +468,10 @@ impl kv::Adapter for Adapter { async fn 
delete(&self, path: &str) -> Result<()> { let req = self.atomic_get_object_request(path)?; let res = self.client.send(req).await?; - let bytes = res.into_body().bytes().await?; + let bytes = res.into_body(); let query_result: QueryResultStruct = - serde_json::from_str(std::str::from_utf8(&bytes).unwrap()) - .map_err(new_json_deserialize_error)?; + serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; for result in query_result.results { let req = self.atomic_delete_object_request(&result.id)?; diff --git a/core/src/services/azblob/backend.rs b/core/src/services/azblob/backend.rs index 7d6e71e98a74..6367ba5135c2 100644 --- a/core/src/services/azblob/backend.rs +++ b/core/src/services/azblob/backend.rs @@ -23,6 +23,7 @@ use std::sync::Arc; use async_trait::async_trait; use base64::prelude::BASE64_STANDARD; use base64::Engine; +use bytes::Buf; use http::header::CONTENT_TYPE; use http::StatusCode; use log::debug; @@ -38,6 +39,7 @@ use super::lister::AzblobLister; use super::writer::AzblobWriter; use crate::raw::*; use crate::services::azblob::core::AzblobCore; +use crate::services::azblob::reader::AzblobReader; use crate::services::azblob::writer::AzblobWriters; use crate::*; @@ -543,7 +545,7 @@ pub struct AzblobBackend { #[cfg_attr(not(target_arch = "wasm32"), async_trait)] #[cfg_attr(target_arch = "wasm32", async_trait(?Send))] impl Accessor for AzblobBackend { - type Reader = IncomingAsyncBody; + type Reader = AzblobReader; type Writer = AzblobWriters; type Lister = oio::PageLister; type BlockingReader = (); @@ -561,8 +563,7 @@ impl Accessor for AzblobBackend { stat_with_if_none_match: true, read: true, - read_can_next: true, - read_with_range: true, + read_with_if_match: true, read_with_if_none_match: true, read_with_override_content_disposition: true, @@ -607,25 +608,10 @@ impl Accessor for AzblobBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.azblob_get_blob(path, &args).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + AzblobReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -667,10 +653,7 @@ impl Accessor for AzblobBackend { let status = resp.status(); match status { - StatusCode::ACCEPTED => { - resp.into_body().consume().await?; - Ok(RpCopy::default()) - } + StatusCode::ACCEPTED => Ok(RpCopy::default()), _ => Err(parse_error(resp).await?), } } @@ -678,7 +661,10 @@ impl Accessor for AzblobBackend { async fn presign(&self, path: &str, args: OpPresign) -> Result { let mut req = match args.operation() { PresignOperation::Stat(v) => self.core.azblob_head_blob_request(path, v)?, - PresignOperation::Read(v) => self.core.azblob_get_blob_request(path, v)?, + PresignOperation::Read(v) => { + self.core + .azblob_get_blob_request(path, BytesRange::default(), v)? 
+ } PresignOperation::Write(_) => self.core.azblob_put_blob_request( path, None, @@ -740,9 +726,10 @@ impl Accessor for AzblobBackend { ) })?; - let multipart: Multipart = Multipart::new() - .with_boundary(boundary) - .parse(resp.into_body().bytes().await?)?; + let mut bs = resp.into_body(); + let bs = bs.copy_to_bytes(bs.remaining()); + + let multipart: Multipart = Multipart::new().with_boundary(boundary).parse(bs)?; let parts = multipart.into_parts(); if paths.len() != parts.len() { diff --git a/core/src/services/azblob/core.rs b/core/src/services/azblob/core.rs index dcf474b18435..da23012f5d69 100644 --- a/core/src/services/azblob/core.rs +++ b/core/src/services/azblob/core.rs @@ -126,7 +126,7 @@ impl AzblobCore { } #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } @@ -163,7 +163,12 @@ impl AzblobCore { } impl AzblobCore { - pub fn azblob_get_blob_request(&self, path: &str, args: &OpRead) -> Result> { + pub fn azblob_get_blob_request( + &self, + path: &str, + range: BytesRange, + args: &OpRead, + ) -> Result> { let p = build_abs_path(&self.root, path); let mut url = format!( @@ -190,18 +195,7 @@ impl AzblobCore { // Set SSE headers. req = self.insert_sse_headers(req); - let range = args.range(); if !range.is_full() { - // azblob doesn't support read with suffix range. - // - // ref: https://learn.microsoft.com/en-us/rest/api/storageservices/specifying-the-range-header-for-blob-service-operations - if range.offset().is_none() && range.size().is_some() { - return Err(Error::new( - ErrorKind::Unsupported, - "azblob doesn't support read with suffix range", - )); - } - req = req.header(http::header::RANGE, range.to_header()); } @@ -223,9 +217,10 @@ impl AzblobCore { pub async fn azblob_get_blob( &self, path: &str, + range: BytesRange, args: &OpRead, - ) -> Result> { - let mut req = self.azblob_get_blob_request(path, args)?; + ) -> Result> { + let mut req = self.azblob_get_blob_request(path, range, args)?; self.sign(&mut req).await?; @@ -423,7 +418,7 @@ impl AzblobCore { size: Option, args: &OpWrite, body: AsyncBody, - ) -> Result> { + ) -> Result> { let mut req = self.azblob_put_block_request(path, block_id, size, args, body)?; self.sign(&mut req).await?; @@ -477,7 +472,7 @@ impl AzblobCore { path: &str, block_ids: Vec, args: &OpWrite, - ) -> Result> { + ) -> Result> { let mut req = self .azblob_complete_put_block_list_request(path, block_ids, args) .await?; @@ -525,7 +520,7 @@ impl AzblobCore { &self, path: &str, args: &OpStat, - ) -> Result> { + ) -> Result> { let mut req = self.azblob_head_blob_request(path, args)?; self.sign(&mut req).await?; @@ -549,18 +544,14 @@ impl AzblobCore { .map_err(new_request_build_error) } - pub async fn azblob_delete_blob(&self, path: &str) -> Result> { + pub async fn azblob_delete_blob(&self, path: &str) -> Result> { let mut req = self.azblob_delete_blob_request(path)?; self.sign(&mut req).await?; self.send(req).await } - pub async fn azblob_copy_blob( - &self, - from: &str, - to: &str, - ) -> Result> { + pub async fn azblob_copy_blob(&self, from: &str, to: &str) -> Result> { let source = build_abs_path(&self.root, from); let target = build_abs_path(&self.root, to); @@ -593,7 +584,7 @@ impl AzblobCore { next_marker: &str, delimiter: &str, limit: Option, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let mut url = format!( @@ -622,10 +613,7 @@ impl AzblobCore { self.send(req).await } - pub async fn azblob_batch_delete( - &self, - 
paths: &[String], - ) -> Result> { + pub async fn azblob_batch_delete(&self, paths: &[String]) -> Result> { let url = format!( "{}/{}?restype=container&comp=batch", self.endpoint, self.container diff --git a/core/src/services/azblob/error.rs b/core/src/services/azblob/error.rs index 58b744134b3d..22cbeb93be6c 100644 --- a/core/src/services/azblob/error.rs +++ b/core/src/services/azblob/error.rs @@ -61,9 +61,9 @@ impl Debug for AzblobError { } /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/azblob/lister.rs b/core/src/services/azblob/lister.rs index 6bd1a61243ba..17b800be9cbb 100644 --- a/core/src/services/azblob/lister.rs +++ b/core/src/services/azblob/lister.rs @@ -58,7 +58,7 @@ impl oio::PageList for AzblobLister { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let output: ListBlobsOutput = de::from_reader(bs.reader()).map_err(new_xml_deserialize_error)?; diff --git a/core/src/services/azblob/mod.rs b/core/src/services/azblob/mod.rs index da314f5b3ea8..bad56f999f97 100644 --- a/core/src/services/azblob/mod.rs +++ b/core/src/services/azblob/mod.rs @@ -22,4 +22,5 @@ pub use backend::AzblobConfig; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/azblob/reader.rs b/core/src/services/azblob/reader.rs new file mode 100644 index 000000000000..077a68f22a0d --- /dev/null +++ b/core/src/services/azblob/reader.rs @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use http::StatusCode; + +use super::core::AzblobCore; +use super::error::parse_error; +use crate::raw::*; +use crate::*; + +pub struct AzblobReader { + core: Arc, + + path: String, + op: OpRead, +} + +impl AzblobReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + AzblobReader { + core, + path: path.to_string(), + op, + } + } +} + +impl oio::Read for AzblobReader { + async fn read_at(&self, offset: u64, limit: usize) -> Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self + .core + .azblob_get_blob(&self.path, range, &self.op) + .await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/azblob/writer.rs b/core/src/services/azblob/writer.rs index 84025362d4a4..224dc078ae2e 100644 --- a/core/src/services/azblob/writer.rs +++ b/core/src/services/azblob/writer.rs @@ -99,10 +99,7 @@ impl oio::AppendWrite for AzblobWriter { let status = resp.status(); match status { - StatusCode::CREATED => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -120,10 +117,7 @@ impl oio::BlockWrite for AzblobWriter { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED | StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -136,10 +130,7 @@ impl oio::BlockWrite for AzblobWriter { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED | StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -152,10 +143,7 @@ impl oio::BlockWrite for AzblobWriter { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED | StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/azdls/backend.rs b/core/src/services/azdls/backend.rs index 4d6a36ed3b8c..e88a64dbcb11 100644 --- a/core/src/services/azdls/backend.rs +++ b/core/src/services/azdls/backend.rs @@ -34,6 +34,7 @@ use super::lister::AzdlsLister; use super::writer::AzdlsWriter; use super::writer::AzdlsWriters; use crate::raw::*; +use crate::services::azdls::reader::AzdlsReader; use crate::*; /// Known endpoint suffix Azure Data Lake Storage Gen2 URI syntax. 
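`AzblobReader` above is the shape every service reader in this patch follows: it holds an `Arc` of the service core plus the path and `OpRead` args, keeps no cursor or cached response, and turns each `read_at(offset, limit)` call into one ranged request, mapping 200/206 to the body and 416 to an empty buffer. A rough standalone sketch of that control flow, where the `Core` trait and its `fetch_range` method are hypothetical stand-ins for calls like `azblob_get_blob`:

    use std::sync::Arc;

    /// Hypothetical stand-in for a service core such as `AzblobCore`.
    trait Core {
        /// Issue one ranged request; returns (http_status, body).
        fn fetch_range(&self, path: &str, offset: u64, limit: usize) -> (u16, Vec<u8>);
    }

    struct ServiceReader<C: Core> {
        core: Arc<C>,
        path: String,
    }

    impl<C: Core> ServiceReader<C> {
        /// Stateless by construction: every call is an independent range read.
        fn read_at(&self, offset: u64, limit: usize) -> Result<Vec<u8>, String> {
            let (status, body) = self.core.fetch_range(&self.path, offset, limit);
            match status {
                200 | 206 => Ok(body),   // OK | PARTIAL_CONTENT
                416 => Ok(Vec::new()),   // RANGE_NOT_SATISFIABLE reads as EOF
                _ => Err(format!("unexpected status {status}")),
            }
        }
    }

Note that `read_at` takes `&self`, not `&mut self`, matching the new `oio::Read` trait; that is one reason the new readers can serve concurrent range reads without the locking a seek-based cursor would require.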
@@ -245,7 +246,7 @@ pub struct AzdlsBackend { #[async_trait] impl Accessor for AzdlsBackend { - type Reader = IncomingAsyncBody; + type Reader = AzdlsReader; type Writer = AzdlsWriters; type Lister = oio::PageLister; type BlockingReader = (); @@ -261,8 +262,6 @@ impl Accessor for AzdlsBackend { stat: true, read: true, - read_can_next: true, - read_with_range: true, write: true, write_can_append: true, @@ -293,10 +292,7 @@ impl Accessor for AzdlsBackend { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(RpCreateDir::default()) - } + StatusCode::CREATED | StatusCode::OK => Ok(RpCreateDir::default()), _ => Err(parse_error(resp).await?), } } @@ -348,25 +344,10 @@ impl Accessor for AzdlsBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.azdls_read(path, args.range()).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + AzdlsReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -400,9 +381,7 @@ impl Accessor for AzdlsBackend { if let Some(resp) = self.core.azdls_ensure_parent_path(to).await? { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::CONFLICT => { - resp.into_body().consume().await?; - } + StatusCode::CREATED | StatusCode::CONFLICT => {} _ => return Err(parse_error(resp).await?), } } @@ -412,10 +391,7 @@ impl Accessor for AzdlsBackend { let status = resp.status(); match status { - StatusCode::CREATED => { - resp.into_body().consume().await?; - Ok(RpRename::default()) - } + StatusCode::CREATED => Ok(RpRename::default()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/azdls/core.rs b/core/src/services/azdls/core.rs index dce6e5bb80ae..5678361903e9 100644 --- a/core/src/services/azdls/core.rs +++ b/core/src/services/azdls/core.rs @@ -91,17 +91,13 @@ impl AzdlsCore { } #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } } impl AzdlsCore { - pub async fn azdls_read( - &self, - path: &str, - range: BytesRange, - ) -> Result> { + pub async fn azdls_read(&self, path: &str, range: BytesRange) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( @@ -114,16 +110,6 @@ impl AzdlsCore { let mut req = Request::get(&url); if !range.is_full() { - // azblob doesn't support read with suffix range. 
- // - // ref: https://learn.microsoft.com/en-us/rest/api/storageservices/specifying-the-range-header-for-blob-service-operations - if range.offset().is_none() && range.size().is_some() { - return Err(Error::new( - ErrorKind::Unsupported, - "azblob doesn't support read with suffix range", - )); - } - req = req.header(http::header::RANGE, range.to_header()); } @@ -175,7 +161,7 @@ impl AzdlsCore { Ok(req) } - pub async fn azdls_rename(&self, from: &str, to: &str) -> Result> { + pub async fn azdls_rename(&self, from: &str, to: &str) -> Result> { let source = build_abs_path(&self.root, from); let target = build_abs_path(&self.root, to); @@ -231,7 +217,7 @@ impl AzdlsCore { Ok(req) } - pub async fn azdls_get_properties(&self, path: &str) -> Result> { + pub async fn azdls_get_properties(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path) .trim_end_matches('/') .to_string(); @@ -253,7 +239,7 @@ impl AzdlsCore { self.client.send(req).await } - pub async fn azdls_delete(&self, path: &str) -> Result> { + pub async fn azdls_delete(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path) .trim_end_matches('/') .to_string(); @@ -280,7 +266,7 @@ impl AzdlsCore { path: &str, continuation: &str, limit: Option, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path) .trim_end_matches('/') .to_string(); @@ -312,7 +298,7 @@ impl AzdlsCore { pub async fn azdls_ensure_parent_path( &self, path: &str, - ) -> Result>> { + ) -> Result>> { let abs_target_path = path.trim_end_matches('/').to_string(); let abs_target_path = abs_target_path.as_str(); let mut parts: Vec<&str> = abs_target_path diff --git a/core/src/services/azdls/error.rs b/core/src/services/azdls/error.rs index 04b839ce9f72..77b475c2954d 100644 --- a/core/src/services/azdls/error.rs +++ b/core/src/services/azdls/error.rs @@ -61,9 +61,9 @@ impl Debug for AzdlsError { } /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/azdls/lister.rs b/core/src/services/azdls/lister.rs index 3a74b27a5835..27d51e266765 100644 --- a/core/src/services/azdls/lister.rs +++ b/core/src/services/azdls/lister.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use bytes::Buf; use serde::Deserialize; use serde_json::de; @@ -47,7 +48,6 @@ impl oio::PageList for AzdlsLister { // azdls will return not found for not-exist path. if resp.status() == http::StatusCode::NOT_FOUND { - resp.into_body().consume().await?; ctx.done = true; return Ok(()); } @@ -68,9 +68,9 @@ impl oio::PageList for AzdlsLister { ctx.done = true; } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let output: Output = de::from_slice(&bs).map_err(new_json_deserialize_error)?; + let output: Output = de::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; for object in output.paths { // Azdls will return `"true"` and `"false"` for is_directory. 
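The lister change above shows the other half of the body rework: responses now carry `oio::Buffer`, which implements `bytes::Buf`, so error parsers drain it with `copy_to_bytes(body.remaining())` while listers deserialize straight from `reader()` without an intermediate allocation. The same two `Buf` calls demonstrated on a plain `bytes::Bytes` (standing in for `oio::Buffer` here), assuming the `bytes` and `serde_json` crates:

    use bytes::Buf;
    use bytes::Bytes;

    fn main() {
        let body = Bytes::from_static(br#"{"error":{"code":"PathNotFound"}}"#);

        // Error parsers copy the whole remaining body into contiguous bytes:
        let mut buf = body.clone();
        let bs = buf.copy_to_bytes(buf.remaining());
        assert_eq!(bs, body);

        // Listers hand the `Buf` reader directly to the deserializer:
        let v: serde_json::Value = serde_json::from_reader(body.reader()).unwrap();
        assert_eq!(v["error"]["code"], "PathNotFound");
    }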
diff --git a/core/src/services/azdls/mod.rs b/core/src/services/azdls/mod.rs index 4284e4595ed0..e407c395eea8 100644 --- a/core/src/services/azdls/mod.rs +++ b/core/src/services/azdls/mod.rs @@ -21,4 +21,5 @@ pub use backend::AzdlsBuilder as Azdls; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/azdls/reader.rs b/core/src/services/azdls/reader.rs new file mode 100644 index 000000000000..914c8a62fec8 --- /dev/null +++ b/core/src/services/azdls/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use http::StatusCode; + +use super::core::AzdlsCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct AzdlsReader { + core: Arc<AzdlsCore>, + + path: String, + _op: OpRead, +} + +impl AzdlsReader { + pub fn new(core: Arc<AzdlsCore>, path: &str, op: OpRead) -> Self { + AzdlsReader { + core, + path: path.to_string(), + _op: op, + } + } +} + +impl oio::Read for AzdlsReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result<oio::Buffer> { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.azdls_read(&self.path, range).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/azdls/writer.rs b/core/src/services/azdls/writer.rs index 6fc3709d04a1..aabd8133f1b5 100644 --- a/core/src/services/azdls/writer.rs +++ b/core/src/services/azdls/writer.rs @@ -22,7 +22,6 @@ use http::StatusCode; use super::core::AzdlsCore; use super::error::parse_error; - use crate::raw::*; use crate::*; @@ -53,9 +52,7 @@ impl oio::OneShotWrite for AzdlsWriter { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - } + StatusCode::CREATED | StatusCode::OK => {} _ => { return Err(parse_error(resp) .await? @@ -76,10 +73,7 @@ impl oio::OneShotWrite for AzdlsWriter { let status = resp.status(); match status { - StatusCode::OK | StatusCode::ACCEPTED => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK | StatusCode::ACCEPTED => Ok(()), _ => Err(parse_error(resp) .await? .with_operation("Backend::azdls_update_request")), } } @@ -113,9 +107,7 @@ impl oio::AppendWrite for AzdlsWriter { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - } + StatusCode::CREATED | StatusCode::OK => {} _ => { return Err(parse_error(resp) .await? 
@@ -134,10 +126,7 @@ impl oio::AppendWrite for AzdlsWriter { let status = resp.status(); match status { - StatusCode::OK | StatusCode::ACCEPTED => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK | StatusCode::ACCEPTED => Ok(()), _ => Err(parse_error(resp) .await? .with_operation("Backend::azdls_update_request")), diff --git a/core/src/services/azfile/backend.rs b/core/src/services/azfile/backend.rs index d11d7a700b8d..128d22e40d6f 100644 --- a/core/src/services/azfile/backend.rs +++ b/core/src/services/azfile/backend.rs @@ -34,6 +34,7 @@ use super::writer::AzfileWriter; use super::writer::AzfileWriters; use crate::raw::*; use crate::services::azfile::lister::AzfileLister; +use crate::services::azfile::reader::AzfileReader; use crate::*; /// Default endpoint of Azure File services. @@ -266,7 +267,7 @@ pub struct AzfileBackend { #[async_trait] impl Accessor for AzfileBackend { - type Reader = IncomingAsyncBody; + type Reader = AzfileReader; type Writer = AzfileWriters; type Lister = oio::PageLister; type BlockingReader = (); @@ -281,8 +282,6 @@ impl Accessor for AzfileBackend { stat: true, read: true, - read_can_next: true, - read_with_range: true, write: true, create_dir: true, @@ -303,10 +302,7 @@ impl Accessor for AzfileBackend { let status = resp.status(); match status { - StatusCode::CREATED => { - resp.into_body().consume().await?; - Ok(RpCreateDir::default()) - } + StatusCode::CREATED => Ok(RpCreateDir::default()), _ => { // we cannot just check status code because 409 Conflict has two meaning: // 1. If a directory by the same name is being deleted when Create Directory is called, the server returns status code 409 (Conflict) @@ -346,25 +342,10 @@ impl Accessor for AzfileBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.azfile_read(path, args.range()).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + AzfileReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -387,10 +368,7 @@ impl Accessor for AzfileBackend { let status = resp.status(); match status { - StatusCode::ACCEPTED | StatusCode::NOT_FOUND => { - resp.into_body().consume().await?; - Ok(RpDelete::default()) - } + StatusCode::ACCEPTED | StatusCode::NOT_FOUND => Ok(RpDelete::default()), _ => Err(parse_error(resp).await?), } } @@ -406,10 +384,7 @@ impl Accessor for AzfileBackend { let resp = self.core.azfile_rename(from, to).await?; let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(RpRename::default()) - } + StatusCode::OK => Ok(RpRename::default()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/azfile/core.rs b/core/src/services/azfile/core.rs index ced2315ec5f7..a332ca081edd 100644 --- a/core/src/services/azfile/core.rs +++ b/core/src/services/azfile/core.rs @@ -93,7 +93,7 @@ impl AzfileCore { } #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> 
{ self.client.send(req).await } @@ -101,7 +101,7 @@ impl AzfileCore { &self, path: &str, range: BytesRange, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( @@ -114,16 +114,6 @@ impl AzfileCore { let mut req = Request::get(&url); if !range.is_full() { - // azfile doesn't support read with suffix range. - // - // ref: https://learn.microsoft.com/en-us/rest/api/storageservices/specifying-the-range-header-for-file-service-operations - if range.offset().is_none() && range.size().is_some() { - return Err(Error::new( - ErrorKind::Unsupported, - "azblob doesn't support read with suffix range", - )); - } - req = req.header(RANGE, range.to_header()); } @@ -139,7 +129,7 @@ impl AzfileCore { path: &str, size: usize, args: &OpWrite, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path) .trim_start_matches('/') .to_string(); @@ -182,7 +172,7 @@ impl AzfileCore { size: u64, position: u64, body: AsyncBody, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path) .trim_start_matches('/') .to_string(); @@ -210,10 +200,7 @@ impl AzfileCore { self.send(req).await } - pub async fn azfile_get_file_properties( - &self, - path: &str, - ) -> Result> { + pub async fn azfile_get_file_properties(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( "{}/{}/{}", @@ -234,7 +221,7 @@ impl AzfileCore { pub async fn azfile_get_directory_properties( &self, path: &str, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( @@ -253,11 +240,7 @@ impl AzfileCore { self.send(req).await } - pub async fn azfile_rename( - &self, - path: &str, - new_path: &str, - ) -> Result> { + pub async fn azfile_rename(&self, path: &str, new_path: &str) -> Result> { let p = build_abs_path(&self.root, path) .trim_start_matches('/') .to_string(); @@ -309,7 +292,7 @@ impl AzfileCore { self.send(req).await } - pub async fn azfile_create_dir(&self, path: &str) -> Result> { + pub async fn azfile_create_dir(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path) .trim_start_matches('/') .to_string(); @@ -332,7 +315,7 @@ impl AzfileCore { self.send(req).await } - pub async fn azfile_delete_file(&self, path: &str) -> Result> { + pub async fn azfile_delete_file(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path) .trim_start_matches('/') .to_string(); @@ -353,7 +336,7 @@ impl AzfileCore { self.send(req).await } - pub async fn azfile_delete_dir(&self, path: &str) -> Result> { + pub async fn azfile_delete_dir(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path) .trim_start_matches('/') .to_string(); @@ -379,7 +362,7 @@ impl AzfileCore { path: &str, limit: &Option, continuation: &String, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path) .trim_start_matches('/') .to_string(); diff --git a/core/src/services/azfile/error.rs b/core/src/services/azfile/error.rs index 026f5b3980f7..0e8992b7abe2 100644 --- a/core/src/services/azfile/error.rs +++ b/core/src/services/azfile/error.rs @@ -61,9 +61,9 @@ impl Debug for AzfileError { } /// Parse error response into Error. 
-pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/azfile/lister.rs b/core/src/services/azfile/lister.rs index 9c3e6819c5da..913af07d838f 100644 --- a/core/src/services/azfile/lister.rs +++ b/core/src/services/azfile/lister.rs @@ -17,8 +17,9 @@ use std::sync::Arc; +use bytes::Buf; use http::StatusCode; -use quick_xml::de::from_str; +use quick_xml::de; use serde::Deserialize; use super::core::AzfileCore; @@ -55,11 +56,10 @@ impl oio::PageList for AzfileLister { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let text = String::from_utf8(bs.to_vec()).expect("response convert to string must success"); - - let results: EnumerationResults = from_str(&text).map_err(new_xml_deserialize_error)?; + let results: EnumerationResults = + de::from_reader(bs.reader()).map_err(new_xml_deserialize_error)?; if results.next_marker.is_empty() { ctx.done = true; @@ -148,6 +148,8 @@ struct Properties { #[cfg(test)] mod tests { + use quick_xml::de::from_str; + use super::*; #[test] diff --git a/core/src/services/azfile/mod.rs b/core/src/services/azfile/mod.rs index 0ae9da72918f..a94ed0be8452 100644 --- a/core/src/services/azfile/mod.rs +++ b/core/src/services/azfile/mod.rs @@ -21,4 +21,5 @@ mod backend; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/azfile/reader.rs b/core/src/services/azfile/reader.rs new file mode 100644 index 000000000000..651188b8ef8f --- /dev/null +++ b/core/src/services/azfile/reader.rs @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
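Note: the azfile lister change above also removes a panic path: the old code collected the body into a `String` guarded by `expect(...)` before calling `from_str`, while the new code hands `Buf::reader()` (a `std::io::BufRead` adapter) straight to the deserializer. A sketch of that style with `quick_xml` (assuming its `serialize` feature; the struct is illustrative, not the real `EnumerationResults`):

```rust
use bytes::{Buf, Bytes};
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct Entry {
    #[serde(rename = "Name")]
    name: String,
}

fn main() {
    // Deserialize directly from the buffered body: no String round-trip,
    // no UTF-8 `expect` that can panic on a malformed response.
    let body = Bytes::from_static(b"<Entry><Name>a.txt</Name></Entry>");
    let entry: Entry = quick_xml::de::from_reader(body.reader()).unwrap();
    assert_eq!(entry.name, "a.txt");
}
```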
+ +use std::sync::Arc; + +use http::StatusCode; + +use super::core::AzfileCore; +use super::error::parse_error; +use crate::raw::*; +use crate::*; + +pub struct AzfileReader { + core: Arc, + + path: String, + _op: OpRead, +} + +impl AzfileReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + Self { + core, + path: path.to_string(), + _op: op, + } + } +} + +impl oio::Read for AzfileReader { + async fn read_at(&self, offset: u64, limit: usize) -> Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.azfile_read(&self.path, range).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/azfile/writer.rs b/core/src/services/azfile/writer.rs index e2c3dde86eb6..0fe1ea28a8df 100644 --- a/core/src/services/azfile/writer.rs +++ b/core/src/services/azfile/writer.rs @@ -48,9 +48,7 @@ impl oio::OneShotWrite for AzfileWriter { let status = resp.status(); match status { - StatusCode::OK | StatusCode::CREATED => { - resp.into_body().consume().await?; - } + StatusCode::OK | StatusCode::CREATED => {} _ => { return Err(parse_error(resp) .await? @@ -64,10 +62,7 @@ impl oio::OneShotWrite for AzfileWriter { .await?; let status = resp.status(); match status { - StatusCode::OK | StatusCode::CREATED => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK | StatusCode::CREATED => Ok(()), _ => Err(parse_error(resp) .await? .with_operation("Backend::azfile_update")), @@ -95,10 +90,7 @@ impl oio::AppendWrite for AzfileWriter { let status = resp.status(); match status { - StatusCode::OK | StatusCode::CREATED => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK | StatusCode::CREATED => Ok(()), _ => Err(parse_error(resp) .await? .with_operation("Backend::azfile_update")), diff --git a/core/src/services/b2/backend.rs b/core/src/services/b2/backend.rs index 5d1ec9b5313a..4eecace5ac43 100644 --- a/core/src/services/b2/backend.rs +++ b/core/src/services/b2/backend.rs @@ -21,6 +21,7 @@ use std::fmt::Formatter; use std::sync::Arc; use async_trait::async_trait; +use bytes::Buf; use http::Request; use http::StatusCode; use log::debug; @@ -37,6 +38,7 @@ use super::writer::B2Writers; use crate::raw::*; use crate::services::b2::core::B2Signer; use crate::services::b2::core::ListFileNamesResponse; +use crate::services::b2::reader::B2Reader; use crate::*; /// Config for backblaze b2 services support. 
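Note: `AzfileReader` above encodes the status mapping all the new readers share: 200/206 yield the buffered body, while 416 is converted into an empty buffer rather than an error. That makes reading at or past end-of-object look like a zero-length read, which is how `read_at` callers detect EOF, much like POSIX `read(2)`. A trivial sketch of the mapping:

```rust
use http::StatusCode;

// 416 means the requested range starts at or past the end of the object;
// returning an empty read lets the caller treat it as EOF.
fn is_eof(status: StatusCode) -> bool {
    status == StatusCode::RANGE_NOT_SATISFIABLE
}

fn main() {
    assert!(is_eof(StatusCode::RANGE_NOT_SATISFIABLE));
    assert!(!is_eof(StatusCode::PARTIAL_CONTENT));
}
```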
@@ -267,7 +269,7 @@ pub struct B2Backend { #[async_trait] impl Accessor for B2Backend { - type Reader = IncomingAsyncBody; + type Reader = B2Reader; type Writer = B2Writers; type Lister = oio::PageLister; type BlockingReader = (); @@ -282,8 +284,6 @@ impl Accessor for B2Backend { stat: true, read: true, - read_can_next: true, - read_with_range: true, write: true, write_can_empty: true, @@ -339,10 +339,10 @@ impl Accessor for B2Backend { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let resp: ListFileNamesResponse = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; if resp.files.is_empty() { return Err(Error::new(ErrorKind::NotFound, "no such file or directory")); } @@ -354,25 +354,10 @@ impl Accessor for B2Backend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.download_file_by_name(path, &args).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + B2Reader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -426,10 +411,10 @@ impl Accessor for B2Backend { let source_file_id = match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let resp: ListFileNamesResponse = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; if resp.files.is_empty() { return Err(Error::new(ErrorKind::NotFound, "no such file or directory")); } @@ -459,7 +444,7 @@ impl Accessor for B2Backend { PresignOperation::Stat(_) => { let resp = self .core - .get_download_authorization(path, &OpRead::default(), args.expire()) + .get_download_authorization(path, args.expire()) .await?; let path = build_abs_path(&self.core.root, path); @@ -485,10 +470,10 @@ impl Accessor for B2Backend { parts.headers, ))) } - PresignOperation::Read(op) => { + PresignOperation::Read(_) => { let resp = self .core - .get_download_authorization(path, op, args.expire()) + .get_download_authorization(path, args.expire()) .await?; let path = build_abs_path(&self.core.root, path); diff --git a/core/src/services/b2/core.rs b/core/src/services/b2/core.rs index 20e24f11ac60..8d01338f3151 100644 --- a/core/src/services/b2/core.rs +++ b/core/src/services/b2/core.rs @@ -20,6 +20,7 @@ use std::fmt::Formatter; use std::sync::Arc; use std::time::Duration; +use bytes::Buf; use chrono::DateTime; use chrono::Utc; use http::header; @@ -70,7 +71,7 @@ impl Debug for B2Core { impl B2Core { #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } @@ -105,9 +106,10 @@ impl B2Core { match status { StatusCode::OK => { - let resp_body = &resp.into_body().bytes().await?; - let token = serde_json::from_slice::(resp_body) - .map_err(new_json_deserialize_error)?; + let resp_body = 
resp.into_body(); + let token: AuthorizeAccountResponse = + serde_json::from_reader(resp_body.reader()) + .map_err(new_json_deserialize_error)?; signer.auth_info = AuthInfo { authorization_token: token.authorization_token.clone(), api_url: token.api_url.clone(), @@ -129,8 +131,9 @@ impl B2Core { pub async fn download_file_by_name( &self, path: &str, - args: &OpRead, - ) -> Result> { + range: BytesRange, + _args: &OpRead, + ) -> Result> { let path = build_abs_path(&self.root, path); let auth_info = self.get_auth_info().await?; @@ -147,7 +150,6 @@ impl B2Core { req = req.header(header::AUTHORIZATION, auth_info.authorization_token); - let range = args.range(); if !range.is_full() { req = req.header(header::RANGE, range.to_header()); } @@ -180,8 +182,8 @@ impl B2Core { let status = resp.status(); match status { StatusCode::OK => { - let resp_body = &resp.into_body().bytes().await?; - let resp = serde_json::from_slice::(resp_body) + let resp_body = resp.into_body(); + let resp = serde_json::from_reader(resp_body.reader()) .map_err(new_json_deserialize_error)?; Ok(resp) } @@ -192,7 +194,6 @@ impl B2Core { pub async fn get_download_authorization( &self, path: &str, - args: &OpRead, expire: Duration, ) -> Result { let path = build_abs_path(&self.root, path); @@ -208,10 +209,6 @@ impl B2Core { req = req.header(header::AUTHORIZATION, auth_info.authorization_token); - let range = args.range(); - if !range.is_full() { - req = req.header(header::RANGE, range.to_header()); - } let body = GetDownloadAuthorizationRequest { bucket_id: self.bucket_id.clone(), file_name_prefix: path, @@ -229,8 +226,8 @@ impl B2Core { let status = resp.status(); match status { StatusCode::OK => { - let resp_body = &resp.into_body().bytes().await?; - let resp = serde_json::from_slice::(resp_body) + let resp_body = resp.into_body(); + let resp = serde_json::from_reader(resp_body.reader()) .map_err(new_json_deserialize_error)?; Ok(resp) } @@ -244,7 +241,7 @@ impl B2Core { size: Option, args: &OpWrite, body: AsyncBody, - ) -> Result> { + ) -> Result> { let resp = self.get_upload_url().await?; let p = build_abs_path(&self.root, path); @@ -281,7 +278,7 @@ impl B2Core { &self, path: &str, args: &OpWrite, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let auth_info = self.get_auth_info().await?; @@ -335,8 +332,8 @@ impl B2Core { let status = resp.status(); match status { StatusCode::OK => { - let resp_body = &resp.into_body().bytes().await?; - let resp = serde_json::from_slice::(resp_body) + let resp_body = resp.into_body(); + let resp = serde_json::from_reader(resp_body.reader()) .map_err(new_json_deserialize_error)?; Ok(resp) } @@ -350,7 +347,7 @@ impl B2Core { part_number: usize, size: u64, body: AsyncBody, - ) -> Result> { + ) -> Result> { let resp = self.get_upload_part_url(file_id).await?; let mut req = Request::post(resp.upload_url); @@ -373,7 +370,7 @@ impl B2Core { &self, file_id: &str, part_sha1_array: Vec, - ) -> Result> { + ) -> Result> { let auth_info = self.get_auth_info().await?; let url = format!("{}/b2api/v2/b2_finish_large_file", auth_info.api_url); @@ -397,7 +394,7 @@ impl B2Core { self.send(req).await } - pub async fn cancel_large_file(&self, file_id: &str) -> Result> { + pub async fn cancel_large_file(&self, file_id: &str) -> Result> { let auth_info = self.get_auth_info().await?; let url = format!("{}/b2api/v2/b2_cancel_large_file", auth_info.api_url); @@ -426,7 +423,7 @@ impl B2Core { delimiter: Option<&str>, limit: Option, start_after: Option, - ) -> Result> { + ) -> Result> { let 
auth_info = self.get_auth_info().await?; let mut url = format!( @@ -470,7 +467,7 @@ impl B2Core { &self, source_file_id: String, to: &str, - ) -> Result> { + ) -> Result> { let to = build_abs_path(&self.root, to); let auth_info = self.get_auth_info().await?; @@ -497,7 +494,7 @@ impl B2Core { self.send(req).await } - pub async fn hide_file(&self, path: &str) -> Result> { + pub async fn hide_file(&self, path: &str) -> Result> { let path = build_abs_path(&self.root, path); let auth_info = self.get_auth_info().await?; diff --git a/core/src/services/b2/error.rs b/core/src/services/b2/error.rs index 24b8bb1839f7..9b4705891967 100644 --- a/core/src/services/b2/error.rs +++ b/core/src/services/b2/error.rs @@ -34,9 +34,9 @@ struct B2Error { } /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (mut kind, mut retryable) = match parts.status.as_u16() { 403 => (ErrorKind::PermissionDenied, false), @@ -79,7 +79,6 @@ pub fn parse_b2_error_code(code: &str) -> Option<(ErrorKind, bool)> { #[cfg(test)] mod test { - use futures::stream; use http::StatusCode; use super::*; @@ -124,10 +123,7 @@ mod test { for res in err_res { let bs = bytes::Bytes::from(res.0); - let body = IncomingAsyncBody::new( - Box::new(oio::into_stream(stream::iter(vec![Ok(bs.clone())]))), - None, - ); + let body = oio::Buffer::from(bs); let resp = Response::builder().status(res.2).body(body).unwrap(); let err = parse_error(resp).await; diff --git a/core/src/services/b2/lister.rs b/core/src/services/b2/lister.rs index 3d12ec6cf3e4..9b27b121c18b 100644 --- a/core/src/services/b2/lister.rs +++ b/core/src/services/b2/lister.rs @@ -79,7 +79,7 @@ impl oio::PageList for B2Lister { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let output: ListFileNamesResponse = serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; diff --git a/core/src/services/b2/mod.rs b/core/src/services/b2/mod.rs index 80745f01e82a..025a80c3a831 100644 --- a/core/src/services/b2/mod.rs +++ b/core/src/services/b2/mod.rs @@ -22,4 +22,5 @@ pub use backend::B2Config; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/b2/reader.rs b/core/src/services/b2/reader.rs new file mode 100644 index 000000000000..63cb47b6b78c --- /dev/null +++ b/core/src/services/b2/reader.rs @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
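Note: each `read_at(offset, limit)` call is translated into a bounded `BytesRange` and then an HTTP `Range` header (see `azfile_read` and `download_file_by_name` above). HTTP byte ranges are inclusive on both ends, hence the `- 1` in the hypothetical helper below, which mirrors what `BytesRange::to_header` is expected to emit for a bounded range (the helper itself is not part of the patch):

```rust
// Hypothetical helper: map (offset, limit) to an inclusive HTTP byte range.
fn range_header(offset: u64, limit: u64) -> String {
    format!("bytes={}-{}", offset, offset + limit - 1)
}

fn main() {
    // read_at(offset = 1024, limit = 4096) asks for bytes 1024..=5119.
    assert_eq!(range_header(1024, 4096), "bytes=1024-5119");
}
```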
+ +use std::sync::Arc; + +use http::StatusCode; + +use super::core::*; +use super::error::*; +use crate::raw::*; +use crate::*; + +pub struct B2Reader { + core: Arc, + + path: String, + op: OpRead, +} + +impl B2Reader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + B2Reader { + core, + path: path.to_string(), + op, + } + } +} + +impl oio::Read for B2Reader { + async fn read_at(&self, offset: u64, limit: usize) -> Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self + .core + .download_file_by_name(&self.path, range, &self.op) + .await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/b2/writer.rs b/core/src/services/b2/writer.rs index 78cb8a9fc433..51df2623fa84 100644 --- a/core/src/services/b2/writer.rs +++ b/core/src/services/b2/writer.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use bytes::Buf; use http::StatusCode; use super::core::B2Core; @@ -55,10 +56,7 @@ impl oio::MultipartWrite for B2Writer { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -70,10 +68,10 @@ impl oio::MultipartWrite for B2Writer { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let result: StartLargeFileResponse = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; Ok(result.file_id) } @@ -100,10 +98,10 @@ impl oio::MultipartWrite for B2Writer { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let result: UploadPartResponse = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; Ok(oio::MultipartPart { etag: result.content_sha1, @@ -135,11 +133,7 @@ impl oio::MultipartWrite for B2Writer { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -148,10 +142,7 @@ impl oio::MultipartWrite for B2Writer { let resp = self.core.cancel_large_file(upload_id).await?; match resp.status() { // b2 returns code 200 if abort succeeds. 
- StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/chainsafe/backend.rs b/core/src/services/chainsafe/backend.rs index 3957f4f900d6..fc300f62fb7f 100644 --- a/core/src/services/chainsafe/backend.rs +++ b/core/src/services/chainsafe/backend.rs @@ -21,6 +21,7 @@ use std::fmt::Formatter; use std::sync::Arc; use async_trait::async_trait; +use bytes::Buf; use http::StatusCode; use log::debug; use serde::Deserialize; @@ -30,6 +31,7 @@ use super::core::ChainsafeCore; use super::core::ObjectInfoResponse; use super::error::parse_error; use super::lister::ChainsafeLister; +use super::reader::ChainsafeReader; use super::writer::ChainsafeWriter; use super::writer::ChainsafeWriters; use crate::raw::*; @@ -203,16 +205,11 @@ pub struct ChainsafeBackend { #[async_trait] impl Accessor for ChainsafeBackend { - type Reader = IncomingAsyncBody; - + type Reader = ChainsafeReader; type Writer = ChainsafeWriters; - type Lister = oio::PageLister; - type BlockingReader = (); - type BlockingWriter = (); - type BlockingLister = (); fn info(&self) -> AccessorInfo { @@ -258,32 +255,21 @@ impl Accessor for ChainsafeBackend { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let output: ObjectInfoResponse = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; Ok(RpStat::new(parse_info(output.content))) } _ => Err(parse_error(resp).await?), } } - async fn read(&self, path: &str, _args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.download_object(path).await?; - - let status = resp.status(); - - match status { - StatusCode::OK => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - _ => Err(parse_error(resp).await?), - } + async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + Ok(( + RpRead::default(), + ChainsafeReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/services/chainsafe/core.rs b/core/src/services/chainsafe/core.rs index 50e3e370698e..e38dcb4c792b 100644 --- a/core/src/services/chainsafe/core.rs +++ b/core/src/services/chainsafe/core.rs @@ -52,13 +52,17 @@ impl Debug for ChainsafeCore { impl ChainsafeCore { #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } } impl ChainsafeCore { - pub async fn download_object(&self, path: &str) -> Result> { + pub async fn download_object( + &self, + path: &str, + range: BytesRange, + ) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( @@ -76,6 +80,7 @@ impl ChainsafeCore { header::AUTHORIZATION, format_authorization_by_bearer(&self.api_key)?, ) + .header(header::RANGE, range.to_header()) .header(header::CONTENT_TYPE, "application/json") .body(body) .map_err(new_request_build_error)?; @@ -83,7 +88,7 @@ impl ChainsafeCore { self.send(req).await } - pub async fn object_info(&self, path: &str) -> Result> { + pub async fn object_info(&self, path: &str) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( @@ -109,7 +114,7 @@ impl ChainsafeCore { self.send(req).await } - pub 
async fn move_object(&self, from: &str, to: &str) -> Result> { + pub async fn move_object(&self, from: &str, to: &str) -> Result> { let from = build_abs_path(&self.root, from); let to = build_abs_path(&self.root, to); @@ -136,7 +141,7 @@ impl ChainsafeCore { self.send(req).await } - pub async fn delete_object(&self, path: &str) -> Result> { + pub async fn delete_object(&self, path: &str) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( @@ -162,11 +167,7 @@ impl ChainsafeCore { self.send(req).await } - pub async fn upload_object( - &self, - path: &str, - bs: Bytes, - ) -> Result> { + pub async fn upload_object(&self, path: &str, bs: Bytes) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( @@ -190,7 +191,7 @@ impl ChainsafeCore { self.send(req).await } - pub async fn list_objects(&self, path: &str) -> Result> { + pub async fn list_objects(&self, path: &str) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( @@ -216,7 +217,7 @@ impl ChainsafeCore { self.send(req).await } - pub async fn create_dir(&self, path: &str) -> Result> { + pub async fn create_dir(&self, path: &str) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( diff --git a/core/src/services/chainsafe/error.rs b/core/src/services/chainsafe/error.rs index f998b625a4d9..b7e3cf2d889f 100644 --- a/core/src/services/chainsafe/error.rs +++ b/core/src/services/chainsafe/error.rs @@ -38,9 +38,9 @@ struct ChainsafeSubError { } /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status.as_u16() { 401 | 403 => (ErrorKind::PermissionDenied, false), @@ -75,7 +75,6 @@ pub async fn parse_error(resp: Response) -> Result { #[cfg(test)] mod test { - use futures::stream; use http::StatusCode; use super::*; @@ -95,10 +94,7 @@ mod test { for res in err_res { let bs = bytes::Bytes::from(res.0); - let body = IncomingAsyncBody::new( - Box::new(oio::into_stream(stream::iter(vec![Ok(bs.clone())]))), - None, - ); + let body = oio::Buffer::from(bs); let resp = Response::builder().status(res.2).body(body).unwrap(); let err = parse_error(resp).await; diff --git a/core/src/services/chainsafe/lister.rs b/core/src/services/chainsafe/lister.rs index faf00b6df70b..4d419cd3d5dc 100644 --- a/core/src/services/chainsafe/lister.rs +++ b/core/src/services/chainsafe/lister.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use bytes::Buf; use http::StatusCode; use super::core::parse_info; @@ -48,10 +49,10 @@ impl oio::PageList for ChainsafeLister { match resp.status() { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let output: Vec = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; for info in output { let mut path = build_abs_path(&normalize_root(&self.path), &info.name); diff --git a/core/src/services/chainsafe/mod.rs b/core/src/services/chainsafe/mod.rs index ed5335a738bf..cecf6a300201 100644 --- a/core/src/services/chainsafe/mod.rs +++ b/core/src/services/chainsafe/mod.rs @@ -22,4 +22,5 @@ pub use backend::ChainsafeConfig; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/chainsafe/reader.rs 
b/core/src/services/chainsafe/reader.rs new file mode 100644 index 000000000000..b6813660dd67 --- /dev/null +++ b/core/src/services/chainsafe/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use http::StatusCode; + +use super::core::ChainsafeCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct ChainsafeReader { + core: Arc, + + path: String, + _op: OpRead, +} + +impl ChainsafeReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + ChainsafeReader { + core, + path: path.to_string(), + _op: op, + } + } +} + +impl oio::Read for ChainsafeReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.download_object(&self.path, range).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/chainsafe/writer.rs b/core/src/services/chainsafe/writer.rs index 79b58e324f0a..bf9547d5b147 100644 --- a/core/src/services/chainsafe/writer.rs +++ b/core/src/services/chainsafe/writer.rs @@ -50,10 +50,7 @@ impl oio::OneShotWrite for ChainsafeWriter { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/cloudflare_kv/backend.rs b/core/src/services/cloudflare_kv/backend.rs index 3050582c842d..f55c34e79c11 100644 --- a/core/src/services/cloudflare_kv/backend.rs +++ b/core/src/services/cloudflare_kv/backend.rs @@ -20,6 +20,7 @@ use std::fmt::Debug; use std::fmt::Formatter; use async_trait::async_trait; +use bytes::Buf; use http::header; use http::Request; use http::StatusCode; @@ -236,8 +237,8 @@ impl kv::Adapter for Adapter { let status = resp.status(); match status { StatusCode::OK => { - let body = resp.into_body().bytes().await?; - Ok(Some(body.into())) + let mut body = resp.into_body(); + Ok(Some(body.copy_to_bytes(body.remaining()).to_vec())) } _ => Err(parse_error(resp).await?), } @@ -291,13 +292,14 @@ impl kv::Adapter for Adapter { let status = resp.status(); match status { StatusCode::OK => { - let body = resp.into_body().bytes().await?; - let response: CfKvScanResponse = serde_json::from_slice(&body).map_err(|e| { - Error::new( - ErrorKind::Unexpected, - &format!("failed to parse error response: {}", e), - ) - })?; + let body = resp.into_body(); + let response: CfKvScanResponse = + serde_json::from_reader(body.reader()).map_err(|e| { + Error::new( + ErrorKind::Unexpected, + &format!("failed to parse 
error response: {}", e), + ) + })?; Ok(response.result.into_iter().map(|r| r.name).collect()) } _ => Err(parse_error(resp).await?), diff --git a/core/src/services/cloudflare_kv/error.rs b/core/src/services/cloudflare_kv/error.rs index eb323f9ed2ce..34c81953516e 100644 --- a/core/src/services/cloudflare_kv/error.rs +++ b/core/src/services/cloudflare_kv/error.rs @@ -28,9 +28,9 @@ use crate::ErrorKind; use crate::Result; /// Parse error response into Error. -pub(crate) async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub(crate) async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (mut kind, mut retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/cos/backend.rs b/core/src/services/cos/backend.rs index 48f0ca444d9d..59ff0771c7d9 100644 --- a/core/src/services/cos/backend.rs +++ b/core/src/services/cos/backend.rs @@ -33,6 +33,7 @@ use super::error::parse_error; use super::lister::CosLister; use super::writer::CosWriter; use crate::raw::*; +use crate::services::cos::reader::CosReader; use crate::services::cos::writer::CosWriters; use crate::*; @@ -254,7 +255,7 @@ pub struct CosBackend { #[async_trait] impl Accessor for CosBackend { - type Reader = IncomingAsyncBody; + type Reader = CosReader; type Writer = CosWriters; type Lister = oio::PageLister; type BlockingReader = (); @@ -272,8 +273,7 @@ impl Accessor for CosBackend { stat_with_if_none_match: true, read: true, - read_can_next: true, - read_with_range: true, + read_with_if_match: true, read_with_if_none_match: true, @@ -326,25 +326,10 @@ impl Accessor for CosBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.cos_get_object(path, &args).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + CosReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -383,10 +368,7 @@ impl Accessor for CosBackend { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(RpCopy::default()) - } + StatusCode::OK => Ok(RpCopy::default()), _ => Err(parse_error(resp).await?), } } @@ -394,7 +376,10 @@ impl Accessor for CosBackend { async fn presign(&self, path: &str, args: OpPresign) -> Result { let mut req = match args.operation() { PresignOperation::Stat(v) => self.core.cos_head_object_request(path, v)?, - PresignOperation::Read(v) => self.core.cos_get_object_request(path, v)?, + PresignOperation::Read(v) => { + self.core + .cos_get_object_request(path, BytesRange::default(), v)? + } PresignOperation::Write(v) => { self.core .cos_put_object_request(path, None, v, AsyncBody::Empty)? 
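Note: the cos presign change above is a direct consequence of range-based reads: a presigned URL is minted once, while ranges now vary per `read_at` call, so `PresignOperation::Read` signs with `BytesRange::default()` (the full object) and leaves any range to the eventual consumer of the URL. A sketch of applying a range at fetch time against a presigned URL (URL and header value are illustrative):

```rust
use http::{header, Request};

fn main() {
    // The signature covers the object, not a byte range, so the consumer
    // attaches `Range` per request.
    let presigned = "https://example.com/obj?sign=abc"; // hypothetical URL
    let req = Request::get(presigned)
        .header(header::RANGE, "bytes=100-199")
        .body(())
        .unwrap();
    assert_eq!(req.headers()[header::RANGE], "bytes=100-199");
}
```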
diff --git a/core/src/services/cos/core.rs b/core/src/services/cos/core.rs index 29659cfc1c1c..003be69b319b 100644 --- a/core/src/services/cos/core.rs +++ b/core/src/services/cos/core.rs @@ -95,7 +95,7 @@ impl CosCore { } #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } } @@ -104,16 +104,22 @@ impl CosCore { pub async fn cos_get_object( &self, path: &str, + range: BytesRange, args: &OpRead, - ) -> Result> { - let mut req = self.cos_get_object_request(path, args)?; + ) -> Result> { + let mut req = self.cos_get_object_request(path, range, args)?; self.sign(&mut req).await?; self.send(req).await } - pub fn cos_get_object_request(&self, path: &str, args: &OpRead) -> Result> { + pub fn cos_get_object_request( + &self, + path: &str, + range: BytesRange, + args: &OpRead, + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!("{}/{}", self.endpoint, percent_encode_path(&p)); @@ -124,7 +130,6 @@ impl CosCore { req = req.header(IF_MATCH, if_match); } - let range = args.range(); if !range.is_full() { req = req.header(http::header::RANGE, range.to_header()) } @@ -175,7 +180,7 @@ impl CosCore { &self, path: &str, args: &OpStat, - ) -> Result> { + ) -> Result> { let mut req = self.cos_head_object_request(path, args)?; self.sign(&mut req).await?; @@ -205,7 +210,7 @@ impl CosCore { Ok(req) } - pub async fn cos_delete_object(&self, path: &str) -> Result> { + pub async fn cos_delete_object(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path); let url = format!("{}/{}", self.endpoint, percent_encode_path(&p)); @@ -257,11 +262,7 @@ impl CosCore { Ok(req) } - pub async fn cos_copy_object( - &self, - from: &str, - to: &str, - ) -> Result> { + pub async fn cos_copy_object(&self, from: &str, to: &str) -> Result> { let source = build_abs_path(&self.root, from); let target = build_abs_path(&self.root, to); @@ -284,7 +285,7 @@ impl CosCore { next_marker: &str, delimiter: &str, limit: Option, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let mut queries = vec![]; @@ -320,7 +321,7 @@ impl CosCore { &self, path: &str, args: &OpWrite, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!("{}/{}?uploads", self.endpoint, percent_encode_path(&p)); @@ -355,7 +356,7 @@ impl CosCore { part_number: usize, size: u64, body: AsyncBody, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( @@ -381,7 +382,7 @@ impl CosCore { path: &str, upload_id: &str, parts: Vec, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( @@ -414,7 +415,7 @@ impl CosCore { &self, path: &str, upload_id: &str, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( diff --git a/core/src/services/cos/error.rs b/core/src/services/cos/error.rs index e0d98920f66f..95d5e347fbaa 100644 --- a/core/src/services/cos/error.rs +++ b/core/src/services/cos/error.rs @@ -38,9 +38,9 @@ struct CosError { } /// Parse error response into Error. 
-pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/cos/lister.rs b/core/src/services/cos/lister.rs index 098e243d8ba7..7022aa9def0b 100644 --- a/core/src/services/cos/lister.rs +++ b/core/src/services/cos/lister.rs @@ -57,7 +57,7 @@ impl oio::PageList for CosLister { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let output: ListObjectsOutput = de::from_reader(bs.reader()).map_err(new_xml_deserialize_error)?; diff --git a/core/src/services/cos/mod.rs b/core/src/services/cos/mod.rs index 780c2c4796cf..07f1e54ce94b 100644 --- a/core/src/services/cos/mod.rs +++ b/core/src/services/cos/mod.rs @@ -21,4 +21,5 @@ pub use backend::CosBuilder as Cos; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/cos/reader.rs b/core/src/services/cos/reader.rs new file mode 100644 index 000000000000..57af3a1607e4 --- /dev/null +++ b/core/src/services/cos/reader.rs @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use http::StatusCode; + +use super::core::CosCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct CosReader { + core: Arc, + + path: String, + op: OpRead, +} + +impl CosReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + CosReader { + core, + path: path.to_string(), + op, + } + } +} + +impl oio::Read for CosReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self + .core + .cos_get_object(&self.path, range, &self.op) + .await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/cos/writer.rs b/core/src/services/cos/writer.rs index 51f0ad15edf5..1b938bb4c51c 100644 --- a/core/src/services/cos/writer.rs +++ b/core/src/services/cos/writer.rs @@ -56,10 +56,7 @@ impl oio::MultipartWrite for CosWriter { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED | StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -74,7 +71,7 @@ impl oio::MultipartWrite for CosWriter { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let result: InitiateMultipartUploadResult = quick_xml::de::from_reader(bytes::Buf::reader(bs)) @@ -114,8 +111,6 @@ impl oio::MultipartWrite for CosWriter { })? .to_string(); - resp.into_body().consume().await?; - Ok(oio::MultipartPart { part_number, etag }) } _ => Err(parse_error(resp).await?), @@ -139,11 +134,7 @@ impl oio::MultipartWrite for CosWriter { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -156,10 +147,7 @@ impl oio::MultipartWrite for CosWriter { match resp.status() { // cos returns code 204 if abort succeeds. // Reference: https://www.tencentcloud.com/document/product/436/7740 - StatusCode::NO_CONTENT => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::NO_CONTENT => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/d1/backend.rs b/core/src/services/d1/backend.rs index 203ad4d2d931..9cf3c38ccbcd 100644 --- a/core/src/services/d1/backend.rs +++ b/core/src/services/d1/backend.rs @@ -20,6 +20,7 @@ use std::fmt::Debug; use std::fmt::Formatter; use async_trait::async_trait; +use bytes::Buf; use http::header; use http::Request; use http::StatusCode; @@ -313,8 +314,9 @@ impl kv::Adapter for Adapter { let status = resp.status(); match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let body = resp.into_body().bytes().await?; - let d1_response = D1Response::parse(&body)?; + let mut body = resp.into_body(); + let bs = body.copy_to_bytes(body.remaining()); + let d1_response = D1Response::parse(&bs)?; Ok(d1_response.get_result(&self.value_field)) } _ => Err(parse_error(resp).await?), diff --git a/core/src/services/d1/error.rs b/core/src/services/d1/error.rs index 96ac072e004e..ada419ef4b3d 100644 --- a/core/src/services/d1/error.rs +++ b/core/src/services/d1/error.rs @@ -28,9 +28,9 @@ use crate::ErrorKind; use crate::Result; /// Parse error response into Error. 
-pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (mut kind, mut retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/dbfs/backend.rs b/core/src/services/dbfs/backend.rs index 1028c8b9dea3..7df87d371213 100644 --- a/core/src/services/dbfs/backend.rs +++ b/core/src/services/dbfs/backend.rs @@ -21,6 +21,7 @@ use std::fmt::Formatter; use std::sync::Arc; use async_trait::async_trait; +use bytes::Buf; use http::StatusCode; use log::debug; use serde::Deserialize; @@ -182,8 +183,6 @@ impl Accessor for DbfsBackend { stat: true, read: true, - read_can_next: true, - read_with_range: true, write: true, create_dir: true, @@ -203,10 +202,7 @@ impl Accessor for DbfsBackend { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(RpCreateDir::default()) - } + StatusCode::CREATED | StatusCode::OK => Ok(RpCreateDir::default()), _ => Err(parse_error(resp).await?), } } @@ -224,9 +220,9 @@ impl Accessor for DbfsBackend { match status { StatusCode::OK => { let mut meta = parse_into_metadata(path, resp.headers())?; - let bs = resp.into_body().bytes().await?; - let decoded_response = serde_json::from_slice::(&bs) - .map_err(new_json_deserialize_error)?; + let bs = resp.into_body(); + let decoded_response: DbfsStatus = + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; meta.set_last_modified(parse_datetime_from_from_timestamp_millis( decoded_response.modification_time, )?); @@ -246,8 +242,8 @@ impl Accessor for DbfsBackend { } } - async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let op = DbfsReader::new(self.core.clone(), args, path.to_string()); + async fn read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { + let op = DbfsReader::new(self.core.clone(), path.to_string()); Ok((RpRead::new(), op)) } @@ -285,10 +281,7 @@ impl Accessor for DbfsBackend { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(RpRename::default()) - } + StatusCode::OK => Ok(RpRename::default()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/dbfs/core.rs b/core/src/services/dbfs/core.rs index 80541d31e46c..55e4516f1bfa 100644 --- a/core/src/services/dbfs/core.rs +++ b/core/src/services/dbfs/core.rs @@ -48,7 +48,7 @@ impl Debug for DbfsCore { } impl DbfsCore { - pub async fn dbfs_create_dir(&self, path: &str) -> Result> { + pub async fn dbfs_create_dir(&self, path: &str) -> Result> { let url = format!("{}/api/2.0/dbfs/mkdirs", self.endpoint); let mut req = Request::post(&url); @@ -69,7 +69,7 @@ impl DbfsCore { self.client.send(req).await } - pub async fn dbfs_delete(&self, path: &str) -> Result> { + pub async fn dbfs_delete(&self, path: &str) -> Result> { let url = format!("{}/api/2.0/dbfs/delete", self.endpoint); let mut req = Request::post(&url); @@ -93,7 +93,7 @@ impl DbfsCore { self.client.send(req).await } - pub async fn dbfs_rename(&self, from: &str, to: &str) -> Result> { + pub async fn dbfs_rename(&self, from: &str, to: &str) -> Result> { let source = build_rooted_abs_path(&self.root, from); let target = build_rooted_abs_path(&self.root, to); @@ -115,7 +115,7 @@ impl DbfsCore { self.client.send(req).await } - pub 
async fn dbfs_list(&self, path: &str) -> Result> { + pub async fn dbfs_list(&self, path: &str) -> Result> { let p = build_rooted_abs_path(&self.root, path) .trim_end_matches('/') .to_string(); @@ -162,7 +162,7 @@ impl DbfsCore { path: &str, offset: u64, length: u64, - ) -> Result> { + ) -> Result> { let p = build_rooted_abs_path(&self.root, path) .trim_end_matches('/') .to_string(); @@ -200,7 +200,7 @@ impl DbfsCore { } } - pub async fn dbfs_get_status(&self, path: &str) -> Result> { + pub async fn dbfs_get_status(&self, path: &str) -> Result> { let p = build_rooted_abs_path(&self.root, path) .trim_end_matches('/') .to_string(); diff --git a/core/src/services/dbfs/error.rs b/core/src/services/dbfs/error.rs index 41069d45fb0d..c65612fe3ddc 100644 --- a/core/src/services/dbfs/error.rs +++ b/core/src/services/dbfs/error.rs @@ -17,6 +17,7 @@ use std::fmt::Debug; +use bytes::Buf; use http::Response; use http::StatusCode; use serde::Deserialize; @@ -44,9 +45,9 @@ impl Debug for DbfsError { } } -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/dbfs/lister.rs b/core/src/services/dbfs/lister.rs index 11841bdae842..6a48066a584b 100644 --- a/core/src/services/dbfs/lister.rs +++ b/core/src/services/dbfs/lister.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use bytes::Buf; use http::StatusCode; use serde::Deserialize; @@ -50,9 +51,9 @@ impl oio::PageList for DbfsLister { return Err(error); } - let bytes = response.into_body().bytes().await?; - let decoded_response = - serde_json::from_slice::(&bytes).map_err(new_json_deserialize_error)?; + let bytes = response.into_body(); + let decoded_response: DbfsOutputList = + serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; ctx.done = true; diff --git a/core/src/services/dbfs/reader.rs b/core/src/services/dbfs/reader.rs index 856d0eaed490..ce267d297b17 100644 --- a/core/src/services/dbfs/reader.rs +++ b/core/src/services/dbfs/reader.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. 
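Note: `DbfsCore::dbfs_read` above already takes an explicit offset and length, which is exactly the shape `read_at` needs; the `DbfsReader` hunk that follows nevertheless leaves `read_at` as `todo!()` and keeps the base64 decode helper for later wiring. For reference, a self-contained sketch of the DBFS 2.0 read-response shape that helper decodes (field names per the public DBFS API; assumed here, not taken from the patch):

```rust
use base64::engine::general_purpose;
use base64::Engine;
use serde::Deserialize;

// DBFS returns read results as JSON with the chunk base64-encoded.
#[derive(Deserialize)]
struct ReadResponse {
    bytes_read: u64,
    data: String,
}

fn main() {
    let body = br#"{"bytes_read":5,"data":"aGVsbG8="}"#;
    let resp: ReadResponse = serde_json::from_slice(body).unwrap();
    let chunk = general_purpose::STANDARD.decode(resp.data).unwrap();
    assert_eq!(resp.bytes_read, 5);
    assert_eq!(chunk, b"hello");
}
```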
-use std::io::SeekFrom; use std::sync::Arc; use base64::engine::general_purpose; @@ -24,6 +23,7 @@ use bytes::Bytes; use serde::Deserialize; use super::core::DbfsCore; +use crate::raw::oio::Buffer; use crate::raw::*; use crate::*; @@ -35,26 +35,18 @@ use crate::*; pub struct DbfsReader { core: Arc, path: String, - offset: u64, has_filled: u64, } impl DbfsReader { - pub fn new(core: Arc, op: OpRead, path: String) -> Self { + pub fn new(core: Arc, path: String) -> Self { DbfsReader { core, path, - offset: op.range().offset().unwrap_or(0), has_filled: 0, } } - #[inline] - #[allow(dead_code)] - fn set_offset(&mut self, offset: u64) { - self.offset = offset; - } - #[allow(dead_code)] fn serde_json_decode(&self, bs: &Bytes) -> Result { let response_body = match serde_json::from_slice::(bs) { @@ -86,22 +78,8 @@ impl DbfsReader { unsafe impl Sync for DbfsReader {} impl oio::Read for DbfsReader { - async fn read(&mut self, limit: usize) -> Result { - let _ = limit; - - Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support seeking", - )) - } - - async fn seek(&mut self, pos: SeekFrom) -> Result { - let _ = pos; - - Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support seeking", - )) + async fn read_at(&self, _offset: u64, _limit: usize) -> Result { + todo!() } } diff --git a/core/src/services/dbfs/writer.rs b/core/src/services/dbfs/writer.rs index 47442901335c..390f973269b4 100644 --- a/core/src/services/dbfs/writer.rs +++ b/core/src/services/dbfs/writer.rs @@ -21,7 +21,6 @@ use bytes::Bytes; use http::StatusCode; use super::error::parse_error; - use crate::raw::*; use crate::services::dbfs::core::DbfsCore; use crate::*; @@ -57,10 +56,7 @@ impl oio::OneShotWrite for DbfsWriter { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED | StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/dropbox/backend.rs b/core/src/services/dropbox/backend.rs index 5d05aaf5f2f8..73c04bb861e5 100644 --- a/core/src/services/dropbox/backend.rs +++ b/core/src/services/dropbox/backend.rs @@ -20,10 +20,12 @@ use std::sync::Arc; use async_trait::async_trait; use backon::Retryable; +use bytes::Buf; use http::StatusCode; use super::core::*; use super::error::*; +use super::reader::DropboxReader; use super::writer::DropboxWriter; use crate::raw::*; use crate::*; @@ -35,7 +37,7 @@ pub struct DropboxBackend { #[async_trait] impl Accessor for DropboxBackend { - type Reader = IncomingAsyncBody; + type Reader = DropboxReader; type Writer = oio::OneShotWriter; type Lister = (); type BlockingReader = (); @@ -50,7 +52,6 @@ impl Accessor for DropboxBackend { stat: true, read: true, - read_with_range: true, write: true, @@ -70,9 +71,9 @@ impl Accessor for DropboxBackend { // Check if the folder already exists. 
let resp = self.core.dropbox_get_metadata(path).await?; if StatusCode::OK == resp.status() { - let bytes = resp.into_body().bytes().await?; - let decoded_response = serde_json::from_slice::(&bytes) - .map_err(new_json_deserialize_error)?; + let bytes = resp.into_body(); + let decoded_response: DropboxMetadataResponse = + serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; if "folder" == decoded_response.tag { return Ok(RpCreateDir::default()); } @@ -102,9 +103,9 @@ impl Accessor for DropboxBackend { let status = resp.status(); match status { StatusCode::OK => { - let bytes = resp.into_body().bytes().await?; - let decoded_response = serde_json::from_slice::(&bytes) - .map_err(new_json_deserialize_error)?; + let bytes = resp.into_body(); + let decoded_response: DropboxMetadataResponse = + serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; let entry_mode: EntryMode = match decoded_response.tag.as_str() { "file" => EntryMode::FILE, "folder" => EntryMode::DIR, @@ -136,16 +137,10 @@ impl Accessor for DropboxBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.dropbox_get(path, args).await?; - let status = resp.status(); - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + DropboxReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -193,9 +188,9 @@ impl Accessor for DropboxBackend { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; - let decoded_response = serde_json::from_slice::(&bs) - .map_err(new_json_deserialize_error)?; + let bs = resp.into_body(); + let decoded_response: DropboxDeleteBatchResponse = + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; match decoded_response.tag.as_str() { "complete" => { diff --git a/core/src/services/dropbox/core.rs b/core/src/services/dropbox/core.rs index 67c7cf5e5778..71ed3a085862 100644 --- a/core/src/services/dropbox/core.rs +++ b/core/src/services/dropbox/core.rs @@ -23,6 +23,7 @@ use std::sync::Arc; use std::time::Duration; use backon::ExponentialBuilder; +use bytes::Buf; use bytes::Bytes; use chrono::DateTime; use chrono::Utc; @@ -101,10 +102,10 @@ impl DropboxCore { .map_err(new_request_build_error)?; let resp = self.client.send(request).await?; - let body = resp.into_body().bytes().await?; + let body = resp.into_body(); let token: DropboxTokenResponse = - serde_json::from_slice(&body).map_err(new_json_deserialize_error)?; + serde_json::from_reader(body.reader()).map_err(new_json_deserialize_error)?; // Update signer after token refreshed. 
signer.access_token = token.access_token.clone(); @@ -124,8 +125,9 @@ impl DropboxCore { pub async fn dropbox_get( &self, path: &str, - args: OpRead, - ) -> Result> { + range: BytesRange, + _: &OpRead, + ) -> Result> { let url: String = "https://content.dropboxapi.com/2/files/download".to_string(); let download_args = DropboxDownloadArgs { path: build_rooted_abs_path(&self.root, path), @@ -137,7 +139,6 @@ impl DropboxCore { .header("Dropbox-API-Arg", request_payload) .header(CONTENT_LENGTH, 0); - let range = args.range(); if !range.is_full() { req = req.header(header::RANGE, range.to_header()); } @@ -156,7 +157,7 @@ impl DropboxCore { size: Option, args: &OpWrite, body: AsyncBody, - ) -> Result> { + ) -> Result> { let url = "https://content.dropboxapi.com/2/files/upload".to_string(); let dropbox_update_args = DropboxUploadArgs { path: build_rooted_abs_path(&self.root, path), @@ -183,7 +184,7 @@ impl DropboxCore { self.client.send(request).await } - pub async fn dropbox_delete(&self, path: &str) -> Result> { + pub async fn dropbox_delete(&self, path: &str) -> Result> { let url = "https://api.dropboxapi.com/2/files/delete_v2".to_string(); let args = DropboxDeleteArgs { path: self.build_path(path), @@ -201,10 +202,7 @@ impl DropboxCore { self.client.send(request).await } - pub async fn dropbox_delete_batch( - &self, - paths: Vec, - ) -> Result> { + pub async fn dropbox_delete_batch(&self, paths: Vec) -> Result> { let url = "https://api.dropboxapi.com/2/files/delete_batch".to_string(); let args = DropboxDeleteBatchArgs { entries: paths @@ -246,10 +244,10 @@ impl DropboxCore { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let decoded_response = serde_json::from_slice::(&bs) - .map_err(new_json_deserialize_error)?; + let decoded_response: DropboxDeleteBatchResponse = + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; match decoded_response.tag.as_str() { "in_progress" => Err(Error::new( ErrorKind::Unexpected, @@ -300,7 +298,7 @@ impl DropboxCore { } } - pub async fn dropbox_get_metadata(&self, path: &str) -> Result> { + pub async fn dropbox_get_metadata(&self, path: &str) -> Result> { let url = "https://api.dropboxapi.com/2/files/get_metadata".to_string(); let args = DropboxMetadataArgs { path: self.build_path(path), diff --git a/core/src/services/dropbox/error.rs b/core/src/services/dropbox/error.rs index ae731df785a8..a956a6f714fc 100644 --- a/core/src/services/dropbox/error.rs +++ b/core/src/services/dropbox/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::Response; use http::StatusCode; use serde::Deserialize; @@ -31,9 +32,9 @@ pub struct DropboxErrorResponse { } /// Parse error response into Error. 
-pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (mut kind, mut retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/dropbox/mod.rs b/core/src/services/dropbox/mod.rs index e056778cb746..3ba91497498d 100644 --- a/core/src/services/dropbox/mod.rs +++ b/core/src/services/dropbox/mod.rs @@ -19,6 +19,7 @@ mod backend; mod builder; mod core; mod error; +mod reader; mod writer; pub use builder::DropboxBuilder as Dropbox; diff --git a/core/src/services/dropbox/reader.rs b/core/src/services/dropbox/reader.rs new file mode 100644 index 000000000000..dd22b7ab0427 --- /dev/null +++ b/core/src/services/dropbox/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use http::StatusCode; + +use super::core::DropboxCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct DropboxReader { + core: Arc, + + path: String, + op: OpRead, +} + +impl DropboxReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + DropboxReader { + core, + path: path.to_string(), + op, + } + } +} + +impl oio::Read for DropboxReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.dropbox_get(&self.path, range, &self.op).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/dropbox/writer.rs b/core/src/services/dropbox/writer.rs index de1f594cd6b0..e14ea6bc247a 100644 --- a/core/src/services/dropbox/writer.rs +++ b/core/src/services/dropbox/writer.rs @@ -45,10 +45,7 @@ impl oio::OneShotWrite for DropboxWriter { .await?; let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/fs/backend.rs b/core/src/services/fs/backend.rs index 68ebf1b398e0..7870892e69e8 100644 --- a/core/src/services/fs/backend.rs +++ b/core/src/services/fs/backend.rs @@ -27,6 +27,7 @@ use uuid::Uuid; use super::lister::FsLister; use super::writer::FsWriter; use crate::raw::*; +use crate::services::fs::reader::FsReader; use crate::*; /// POSIX file system support. 
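Reviewer note: the fs hunks below replace `oio::TokioReader`/`oio::StdReader` with a single `FsReader` built on positioned reads, so the async and blocking paths share one file handle type. A minimal sketch of the primitive the new reader leans on, using only std; the helper name `read_range` is illustrative and not part of this patch:

use std::fs::File;
use std::io;
use std::os::unix::fs::FileExt;

// A positioned read never moves the file cursor, which is what lets a
// reader expose `read_at(&self, offset, limit)` without `&mut self` or a
// separate seek step. Unix-only; Windows would use
// `std::os::windows::fs::FileExt::seek_read` instead.
fn read_range(f: &File, mut offset: u64, limit: usize) -> io::Result<Vec<u8>> {
    let mut buf = vec![0u8; limit];
    let mut filled = 0;
    while filled < limit {
        let n = f.read_at(&mut buf[filled..], offset)?;
        if n == 0 {
            break; // EOF before the range was satisfied.
        }
        filled += n;
        offset += n as u64;
    }
    buf.truncate(filled);
    Ok(buf)
}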
@@ -240,10 +241,10 @@ impl FsBackend { #[async_trait] impl Accessor for FsBackend { - type Reader = oio::TokioReader; + type Reader = FsReader; type Writer = FsWriter; type Lister = Option>; - type BlockingReader = oio::StdReader; + type BlockingReader = FsReader; type BlockingWriter = FsWriter; type BlockingLister = Option>; @@ -255,7 +256,6 @@ impl Accessor for FsBackend { stat: true, read: true, - read_can_seek: true, write: true, write_can_empty: true, @@ -327,7 +327,7 @@ impl Accessor for FsBackend { .await .map_err(new_std_io_error)?; - let r = oio::TokioReader::new(f); + let r = FsReader::new(f.into_std().await); Ok((RpRead::new(), r)) } @@ -475,8 +475,7 @@ impl Accessor for FsBackend { .open(p) .map_err(new_std_io_error)?; - let r = oio::StdReader::new(f); - + let r = FsReader::new(f); Ok((RpRead::new(), r)) } diff --git a/core/src/services/fs/mod.rs b/core/src/services/fs/mod.rs index fd51bffc96ec..795180d41601 100644 --- a/core/src/services/fs/mod.rs +++ b/core/src/services/fs/mod.rs @@ -19,4 +19,5 @@ mod backend; pub use backend::FsBuilder as Fs; mod lister; +mod reader; mod writer; diff --git a/core/src/services/fs/reader.rs b/core/src/services/fs/reader.rs new file mode 100644 index 000000000000..af03e80e00d3 --- /dev/null +++ b/core/src/services/fs/reader.rs @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use tokio::io::ReadBuf; + +use crate::raw::*; +use crate::*; + +pub struct FsReader { + f: std::fs::File, +} + +impl FsReader { + pub fn new(f: std::fs::File) -> Self { + Self { f } + } + + fn try_clone(&self) -> Result<Self> { + let f = self.f.try_clone().map_err(|err| { + Error::new( + ErrorKind::Unexpected, + "tokio fs clone file descriptor failed", + ) + .set_source(err) + })?; + + Ok(Self { f }) + } + + #[cfg(target_family = "unix")] + pub fn read_at_inner(&self, buf: &mut [u8], offset: u64) -> Result<usize> { + use std::os::unix::fs::FileExt; + self.f.read_at(buf, offset).map_err(new_std_io_error) + } + + #[cfg(target_family = "windows")] + pub fn read_at_inner(&self, buf: &mut [u8], offset: u64) -> Result<usize> { + use std::os::windows::fs::FileExt; + self.f.seek_read(buf, offset).map_err(new_std_io_error) + } +} + +impl oio::Read for FsReader { + async fn read_at(&self, offset: u64, limit: usize) -> Result<oio::Buffer> { + let handle = self.try_clone()?; + + match tokio::runtime::Handle::try_current() { + Ok(runtime) => runtime + .spawn_blocking(move || oio::BlockingRead::read_at(&handle, offset, limit)) + .await + .map_err(|err| { + Error::new(ErrorKind::Unexpected, "tokio spawn io task failed").set_source(err) + })?, + Err(_) => Err(Error::new( + ErrorKind::Unexpected, + "no tokio runtime found, failed to run io task", + )), + } + } +} + +impl oio::BlockingRead for FsReader { + fn read_at(&self, mut offset: u64, limit: usize) -> Result<oio::Buffer> { + let mut bs = Vec::with_capacity(limit); + + let buf = bs.spare_capacity_mut(); + let mut read_buf: ReadBuf = ReadBuf::uninit(buf); + + // SAFETY: Read at most `limit` bytes into `read_buf`. + unsafe { + read_buf.assume_init(limit); + } + + loop { + // If the buffer is full, we are done. + if read_buf.initialize_unfilled().is_empty() { + break; + } + let n = self.read_at_inner(read_buf.initialize_unfilled(), offset)?; + if n == 0 { + break; + } + read_buf.advance(n); + offset += n as u64; + } + + // SAFETY: the loop above initialized exactly `filled` bytes of `bs`. + let filled = read_buf.filled().len(); + unsafe { bs.set_len(filled) } + Ok(oio::Buffer::from(bs)) + } +}
diff --git a/core/src/services/fs/writer.rs b/core/src/services/fs/writer.rs index c035ff1bf94b..7cec4bd27332 100644 --- a/core/src/services/fs/writer.rs +++ b/core/src/services/fs/writer.rs @@ -15,11 +15,10 @@ // specific language governing permissions and limitations // under the License. -use bytes::Bytes; - use std::io::Write; use std::path::PathBuf; +use bytes::Bytes; use tokio::io::AsyncWriteExt; use crate::raw::*;
diff --git a/core/src/services/ftp/backend.rs b/core/src/services/ftp/backend.rs index e51b64384ba6..962fd8cd8e80 100644 --- a/core/src/services/ftp/backend.rs +++ b/core/src/services/ftp/backend.rs @@ -25,8 +25,6 @@ use async_tls::TlsConnector; use async_trait::async_trait; use bb8::PooledConnection; use bb8::RunError; -use futures::AsyncRead; -use futures::AsyncReadExt; use http::Uri; use log::debug; use serde::Deserialize; @@ -42,10 +40,10 @@ use tokio::sync::OnceCell; use super::err::parse_error; use super::lister::FtpLister; -use super::util::FtpReader; +use super::reader::FtpReader; use super::writer::FtpWriter; +use super::writer::FtpWriters; use crate::raw::*; -use crate::services::ftp::writer::FtpWriters; use crate::*; /// Config for Ftp services support. @@ -301,7 +299,6 @@ impl Accessor for FtpBackend { stat: true, read: true, - read_with_range: true, write: true, @@ -359,49 +356,8 @@ impl Accessor for FtpBackend { Ok(RpStat::new(meta)) } - /// TODO: migrate to FileReader maybe?
async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let mut ftp_stream = self.ftp_connect(Operation::Read).await?; - - let meta = self.ftp_stat(path).await?; - - let br = args.range(); - let r: Box = match (br.offset(), br.size()) { - (Some(offset), Some(size)) => { - ftp_stream - .resume_transfer(offset as usize) - .await - .map_err(parse_error)?; - let ds = ftp_stream - .retr_as_stream(path) - .await - .map_err(parse_error)? - .take(size); - Box::new(ds) - } - (Some(offset), None) => { - ftp_stream - .resume_transfer(offset as usize) - .await - .map_err(parse_error)?; - let ds = ftp_stream.retr_as_stream(path).await.map_err(parse_error)?; - Box::new(ds) - } - (None, Some(size)) => { - ftp_stream - .resume_transfer((meta.size() as u64 - size) as usize) - .await - .map_err(parse_error)?; - let ds = ftp_stream.retr_as_stream(path).await.map_err(parse_error)?; - Box::new(ds) - } - (None, None) => { - let ds = ftp_stream.retr_as_stream(path).await.map_err(parse_error)?; - Box::new(ds) - } - }; - - Ok((RpRead::new(), FtpReader::new(r, ftp_stream))) + Ok((RpRead::new(), FtpReader::new(self.clone(), path, args))) } async fn write(&self, path: &str, _: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -497,7 +453,7 @@ impl FtpBackend { }) } - async fn ftp_stat(&self, path: &str) -> Result { + pub async fn ftp_stat(&self, path: &str) -> Result { let mut ftp_stream = self.ftp_connect(Operation::Stat).await?; let (parent, basename) = (get_parent(path), get_basename(path)); diff --git a/core/src/services/ftp/lister.rs b/core/src/services/ftp/lister.rs index 3d531815a201..c3625a2eed0d 100644 --- a/core/src/services/ftp/lister.rs +++ b/core/src/services/ftp/lister.rs @@ -17,7 +17,6 @@ use std::str; use std::str::FromStr; - use std::vec::IntoIter; use suppaftp::list::File; diff --git a/core/src/services/ftp/mod.rs b/core/src/services/ftp/mod.rs index 85d3d8da9234..35a0ba5734ac 100644 --- a/core/src/services/ftp/mod.rs +++ b/core/src/services/ftp/mod.rs @@ -21,5 +21,5 @@ pub use backend::FtpConfig; mod err; mod lister; -mod util; +mod reader; mod writer; diff --git a/core/src/services/ftp/reader.rs b/core/src/services/ftp/reader.rs new file mode 100644 index 000000000000..f85f7074c4fd --- /dev/null +++ b/core/src/services/ftp/reader.rs @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use futures::AsyncReadExt; + +use super::backend::FtpBackend; +use super::err::parse_error; +use crate::raw::*; +use crate::*; + +pub struct FtpReader { + core: FtpBackend, + + path: String, + _op: OpRead, +} + +impl FtpReader { + pub fn new(core: FtpBackend, path: &str, op: OpRead) -> Self { + FtpReader { + core, + path: path.to_string(), + _op: op, + } + } +} + +impl oio::Read for FtpReader { + async fn read_at(&self, offset: u64, limit: usize) -> Result { + let mut ftp_stream = self.core.ftp_connect(Operation::Read).await?; + + if offset != 0 { + ftp_stream + .resume_transfer(offset as usize) + .await + .map_err(parse_error)?; + } + + let mut ds = ftp_stream + .retr_as_stream(&self.path) + .await + .map_err(parse_error)? + .take(limit as _); + let mut bs = Vec::with_capacity(limit); + ds.read_to_end(&mut bs).await.map_err(new_std_io_error)?; + Ok(oio::Buffer::from(bs)) + } +} diff --git a/core/src/services/ftp/util.rs b/core/src/services/ftp/util.rs deleted file mode 100644 index 3a7e5abac9fd..000000000000 --- a/core/src/services/ftp/util.rs +++ /dev/null @@ -1,105 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io; - -use bb8::PooledConnection; -use bytes::Bytes; -use futures::AsyncRead; -use futures::AsyncReadExt; -use suppaftp::Status; -use tokio::io::ReadBuf; - -use super::backend::Manager; -use crate::raw::*; -use crate::services::ftp::err::parse_error; -use crate::*; - -/// Wrapper for ftp data stream and command stream. -pub struct FtpReader { - reader: Box, - conn: Option>, - buf: Vec, -} - -unsafe impl Sync for FtpReader {} - -impl FtpReader { - /// Create an instance of FtpReader. - pub fn new( - reader: Box, - conn: PooledConnection<'static, Manager>, - ) -> Self { - Self { - reader, - conn: Some(conn), - buf: Vec::with_capacity(64 * 1024), - } - } -} - -impl oio::Read for FtpReader { - async fn read(&mut self, limit: usize) -> Result { - if self.conn.is_none() { - return Err(Error::new( - ErrorKind::Unexpected, - "ftp reader is already closed", - )); - } - - // Make sure buf has enough space. - if self.buf.capacity() < limit { - self.buf.reserve(limit); - } - let buf = self.buf.spare_capacity_mut(); - let mut read_buf: ReadBuf = ReadBuf::uninit(buf); - - // SAFETY: Read at most `size` bytes into `read_buf`. - unsafe { - read_buf.assume_init(limit); - } - - let data = self.reader.read(read_buf.initialize_unfilled()).await; - - // Data read with success, copy and return it. - if let Ok(n) = data { - if n > 0 { - read_buf.set_filled(n); - return Ok(Bytes::copy_from_slice(&self.buf[..n])); - } - } - - // While hitting Error or EOF, we should end this ftp stream. 
- let _ = self - .conn - .take() - .expect("connection must be valid during read") - .read_response_in(&[Status::ClosingDataConnection, Status::RequestedFileActionOk]) - .await - .map_err(parse_error)?; - Ok(Bytes::new()) - } - - async fn seek(&mut self, pos: io::SeekFrom) -> Result { - let _ = pos; - - Err(Error::new( - ErrorKind::Unsupported, - "ftp reader doesn't support seeking", - )) - } -} diff --git a/core/src/services/ftp/writer.rs b/core/src/services/ftp/writer.rs index 505176a46d27..fa9f959e2b00 100644 --- a/core/src/services/ftp/writer.rs +++ b/core/src/services/ftp/writer.rs @@ -19,7 +19,6 @@ use bytes::Bytes; use futures::AsyncWriteExt; use super::backend::FtpBackend; - use crate::raw::*; use crate::services::ftp::err::parse_error; use crate::*; diff --git a/core/src/services/gcs/backend.rs b/core/src/services/gcs/backend.rs index 899b5efb3361..6ec2f28b1c57 100644 --- a/core/src/services/gcs/backend.rs +++ b/core/src/services/gcs/backend.rs @@ -21,6 +21,7 @@ use std::fmt::Formatter; use std::sync::Arc; use async_trait::async_trait; +use bytes::Buf; use http::StatusCode; use log::debug; use reqsign::GoogleCredentialLoader; @@ -33,6 +34,7 @@ use serde_json; use super::core::*; use super::error::parse_error; use super::lister::GcsLister; +use super::reader::GcsReader; use super::writer::GcsWriter; use super::writer::GcsWriters; use crate::raw::*; @@ -332,7 +334,7 @@ pub struct GcsBackend { #[cfg_attr(not(target_arch = "wasm32"), async_trait)] #[cfg_attr(target_arch = "wasm32", async_trait(?Send))] impl Accessor for GcsBackend { - type Reader = IncomingAsyncBody; + type Reader = GcsReader; type Writer = GcsWriters; type Lister = oio::PageLister; type BlockingReader = (); @@ -350,8 +352,7 @@ impl Accessor for GcsBackend { stat_with_if_none_match: true, read: true, - read_can_next: true, - read_with_range: true, + read_with_if_match: true, read_with_if_none_match: true, @@ -393,10 +394,10 @@ impl Accessor for GcsBackend { return Err(parse_error(resp).await?); } - let slc = resp.into_body().bytes().await?; + let slc = resp.into_body(); let meta: GetObjectJsonResponse = - serde_json::from_slice(&slc).map_err(new_json_deserialize_error)?; + serde_json::from_reader(slc.reader()).map_err(new_json_deserialize_error)?; let mut m = Metadata::new(EntryMode::FILE); @@ -418,16 +419,10 @@ impl Accessor for GcsBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.gcs_get_object(path, &args).await?; - - if resp.status().is_success() { - let size = parse_content_length(resp.headers())?; - Ok((RpRead::new().with_size(size), resp.into_body())) - } else if resp.status() == StatusCode::RANGE_NOT_SATISFIABLE { - Ok((RpRead::new(), IncomingAsyncBody::empty())) - } else { - Err(parse_error(resp).await?) - } + Ok(( + RpRead::default(), + GcsReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -465,7 +460,6 @@ impl Accessor for GcsBackend { let resp = self.core.gcs_copy_object(from, to).await?; if resp.status().is_success() { - resp.into_body().consume().await?; Ok(RpCopy::default()) } else { Err(parse_error(resp).await?) 
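Reviewer note: the hunks above trade `serde_json::from_slice(&body.bytes().await?)` for `serde_json::from_reader(body.reader())`, decoding straight out of the buffered body with no extra copy and no second await; the same swap repeats across the remaining services. A self-contained sketch of the pattern, with a hypothetical `Info` payload standing in for the real response types:

use bytes::{Buf, Bytes};
use serde::Deserialize;

#[derive(Deserialize)]
struct Info {
    name: String,
}

fn decode(body: Bytes) -> serde_json::Result<Info> {
    // `Bytes` implements `Buf`, so `reader()` adapts it into `std::io::Read`
    // and serde_json can consume it without materializing a slice first.
    serde_json::from_reader(body.reader())
}

fn main() {
    let info = decode(Bytes::from_static(b"{\"name\":\"opendal\"}")).unwrap();
    assert_eq!(info.name, "opendal");
}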
@@ -528,7 +522,7 @@ impl Accessor for GcsBackend { .trim_matches('"'); let multipart: Multipart = Multipart::new() .with_boundary(boundary) - .parse(resp.into_body().bytes().await?)?; + .parse(resp.into_body().to_bytes())?; let parts = multipart.into_parts(); let mut batched_result = Vec::with_capacity(parts.len()); diff --git a/core/src/services/gcs/core.rs b/core/src/services/gcs/core.rs index 46cc2c4123be..491625d32276 100644 --- a/core/src/services/gcs/core.rs +++ b/core/src/services/gcs/core.rs @@ -141,13 +141,18 @@ impl GcsCore { } #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } } impl GcsCore { - pub fn gcs_get_object_request(&self, path: &str, args: &OpRead) -> Result> { + pub fn gcs_get_object_request( + &self, + path: &str, + range: BytesRange, + args: &OpRead, + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( @@ -165,8 +170,8 @@ impl GcsCore { if let Some(if_none_match) = args.if_none_match() { req = req.header(IF_NONE_MATCH, if_none_match); } - if !args.range().is_full() { - req = req.header(http::header::RANGE, args.range().to_header()); + if !range.is_full() { + req = req.header(http::header::RANGE, range.to_header()); } let req = req @@ -194,9 +199,6 @@ impl GcsCore { if let Some(if_none_match) = args.if_none_match() { req = req.header(IF_NONE_MATCH, if_none_match); } - if !args.range().is_full() { - req = req.header(http::header::RANGE, args.range().to_header()); - } let req = req .body(AsyncBody::Empty) @@ -208,9 +210,10 @@ impl GcsCore { pub async fn gcs_get_object( &self, path: &str, + range: BytesRange, args: &OpRead, - ) -> Result> { - let mut req = self.gcs_get_object_request(path, args)?; + ) -> Result> { + let mut req = self.gcs_get_object_request(path, range, args)?; self.sign(&mut req).await?; self.send(req).await @@ -288,8 +291,11 @@ impl GcsCore { AsyncBody::Bytes(bytes) => { media_part = media_part.content(bytes); } - AsyncBody::Stream(stream) => { - media_part = media_part.stream(size.unwrap(), stream); + _ => { + return Err(Error::new( + ErrorKind::Unexpected, + "multipart upload does not support streaming body", + )); } } @@ -388,7 +394,7 @@ impl GcsCore { &self, path: &str, args: &OpStat, - ) -> Result> { + ) -> Result> { let mut req = self.gcs_head_object_request(path, args)?; self.sign(&mut req).await?; @@ -396,7 +402,7 @@ impl GcsCore { self.send(req).await } - pub async fn gcs_delete_object(&self, path: &str) -> Result> { + pub async fn gcs_delete_object(&self, path: &str) -> Result> { let mut req = self.gcs_delete_object_request(path)?; self.sign(&mut req).await?; @@ -418,10 +424,7 @@ impl GcsCore { .map_err(new_request_build_error) } - pub async fn gcs_delete_objects( - &self, - paths: Vec, - ) -> Result> { + pub async fn gcs_delete_objects(&self, paths: Vec) -> Result> { let uri = format!("{}/batch/storage/v1", self.endpoint); let mut multipart = Multipart::new(); @@ -441,11 +444,7 @@ impl GcsCore { self.send(req).await } - pub async fn gcs_copy_object( - &self, - from: &str, - to: &str, - ) -> Result> { + pub async fn gcs_copy_object(&self, from: &str, to: &str) -> Result> { let source = build_abs_path(&self.root, from); let dest = build_abs_path(&self.root, to); @@ -474,7 +473,7 @@ impl GcsCore { delimiter: &str, limit: Option, start_after: Option, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let mut url = format!( @@ -515,10 +514,7 @@ impl GcsCore { self.send(req).await } - pub async fn 
gcs_initiate_resumable_upload( - &self, - path: &str, - ) -> Result> { + pub async fn gcs_initiate_resumable_upload(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( "{}/upload/storage/v1/b/{}/o?uploadType=resumable&name={}", @@ -561,7 +557,7 @@ impl GcsCore { written: u64, size: u64, body: AsyncBody, - ) -> Result> { + ) -> Result> { let mut req = Request::post(location) .header(CONTENT_LENGTH, size) .header( @@ -584,7 +580,7 @@ impl GcsCore { pub async fn gcs_abort_resumable_upload( &self, location: &str, - ) -> Result> { + ) -> Result> { let mut req = Request::delete(location) .header(CONTENT_LENGTH, 0) .body(AsyncBody::Empty) diff --git a/core/src/services/gcs/error.rs b/core/src/services/gcs/error.rs index 2c5665ffd4cf..38ffa0486ba4 100644 --- a/core/src/services/gcs/error.rs +++ b/core/src/services/gcs/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::Response; use http::StatusCode; use serde::Deserialize; @@ -50,9 +51,9 @@ struct GcsErrorDetail { } /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/gcs/lister.rs b/core/src/services/gcs/lister.rs index f18412c74bc4..17c98452edec 100644 --- a/core/src/services/gcs/lister.rs +++ b/core/src/services/gcs/lister.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use bytes::Buf; use serde_json; use super::core::*; @@ -79,10 +80,10 @@ impl oio::PageList for GcsLister { if !resp.status().is_success() { return Err(parse_error(resp).await?); } - let bytes = resp.into_body().bytes().await?; + let bytes = resp.into_body(); let output: ListResponse = - serde_json::from_slice(&bytes).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; if let Some(token) = &output.next_page_token { ctx.token = token.clone(); diff --git a/core/src/services/gcs/mod.rs b/core/src/services/gcs/mod.rs index 1dcaa9ca0632..756b2474728d 100644 --- a/core/src/services/gcs/mod.rs +++ b/core/src/services/gcs/mod.rs @@ -22,5 +22,6 @@ pub use backend::GcsConfig; mod core; mod error; mod lister; +mod reader; mod uri; mod writer; diff --git a/core/src/services/gcs/reader.rs b/core/src/services/gcs/reader.rs new file mode 100644 index 000000000000..bc10981cb2c7 --- /dev/null +++ b/core/src/services/gcs/reader.rs @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use http::StatusCode; + +use super::core::GcsCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct GcsReader { + core: Arc, + + path: String, + op: OpRead, +} + +impl GcsReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + GcsReader { + core, + path: path.to_string(), + op, + } + } +} + +impl oio::Read for GcsReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self + .core + .gcs_get_object(&self.path, range, &self.op) + .await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/gcs/writer.rs b/core/src/services/gcs/writer.rs index 0bf13a4498c3..9e3c3a54d6b0 100644 --- a/core/src/services/gcs/writer.rs +++ b/core/src/services/gcs/writer.rs @@ -58,10 +58,7 @@ impl oio::RangeWrite for GcsWriter { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED | StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -122,10 +119,7 @@ impl oio::RangeWrite for GcsWriter { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -136,10 +130,7 @@ impl oio::RangeWrite for GcsWriter { match resp.status().as_u16() { // gcs returns 499 if the upload aborted successfully // reference: https://cloud.google.com/storage/docs/performing-resumable-uploads#cancel-upload-json - 499 => { - resp.into_body().consume().await?; - Ok(()) - } + 499 => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/gdrive/backend.rs b/core/src/services/gdrive/backend.rs index 671fa79e1c08..c6233359f610 100644 --- a/core/src/services/gdrive/backend.rs +++ b/core/src/services/gdrive/backend.rs @@ -19,6 +19,7 @@ use std::fmt::Debug; use std::sync::Arc; use async_trait::async_trait; +use bytes::Buf; use bytes::Bytes; use chrono::Utc; use http::Request; @@ -26,12 +27,12 @@ use http::StatusCode; use serde_json::json; use super::core::GdriveCore; +use super::core::GdriveFile; use super::error::parse_error; use super::lister::GdriveLister; +use super::reader::GdriveReader; use super::writer::GdriveWriter; use crate::raw::*; -use crate::services::gdrive::core::GdriveFile; -use crate::types::Result; use crate::*; #[derive(Clone, Debug)] @@ -42,7 +43,7 @@ pub struct GdriveBackend { #[cfg_attr(not(target_arch = "wasm32"), async_trait)] #[cfg_attr(target_arch = "wasm32", async_trait(?Send))] impl Accessor for GdriveBackend { - type Reader = IncomingAsyncBody; + type Reader = GdriveReader; type Writer = oio::OneShotWriter; type Lister = oio::PageLister; type BlockingReader = (); @@ -86,9 +87,9 @@ impl Accessor for GdriveBackend { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let gdrive_file: GdriveFile = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; if gdrive_file.mime_type == "application/vnd.google-apps.folder" { return 
Ok(RpStat::new(Metadata::new(EntryMode::DIR))); @@ -108,22 +109,11 @@ impl Accessor for GdriveBackend { Ok(RpStat::new(meta)) } - async fn read(&self, path: &str, _args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.gdrive_get(path).await?; - - let status = resp.status(); - - match status { - StatusCode::OK => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - _ => Err(parse_error(resp).await?), - } + async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + Ok(( + RpRead::default(), + GdriveReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, _: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -155,7 +145,7 @@ impl Accessor for GdriveBackend { } self.core.path_cache.remove(&path).await; - resp.into_body().consume().await?; + return Ok(RpDelete::default()); } @@ -190,7 +180,6 @@ impl Accessor for GdriveBackend { } self.core.path_cache.remove(&to_path).await; - resp.into_body().consume().await?; } let url = format!( @@ -230,7 +219,6 @@ impl Accessor for GdriveBackend { } self.core.path_cache.remove(&target).await; - resp.into_body().consume().await?; } let resp = self @@ -242,9 +230,9 @@ impl Accessor for GdriveBackend { match status { StatusCode::OK => { - let body = resp.into_body().bytes().await?; - let meta = serde_json::from_slice::(&body) - .map_err(new_json_deserialize_error)?; + let body = resp.into_body(); + let meta: GdriveFile = + serde_json::from_reader(body.reader()).map_err(new_json_deserialize_error)?; let cache = &self.core.path_cache; diff --git a/core/src/services/gdrive/core.rs b/core/src/services/gdrive/core.rs index 0f6211666d6a..8b062b6c3450 100644 --- a/core/src/services/gdrive/core.rs +++ b/core/src/services/gdrive/core.rs @@ -21,6 +21,7 @@ use std::sync::Arc; use async_trait::async_trait; use bytes; +use bytes::Buf; use bytes::Bytes; use chrono::DateTime; use chrono::Utc; @@ -58,7 +59,7 @@ impl Debug for GdriveCore { } impl GdriveCore { - pub async fn gdrive_stat(&self, path: &str) -> Result> { + pub async fn gdrive_stat(&self, path: &str) -> Result> { let path = build_abs_path(&self.root, path); let file_id = self.path_cache.get(&path).await?.ok_or(Error::new( ErrorKind::NotFound, @@ -78,7 +79,7 @@ impl GdriveCore { self.client.send(req).await } - pub async fn gdrive_get(&self, path: &str) -> Result> { + pub async fn gdrive_get(&self, path: &str, range: BytesRange) -> Result> { let path = build_abs_path(&self.root, path); let path_id = self.path_cache.get(&path).await?.ok_or(Error::new( ErrorKind::NotFound, @@ -91,6 +92,7 @@ impl GdriveCore { ); let mut req = Request::get(&url) + .header(header::RANGE, range.to_header()) .body(AsyncBody::Empty) .map_err(new_request_build_error)?; self.sign(&mut req).await?; @@ -103,7 +105,7 @@ impl GdriveCore { file_id: &str, page_size: i32, next_page_token: &str, - ) -> Result> { + ) -> Result> { let q = format!("'{}' in parents and trashed = false", file_id); let mut url = format!( "https://www.googleapis.com/drive/v3/files?pageSize={}&q={}", @@ -127,7 +129,7 @@ impl GdriveCore { &self, source: &str, target: &str, - ) -> Result> { + ) -> Result> { let source_file_id = self.path_cache.get(source).await?.ok_or(Error::new( ErrorKind::NotFound, &format!("source path not found: {}", source), @@ -161,7 +163,7 @@ impl GdriveCore { self.client.send(req).await } - pub async fn gdrive_trash(&self, file_id: &str) -> 
Result> { + pub async fn gdrive_trash(&self, file_id: &str) -> Result> { let url = format!("https://www.googleapis.com/drive/v3/files/{}", file_id); let body = serde_json::to_vec(&json!({ @@ -184,7 +186,7 @@ impl GdriveCore { path: &str, size: u64, body: Bytes, - ) -> Result> { + ) -> Result> { let parent = self.path_cache.ensure_dir(get_parent(path)).await?; let url = "https://www.googleapis.com/upload/drive/v3/files?uploadType=multipart"; @@ -234,7 +236,7 @@ impl GdriveCore { file_id: &str, size: u64, body: Bytes, - ) -> Result> { + ) -> Result> { let url = format!( "https://www.googleapis.com/upload/drive/v3/files/{}?uploadType=media", file_id @@ -310,8 +312,8 @@ impl GdriveSigner { match status { StatusCode::OK => { - let resp_body = &resp.into_body().bytes().await?; - let token = serde_json::from_slice::(resp_body) + let resp_body = resp.into_body(); + let token: GdriveTokenResponse = serde_json::from_reader(resp_body.reader()) .map_err(new_json_deserialize_error)?; self.access_token = token.access_token.clone(); self.expires_in = Utc::now() + chrono::Duration::seconds(token.expires_in) @@ -379,9 +381,9 @@ impl PathQuery for GdrivePathQuery { match status { StatusCode::OK => { - let body = resp.into_body().bytes().await?; + let body = resp.into_body(); let meta: GdriveFileList = - serde_json::from_slice(&body).map_err(new_json_deserialize_error)?; + serde_json::from_reader(body.reader()).map_err(new_json_deserialize_error)?; if let Some(f) = meta.files.first() { Ok(Some(f.id.clone())) @@ -416,8 +418,9 @@ impl PathQuery for GdrivePathQuery { return Err(parse_error(resp).await?); } - let body = resp.into_body().bytes().await?; - let file: GdriveFile = serde_json::from_slice(&body).map_err(new_json_deserialize_error)?; + let body = resp.into_body(); + let file: GdriveFile = + serde_json::from_reader(body.reader()).map_err(new_json_deserialize_error)?; Ok(file.id) } } diff --git a/core/src/services/gdrive/error.rs b/core/src/services/gdrive/error.rs index 8b7ad781aed7..f9e57ecef804 100644 --- a/core/src/services/gdrive/error.rs +++ b/core/src/services/gdrive/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::Response; use http::StatusCode; use serde::Deserialize; @@ -35,9 +36,9 @@ struct GdriveInnerError { } /// Parse error response into Error. 
-pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (mut kind, mut retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/gdrive/lister.rs b/core/src/services/gdrive/lister.rs index 34df9073f388..e77a5022da7a 100644 --- a/core/src/services/gdrive/lister.rs +++ b/core/src/services/gdrive/lister.rs @@ -54,7 +54,7 @@ impl oio::PageList for GdriveLister { .await?; let bytes = match resp.status() { - StatusCode::OK => resp.into_body().bytes().await?, + StatusCode::OK => resp.into_body().to_bytes(), _ => return Err(parse_error(resp).await?), }; diff --git a/core/src/services/gdrive/mod.rs b/core/src/services/gdrive/mod.rs index 015f177b28e0..2b9b906cbc46 100644 --- a/core/src/services/gdrive/mod.rs +++ b/core/src/services/gdrive/mod.rs @@ -22,4 +22,5 @@ mod error; pub use builder::GdriveBuilder as Gdrive; mod lister; +mod reader; mod writer; diff --git a/core/src/services/gdrive/reader.rs b/core/src/services/gdrive/reader.rs new file mode 100644 index 000000000000..267777753a84 --- /dev/null +++ b/core/src/services/gdrive/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use http::StatusCode; + +use super::core::GdriveCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct GdriveReader { + core: Arc, + + path: String, + _op: OpRead, +} + +impl GdriveReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + GdriveReader { + core, + path: path.to_string(), + _op: op, + } + } +} + +impl oio::Read for GdriveReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.gdrive_get(&self.path, range).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/gdrive/writer.rs b/core/src/services/gdrive/writer.rs index 0ebbd809803b..9647b55b6dad 100644 --- a/core/src/services/gdrive/writer.rs +++ b/core/src/services/gdrive/writer.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use bytes::Buf; use bytes::Bytes; use http::StatusCode; @@ -64,12 +65,10 @@ impl oio::OneShotWrite for GdriveWriter { StatusCode::OK | StatusCode::CREATED => { // If we don't have the file id before, let's update the cache to avoid re-fetching. 
if self.file_id.is_none() { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let file: GdriveFile = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; self.core.path_cache.insert(&self.path, &file.id).await; - } else { - resp.into_body().consume().await?; } Ok(()) } diff --git a/core/src/services/ghac/backend.rs b/core/src/services/ghac/backend.rs index 6319ae7f43a8..76b1349319fd 100644 --- a/core/src/services/ghac/backend.rs +++ b/core/src/services/ghac/backend.rs @@ -19,6 +19,7 @@ use std::collections::HashMap; use std::env; use async_trait::async_trait; +use bytes::Buf; use bytes::Bytes; use http::header; use http::header::ACCEPT; @@ -37,6 +38,7 @@ use serde::Serialize; use super::error::parse_error; use super::writer::GhacWriter; use crate::raw::*; +use crate::services::ghac::reader::GhacReader; use crate::*; /// The base url for cache url. @@ -227,7 +229,7 @@ pub struct GhacBackend { #[async_trait] impl Accessor for GhacBackend { - type Reader = IncomingAsyncBody; + type Reader = GhacReader; type Writer = GhacWriter; type Lister = (); type BlockingReader = (); @@ -243,8 +245,6 @@ impl Accessor for GhacBackend { stat: true, read: true, - read_can_next: true, - read_with_range: true, write: true, write_can_multi: true, @@ -266,9 +266,9 @@ impl Accessor for GhacBackend { let resp = self.client.send(req).await?; let location = if resp.status() == StatusCode::OK { - let slc = resp.into_body().bytes().await?; + let slc = resp.into_body(); let query_resp: GhacQueryResponse = - serde_json::from_slice(&slc).map_err(new_json_deserialize_error)?; + serde_json::from_reader(slc.reader()).map_err(new_json_deserialize_error)?; query_resp.archive_location } else { return Err(parse_error(resp).await?); @@ -292,7 +292,6 @@ impl Accessor for GhacBackend { .expect("content range must contains size"), ); - resp.into_body().consume().await?; Ok(RpStat::new(meta)) } _ => Err(parse_error(resp).await?), @@ -305,33 +304,18 @@ impl Accessor for GhacBackend { let resp = self.client.send(req).await?; let location = if resp.status() == StatusCode::OK { - let slc = resp.into_body().bytes().await?; + let slc = resp.into_body(); let query_resp: GhacQueryResponse = - serde_json::from_slice(&slc).map_err(new_json_deserialize_error)?; + serde_json::from_reader(slc.reader()).map_err(new_json_deserialize_error)?; query_resp.archive_location } else { return Err(parse_error(resp).await?); }; - let req = self.ghac_get_location(&location, args.range()).await?; - let resp = self.client.send(req).await?; - - let status = resp.status(); - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + GhacReader::new(self.clone(), &location, args), + )) } async fn write(&self, path: &str, _: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -340,9 +324,9 @@ impl Accessor for GhacBackend { let resp = self.client.send(req).await?; let cache_id = if resp.status().is_success() { - let slc = resp.into_body().bytes().await?; + let slc = resp.into_body(); let reserve_resp: GhacReserveResponse = - 
serde_json::from_slice(&slc).map_err(new_json_deserialize_error)?; + serde_json::from_reader(slc.reader()).map_err(new_json_deserialize_error)?; reserve_resp.cache_id } else { return Err(parse_error(resp) @@ -394,7 +378,7 @@ impl GhacBackend { Ok(req) } - async fn ghac_get_location( + pub async fn ghac_get_location( &self, location: &str, range: BytesRange, @@ -402,18 +386,7 @@ impl GhacBackend { let mut req = Request::get(location); if !range.is_full() { - // ghac is backed by azblob, and azblob doesn't support - // read with suffix range - // - // ref: https://learn.microsoft.com/en-us/rest/api/storageservices/specifying-the-range-header-for-blob-service-operations - if range.offset().is_none() && range.size().is_some() { - return Err(Error::new( - ErrorKind::Unsupported, - "ghac doesn't support read with suffix range", - )); - } - - req = req.header(http::header::RANGE, range.to_header()); + req = req.header(header::RANGE, range.to_header()); } req.body(AsyncBody::Empty).map_err(new_request_build_error) @@ -488,7 +461,7 @@ impl GhacBackend { Ok(req) } - async fn ghac_delete(&self, path: &str) -> Result> { + async fn ghac_delete(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( diff --git a/core/src/services/ghac/error.rs b/core/src/services/ghac/error.rs index 598822f1821e..c84004e6cd29 100644 --- a/core/src/services/ghac/error.rs +++ b/core/src/services/ghac/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::Response; use http::StatusCode; @@ -24,8 +25,8 @@ use crate::ErrorKind; use crate::Result; /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND | StatusCode::NO_CONTENT => (ErrorKind::NotFound, false), @@ -39,7 +40,7 @@ pub async fn parse_error(resp: Response) -> Result { _ => (ErrorKind::Unexpected, false), }; - let bs = body.bytes().await?; + let bs = body.copy_to_bytes(body.remaining()); let message = String::from_utf8_lossy(&bs); let mut err = Error::new(kind, &message); diff --git a/core/src/services/ghac/mod.rs b/core/src/services/ghac/mod.rs index b819c409dd78..8d5fe5623d69 100644 --- a/core/src/services/ghac/mod.rs +++ b/core/src/services/ghac/mod.rs @@ -19,4 +19,5 @@ mod backend; pub use backend::GhacBuilder as Ghac; mod error; +mod reader; mod writer; diff --git a/core/src/services/ghac/reader.rs b/core/src/services/ghac/reader.rs new file mode 100644 index 000000000000..c6fcfea79aeb --- /dev/null +++ b/core/src/services/ghac/reader.rs @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use http::StatusCode; + +use super::error::parse_error; +use crate::raw::*; +use crate::services::ghac::backend::GhacBackend; + +pub struct GhacReader { + core: GhacBackend, + + location: String, + _op: OpRead, +} + +impl GhacReader { + pub fn new(core: GhacBackend, location: &str, op: OpRead) -> Self { + GhacReader { + core, + location: location.to_string(), + _op: op, + } + } +} + +impl oio::Read for GhacReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let req = self.core.ghac_get_location(&self.location, range).await?; + let resp = self.core.client.send(req).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/ghac/writer.rs b/core/src/services/ghac/writer.rs index 3db0ea597975..6e92cd6aa990 100644 --- a/core/src/services/ghac/writer.rs +++ b/core/src/services/ghac/writer.rs @@ -57,7 +57,6 @@ impl oio::Write for GhacWriter { .map(|err| err.with_operation("Backend::ghac_upload"))?); } - resp.into_body().consume().await?; self.size += size as u64; Ok(size) } @@ -71,7 +70,6 @@ impl oio::Write for GhacWriter { let resp = self.backend.client.send(req).await?; if resp.status().is_success() { - resp.into_body().consume().await?; Ok(()) } else { Err(parse_error(resp) diff --git a/core/src/services/github/backend.rs b/core/src/services/github/backend.rs index 997e2ae92f03..b9a3d6e5c37e 100644 --- a/core/src/services/github/backend.rs +++ b/core/src/services/github/backend.rs @@ -31,6 +31,7 @@ use super::lister::GithubLister; use super::writer::GithubWriter; use super::writer::GithubWriters; use crate::raw::*; +use crate::services::github::reader::GithubReader; use crate::*; /// Config for backblaze Github services support. 
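Reviewer note: the new readers (GhacReader above, GithubReader below) all map HTTP statuses the same way: 200 and 206 carry the requested bytes, while 416 means the offset starts at or past EOF and degrades to an empty read instead of an error. A sketch of that convention with plain types in place of `oio::Buffer`; `interpret` is illustrative only:

use http::StatusCode;

fn interpret(status: StatusCode, body: Vec<u8>) -> Result<Vec<u8>, String> {
    match status {
        // Full-object and partial-content responses both carry the bytes.
        StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(body),
        // The range began at or past EOF: report an empty read, not an error.
        StatusCode::RANGE_NOT_SATISFIABLE => Ok(Vec::new()),
        _ => Err(format!("unexpected status: {status}")),
    }
}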
@@ -221,7 +222,7 @@ pub struct GithubBackend { #[async_trait] impl Accessor for GithubBackend { - type Reader = IncomingAsyncBody; + type Reader = GithubReader; type Writer = GithubWriters; @@ -284,22 +285,11 @@ impl Accessor for GithubBackend { } } - async fn read(&self, path: &str, _args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.get(path).await?; - - let status = resp.status(); - - match status { - StatusCode::OK => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - _ => Err(parse_error(resp).await?), - } + async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + Ok(( + RpRead::default(), + GithubReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, _args: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/services/github/core.rs b/core/src/services/github/core.rs index 2e90f66e2698..0c93c83aa5ed 100644 --- a/core/src/services/github/core.rs +++ b/core/src/services/github/core.rs @@ -19,6 +19,7 @@ use std::fmt::Debug; use std::fmt::Formatter; use base64::Engine; +use bytes::Buf; use bytes::Bytes; use http::header; use http::request; @@ -28,11 +29,10 @@ use http::StatusCode; use serde::Deserialize; use serde::Serialize; +use super::error::parse_error; use crate::raw::*; use crate::*; -use super::error::parse_error; - /// Core of [github contents](https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28#create-or-update-file-contents) services support. #[derive(Clone)] pub struct GithubCore { @@ -60,7 +60,7 @@ impl Debug for GithubCore { impl GithubCore { #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } @@ -100,7 +100,7 @@ impl GithubCore { } } - pub async fn stat(&self, path: &str) -> Result> { + pub async fn stat(&self, path: &str) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( @@ -122,7 +122,7 @@ impl GithubCore { self.send(req).await } - pub async fn get(&self, path: &str) -> Result> { + pub async fn get(&self, path: &str, range: BytesRange) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( @@ -137,14 +137,15 @@ impl GithubCore { let req = self.sign(req)?; let req = req - .header("Accept", "application/vnd.github.raw+json") + .header(header::ACCEPT, "application/vnd.github.raw+json") + .header(header::RANGE, range.to_header()) .body(AsyncBody::Empty) .map_err(new_request_build_error)?; self.send(req).await } - pub async fn upload(&self, path: &str, bs: Bytes) -> Result> { + pub async fn upload(&self, path: &str, bs: Bytes) -> Result> { let sha = self.get_file_sha(path).await?; let path = build_abs_path(&self.root, path); @@ -241,9 +242,9 @@ impl GithubCore { match resp.status() { StatusCode::OK => { - let body = resp.into_body().bytes().await?; + let body = resp.into_body(); let resp: ListResponse = - serde_json::from_slice(&body).map_err(new_json_deserialize_error)?; + serde_json::from_reader(body.reader()).map_err(new_json_deserialize_error)?; Ok(resp.entries) } diff --git a/core/src/services/github/error.rs b/core/src/services/github/error.rs index edd52ce70786..b438a82522c1 100644 --- a/core/src/services/github/error.rs +++ b/core/src/services/github/error.rs @@ -38,9 +38,9 @@ struct GithubSubError { } /// Parse error response into Error. 
-pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status.as_u16() { 401 | 403 => (ErrorKind::PermissionDenied, false), @@ -75,7 +75,6 @@ pub async fn parse_error(resp: Response) -> Result { #[cfg(test)] mod test { - use futures::stream; use http::StatusCode; use super::*; @@ -93,10 +92,7 @@ mod test { for res in err_res { let bs = bytes::Bytes::from(res.0); - let body = IncomingAsyncBody::new( - Box::new(oio::into_stream(stream::iter(vec![Ok(bs.clone())]))), - None, - ); + let body = oio::Buffer::from(bs); let resp = Response::builder().status(res.2).body(body).unwrap(); let err = parse_error(resp).await; diff --git a/core/src/services/github/mod.rs b/core/src/services/github/mod.rs index 06418c19347b..9855cb0808d4 100644 --- a/core/src/services/github/mod.rs +++ b/core/src/services/github/mod.rs @@ -22,4 +22,5 @@ pub use backend::GithubConfig; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/github/reader.rs b/core/src/services/github/reader.rs new file mode 100644 index 000000000000..64fd0ded58a6 --- /dev/null +++ b/core/src/services/github/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use http::StatusCode; + +use super::core::GithubCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct GithubReader { + core: Arc, + + path: String, + _op: OpRead, +} + +impl GithubReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + GithubReader { + core, + path: path.to_string(), + _op: op, + } + } +} + +impl oio::Read for GithubReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.get(&self.path, range).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/github/writer.rs b/core/src/services/github/writer.rs index 0ff42415b113..5b6d3f6c1fb7 100644 --- a/core/src/services/github/writer.rs +++ b/core/src/services/github/writer.rs @@ -45,10 +45,7 @@ impl oio::OneShotWrite for GithubWriter { let status = resp.status(); match status { - StatusCode::OK | StatusCode::CREATED => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK | StatusCode::CREATED => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/hdfs/backend.rs b/core/src/services/hdfs/backend.rs index b1eb2e57a5ab..b58f3aba3a56 100644 --- a/core/src/services/hdfs/backend.rs +++ b/core/src/services/hdfs/backend.rs @@ -31,6 +31,7 @@ use uuid::Uuid; use super::lister::HdfsLister; use super::writer::HdfsWriter; use crate::raw::*; +use crate::services::hdfs::reader::HdfsReader; use crate::*; /// [Hadoop Distributed File System (HDFS™)](https://hadoop.apache.org/) support. 
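Reviewer note: the hdfs hunks below use the same async-over-blocking bridge as FsReader: take the current tokio handle if one exists, run the blocking call on tokio's blocking pool, and fail with a clear error when no runtime is present. A generic sketch of that bridge, assuming tokio; the helper `run_blocking` is illustrative:

use tokio::runtime::Handle;

// Run a blocking closure on tokio's blocking pool, mirroring the
// `Handle::try_current()` + `spawn_blocking` shape used by the new readers.
async fn run_blocking<F, T>(f: F) -> Result<T, String>
where
    F: FnOnce() -> T + Send + 'static,
    T: Send + 'static,
{
    match Handle::try_current() {
        Ok(handle) => handle
            .spawn_blocking(f)
            .await
            .map_err(|err| format!("tokio spawn io task failed: {err}")),
        Err(_) => Err("no tokio runtime found, failed to run io task".to_string()),
    }
}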
@@ -245,10 +246,10 @@ unsafe impl Sync for HdfsBackend {} #[async_trait] impl Accessor for HdfsBackend { - type Reader = oio::FuturesReader; + type Reader = HdfsReader; type Writer = HdfsWriter; type Lister = Option; - type BlockingReader = oio::StdReader; + type BlockingReader = HdfsReader; type BlockingWriter = HdfsWriter; type BlockingLister = Option; @@ -260,7 +261,6 @@ impl Accessor for HdfsBackend { stat: true, read: true, - read_can_seek: true, write: true, write_can_append: self.enable_append, @@ -309,17 +309,27 @@ impl Accessor for HdfsBackend { async fn read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { let p = build_rooted_abs_path(&self.root, path); - let f = self - .client - .open_file() - .read(true) - .async_open(&p) - .await - .map_err(new_std_io_error)?; - - let r = oio::FuturesReader::new(f); - - Ok((RpRead::new(), r)) + let client = self.client.clone(); + let f = match tokio::runtime::Handle::try_current() { + Ok(runtime) => runtime + .spawn_blocking(move || { + client + .open_file() + .read(true) + .open(&p) + .map_err(new_std_io_error) + }) + .await + .map_err(|err| { + Error::new(ErrorKind::Unexpected, "tokio spawn io task failed").set_source(err) + })?, + Err(_) => Err(Error::new( + ErrorKind::Unexpected, + "no tokio runtime found, failed to run io task", + )), + }?; + + Ok((RpRead::new(), HdfsReader::new(f))) } async fn write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -508,9 +518,7 @@ impl Accessor for HdfsBackend { .open(&p) .map_err(new_std_io_error)?; - let r = oio::StdReader::new(f); - - Ok((RpRead::new(), r)) + Ok((RpRead::new(), HdfsReader::new(f))) } fn blocking_write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> { diff --git a/core/src/services/hdfs/mod.rs b/core/src/services/hdfs/mod.rs index f5d6506fb4c6..afa3c200a7c3 100644 --- a/core/src/services/hdfs/mod.rs +++ b/core/src/services/hdfs/mod.rs @@ -19,4 +19,5 @@ mod backend; pub use backend::HdfsBuilder as Hdfs; pub use backend::HdfsConfig; mod lister; +mod reader; mod writer; diff --git a/core/src/services/hdfs/reader.rs b/core/src/services/hdfs/reader.rs new file mode 100644 index 000000000000..63b17a05587c --- /dev/null +++ b/core/src/services/hdfs/reader.rs @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
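+
+//! Reader for the HDFS service.
+//!
+//! The wrapped `hdrs::File` handle only exposes blocking positioned reads,
+//! so the async `oio::Read` implementation below delegates to the blocking
+//! `oio::BlockingRead` implementation through
+//! `tokio::runtime::Handle::spawn_blocking`.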
+
+use std::sync::Arc;
+
+use hdrs::File;
+use tokio::io::ReadBuf;
+
+use crate::raw::*;
+use crate::Error;
+use crate::ErrorKind;
+
+pub struct HdfsReader {
+    f: Arc<File>,
+}
+
+impl HdfsReader {
+    pub fn new(f: File) -> Self {
+        Self { f: Arc::new(f) }
+    }
+}
+
+impl oio::Read for HdfsReader {
+    async fn read_at(&self, offset: u64, limit: usize) -> crate::Result<oio::Buffer> {
+        let r = Self { f: self.f.clone() };
+        match tokio::runtime::Handle::try_current() {
+            Ok(runtime) => runtime
+                .spawn_blocking(move || oio::BlockingRead::read_at(&r, offset, limit))
+                .await
+                .map_err(|err| {
+                    Error::new(ErrorKind::Unexpected, "tokio spawn io task failed").set_source(err)
+                })?,
+            Err(_) => Err(Error::new(
+                ErrorKind::Unexpected,
+                "no tokio runtime found, failed to run io task",
+            )),
+        }
+    }
+}
+
+impl oio::BlockingRead for HdfsReader {
+    fn read_at(&self, mut offset: u64, limit: usize) -> crate::Result<oio::Buffer> {
+        let mut bs = Vec::with_capacity(limit);
+
+        let buf = bs.spare_capacity_mut();
+        let mut read_buf: ReadBuf = ReadBuf::uninit(buf);
+
+        // SAFETY: Read at most `limit` bytes into `read_buf`.
+        unsafe {
+            read_buf.assume_init(limit);
+        }
+
+        loop {
+            // If the buffer is full, we are done.
+            if read_buf.initialize_unfilled().is_empty() {
+                break;
+            }
+            let n = self
+                .f
+                .read_at(read_buf.initialize_unfilled(), offset)
+                .map_err(new_std_io_error)?;
+            if n == 0 {
+                break;
+            }
+            read_buf.advance(n);
+            offset += n as u64;
+        }
+
+        // SAFETY: the loop above has filled exactly `filled` initialized
+        // bytes, so `bs` is valid up to that length.
+        let filled = read_buf.filled().len();
+        unsafe { bs.set_len(filled) }
+        Ok(oio::Buffer::from(bs))
+    }
+}
diff --git a/core/src/services/hdfs/writer.rs b/core/src/services/hdfs/writer.rs
index be1fc78e14d8..4fbc55bc84f2 100644
--- a/core/src/services/hdfs/writer.rs
+++ b/core/src/services/hdfs/writer.rs
@@ -16,11 +16,9 @@
 // under the License.

 use std::io::Write;
-
 use std::sync::Arc;

 use bytes::Bytes;
-
 use futures::AsyncWriteExt;

 use crate::raw::*;
diff --git a/core/src/services/hdfs_native/reader.rs b/core/src/services/hdfs_native/reader.rs
index 9ae5ed020a9c..f54697236604 100644
--- a/core/src/services/hdfs_native/reader.rs
+++ b/core/src/services/hdfs_native/reader.rs
@@ -15,13 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.

-use std::io::SeekFrom;
-
-use bytes::Bytes;
 use hdfs_native::file::FileReader;

-use crate::raw::oio::Read;
-use crate::*;
+use crate::raw::oio;

 pub struct HdfsNativeReader {
     _f: FileReader,
@@ -33,19 +29,8 @@ impl HdfsNativeReader {
     }
 }

-impl Read for HdfsNativeReader {
-    async fn read(&mut self, limit: usize) -> Result<Bytes> {
-        let _ = limit;
-
+impl oio::Read for HdfsNativeReader {
+    async fn read_at(&self, _offset: u64, _limit: usize) -> crate::Result<oio::Buffer> {
         todo!()
     }
-
-    async fn seek(&mut self, pos: SeekFrom) -> Result<u64> {
-        let _ = pos;
-
-        Err(Error::new(
-            ErrorKind::Unsupported,
-            "HdfsNativeReader doesn't support seeking",
-        ))
-    }
 }
diff --git a/core/src/services/hdfs_native/writer.rs b/core/src/services/hdfs_native/writer.rs
index 75ad0aeaf620..36648097cc99 100644
--- a/core/src/services/hdfs_native/writer.rs
+++ b/core/src/services/hdfs_native/writer.rs
@@ -16,11 +16,9 @@
 // under the License.
use bytes::Bytes; - use hdfs_native::file::FileWriter; use crate::raw::oio; - use crate::*; pub struct HdfsNativeWriter { diff --git a/core/src/services/http/backend.rs b/core/src/services/http/backend.rs index daf704735432..cd8c0c6e4050 100644 --- a/core/src/services/http/backend.rs +++ b/core/src/services/http/backend.rs @@ -31,6 +31,7 @@ use serde::Deserialize; use super::error::parse_error; use crate::raw::*; +use crate::services::http::reader::HttpReader; use crate::*; /// Config for Http service support. @@ -223,7 +224,7 @@ impl Debug for HttpBackend { #[async_trait] impl Accessor for HttpBackend { - type Reader = IncomingAsyncBody; + type Reader = HttpReader; type Writer = (); type Lister = (); type BlockingReader = (); @@ -240,8 +241,7 @@ impl Accessor for HttpBackend { stat_with_if_none_match: true, read: true, - read_can_next: true, - read_with_range: true, + read_with_if_match: true, read_with_if_none_match: true, @@ -273,30 +273,17 @@ impl Accessor for HttpBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.http_get(path, &args).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok((RpRead::default(), HttpReader::new(self.clone(), path, args))) } } impl HttpBackend { - async fn http_get(&self, path: &str, args: &OpRead) -> Result> { + pub async fn http_get( + &self, + path: &str, + range: BytesRange, + args: &OpRead, + ) -> Result> { let p = build_rooted_abs_path(&self.root, path); let url = format!("{}{}", self.endpoint, percent_encode_path(&p)); @@ -315,8 +302,8 @@ impl HttpBackend { req = req.header(header::AUTHORIZATION, auth.clone()) } - if !args.range().is_full() { - req = req.header(header::RANGE, args.range().to_header()); + if !range.is_full() { + req = req.header(header::RANGE, range.to_header()); } let req = req @@ -326,7 +313,7 @@ impl HttpBackend { self.client.send(req).await } - async fn http_head(&self, path: &str, args: &OpStat) -> Result> { + async fn http_head(&self, path: &str, args: &OpStat) -> Result> { let p = build_rooted_abs_path(&self.root, path); let url = format!("{}{}", self.endpoint, percent_encode_path(&p)); @@ -352,199 +339,3 @@ impl HttpBackend { self.client.send(req).await } } - -#[cfg(test)] -mod tests { - use anyhow::Result; - use wiremock::matchers::basic_auth; - use wiremock::matchers::bearer_token; - use wiremock::matchers::headers; - use wiremock::matchers::method; - use wiremock::matchers::path; - use wiremock::Mock; - use wiremock::MockServer; - use wiremock::ResponseTemplate; - - use super::*; - use crate::Operator; - - #[tokio::test] - async fn test_read() -> Result<()> { - let _ = tracing_subscriber::fmt().with_test_writer().try_init(); - - let mock_server = MockServer::start().await; - Mock::given(method("GET")) - .and(path("/hello")) - .respond_with( - ResponseTemplate::new(200) - .insert_header("content-length", "13") - .set_body_string("Hello, World!"), - ) - .mount(&mock_server) - .await; - Mock::given(method("HEAD")) - .and(path("/hello")) - .respond_with(ResponseTemplate::new(200).insert_header("content-length", "13")) - 
.mount(&mock_server) - .await; - - let mut builder = HttpBuilder::default(); - builder.endpoint(&mock_server.uri()); - builder.root("/"); - let op = Operator::new(builder)?.finish(); - - let bs = op.read("hello").await?; - - assert_eq!(bs, b"Hello, World!"); - Ok(()) - } - - #[tokio::test] - async fn test_read_via_basic_auth() -> Result<()> { - let _ = tracing_subscriber::fmt().with_test_writer().try_init(); - - let (username, password) = ("your_username", "your_password"); - - let mock_server = MockServer::start().await; - Mock::given(method("GET")) - .and(path("/hello")) - .and(basic_auth(username, password)) - .respond_with( - ResponseTemplate::new(200) - .insert_header("content-length", "13") - .set_body_string("Hello, World!"), - ) - .mount(&mock_server) - .await; - Mock::given(method("HEAD")) - .and(path("/hello")) - .and(basic_auth(username, password)) - .respond_with(ResponseTemplate::new(200).insert_header("content-length", "13")) - .mount(&mock_server) - .await; - - let mut builder = HttpBuilder::default(); - builder.endpoint(&mock_server.uri()); - builder.root("/"); - builder.username(username).password(password); - let op = Operator::new(builder)?.finish(); - - let bs = op.read("hello").await?; - - assert_eq!(bs, b"Hello, World!"); - Ok(()) - } - - #[tokio::test] - async fn test_read_via_bearer_auth() -> Result<()> { - let _ = tracing_subscriber::fmt().with_test_writer().try_init(); - - let token = "your_token"; - - let mock_server = MockServer::start().await; - Mock::given(method("GET")) - .and(path("/hello")) - .and(bearer_token(token)) - .respond_with( - ResponseTemplate::new(200) - .insert_header("content-length", "13") - .set_body_string("Hello, World!"), - ) - .mount(&mock_server) - .await; - Mock::given(method("HEAD")) - .and(path("/hello")) - .and(bearer_token(token)) - .respond_with(ResponseTemplate::new(200).insert_header("content-length", "13")) - .mount(&mock_server) - .await; - - let mut builder = HttpBuilder::default(); - builder.endpoint(&mock_server.uri()); - builder.root("/"); - builder.token(token); - let op = Operator::new(builder)?.finish(); - - let bs = op.read("hello").await?; - - assert_eq!(bs, b"Hello, World!"); - Ok(()) - } - - #[tokio::test] - async fn test_stat() -> Result<()> { - let _ = tracing_subscriber::fmt().with_test_writer().try_init(); - - let mock_server = MockServer::start().await; - Mock::given(method("HEAD")) - .and(path("/hello")) - .respond_with(ResponseTemplate::new(200).insert_header("content-length", "128")) - .mount(&mock_server) - .await; - - let mut builder = HttpBuilder::default(); - builder.endpoint(&mock_server.uri()); - builder.root("/"); - let op = Operator::new(builder)?.finish(); - let bs = op.stat("hello").await?; - - assert_eq!(bs.mode(), EntryMode::FILE); - assert_eq!(bs.content_length(), 128); - Ok(()) - } - - #[tokio::test] - async fn test_read_with() -> Result<()> { - let _ = tracing_subscriber::fmt().with_test_writer().try_init(); - - let mock_server = MockServer::start().await; - Mock::given(method("GET")) - .and(path("/hello")) - .and(headers("if-none-match", vec!["*"])) - .respond_with( - ResponseTemplate::new(200) - .insert_header("content-length", "13") - .set_body_string("Hello, World!"), - ) - .mount(&mock_server) - .await; - Mock::given(method("HEAD")) - .and(path("/hello")) - .respond_with(ResponseTemplate::new(200).insert_header("content-length", "13")) - .mount(&mock_server) - .await; - - let mut builder = HttpBuilder::default(); - builder.endpoint(&mock_server.uri()); - builder.root("/"); - let op = 
Operator::new(builder)?.finish(); - - let match_bs = op.read_with("hello").if_none_match("*").await?; - assert_eq!(match_bs, b"Hello, World!"); - - Ok(()) - } - - #[tokio::test] - async fn test_stat_with() -> Result<()> { - let _ = tracing_subscriber::fmt().with_test_writer().try_init(); - - let mock_server = MockServer::start().await; - Mock::given(method("HEAD")) - .and(path("/hello")) - .and(headers("if-none-match", vec!["*"])) - .respond_with(ResponseTemplate::new(200).insert_header("content-length", "128")) - .mount(&mock_server) - .await; - - let mut builder = HttpBuilder::default(); - builder.endpoint(&mock_server.uri()); - builder.root("/"); - let op = Operator::new(builder)?.finish(); - let bs = op.stat_with("hello").if_none_match("*").await?; - - assert_eq!(bs.mode(), EntryMode::FILE); - assert_eq!(bs.content_length(), 128); - Ok(()) - } -} diff --git a/core/src/services/http/error.rs b/core/src/services/http/error.rs index c8afa09a963f..451f89d10cd1 100644 --- a/core/src/services/http/error.rs +++ b/core/src/services/http/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::Response; use http::StatusCode; @@ -24,9 +25,9 @@ use crate::ErrorKind; use crate::Result; /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/http/mod.rs b/core/src/services/http/mod.rs index 7424d97c2b9b..4bab91f879ae 100644 --- a/core/src/services/http/mod.rs +++ b/core/src/services/http/mod.rs @@ -20,3 +20,4 @@ pub use backend::HttpBuilder as Http; pub use backend::HttpConfig; mod error; +mod reader; diff --git a/core/src/services/http/reader.rs b/core/src/services/http/reader.rs new file mode 100644 index 000000000000..d38dc4b2a2cf --- /dev/null +++ b/core/src/services/http/reader.rs @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
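+
+//! Range-based reader for the HTTP service.
+//!
+//! `read_at(offset, limit)` issues a ranged `GET` through
+//! `HttpBackend::http_get`, forwarding the original `OpRead` so the
+//! conditional read headers advertised in the capability keep applying,
+//! and returns the response body as an `oio::Buffer`.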
+ +use http::StatusCode; + +use super::error::parse_error; +use crate::raw::*; +use crate::services::http::backend::HttpBackend; + +pub struct HttpReader { + core: HttpBackend, + + path: String, + op: OpRead, +} + +impl HttpReader { + pub fn new(core: HttpBackend, path: &str, op: OpRead) -> Self { + HttpReader { + core, + path: path.to_string(), + op, + } + } +} + +impl oio::Read for HttpReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.http_get(&self.path, range, &self.op).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/huggingface/backend.rs b/core/src/services/huggingface/backend.rs index defbc7b6086e..380438ceeca0 100644 --- a/core/src/services/huggingface/backend.rs +++ b/core/src/services/huggingface/backend.rs @@ -21,6 +21,7 @@ use std::fmt::Formatter; use std::sync::Arc; use async_trait::async_trait; +use bytes::Buf; use http::StatusCode; use log::debug; use serde::Deserialize; @@ -30,6 +31,7 @@ use super::core::HuggingfaceStatus; use super::error::parse_error; use super::lister::HuggingfaceLister; use crate::raw::*; +use crate::services::huggingface::reader::HuggingfaceReader; use crate::*; /// Configuration for Huggingface service support. @@ -243,7 +245,7 @@ pub struct HuggingfaceBackend { #[async_trait] impl Accessor for HuggingfaceBackend { - type Reader = IncomingAsyncBody; + type Reader = HuggingfaceReader; type Writer = (); type Lister = oio::PageLister; type BlockingReader = (); @@ -257,8 +259,6 @@ impl Accessor for HuggingfaceBackend { stat: true, read: true, - read_can_next: true, - read_with_range: true, list: true, list_with_recursive: true, @@ -281,10 +281,10 @@ impl Accessor for HuggingfaceBackend { match status { StatusCode::OK => { let mut meta = parse_into_metadata(path, resp.headers())?; - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let decoded_response = serde_json::from_slice::>(&bs) - .map_err(new_json_deserialize_error)?; + let decoded_response: Vec = + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; // NOTE: if the file is not found, the server will return 200 with an empty array if let Some(status) = decoded_response.first() { @@ -312,25 +312,10 @@ impl Accessor for HuggingfaceBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.hf_resolve(path, args).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + HuggingfaceReader::new(self.core.clone(), path, args), + )) } async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> { diff --git a/core/src/services/huggingface/core.rs b/core/src/services/huggingface/core.rs index 768b416006d6..ed9d271c160b 100644 --- a/core/src/services/huggingface/core.rs +++ 
b/core/src/services/huggingface/core.rs @@ -49,7 +49,7 @@ impl Debug for HuggingfaceCore { } impl HuggingfaceCore { - pub async fn hf_path_info(&self, path: &str) -> Result> { + pub async fn hf_path_info(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path) .trim_end_matches('/') .to_string(); @@ -83,11 +83,7 @@ impl HuggingfaceCore { self.client.send(req).await } - pub async fn hf_list( - &self, - path: &str, - recursive: bool, - ) -> Result> { + pub async fn hf_list(&self, path: &str, recursive: bool) -> Result> { let p = build_abs_path(&self.root, path) .trim_end_matches('/') .to_string(); @@ -125,7 +121,12 @@ impl HuggingfaceCore { self.client.send(req).await } - pub async fn hf_resolve(&self, path: &str, arg: OpRead) -> Result> { + pub async fn hf_resolve( + &self, + path: &str, + range: BytesRange, + _args: &OpRead, + ) -> Result> { let p = build_abs_path(&self.root, path) .trim_end_matches('/') .to_string(); @@ -152,9 +153,8 @@ impl HuggingfaceCore { req = req.header(header::AUTHORIZATION, auth_header_content); } - let range = arg.range(); if !range.is_full() { - req = req.header(header::RANGE, &range.to_header()); + req = req.header(header::RANGE, range.to_header()); } let req = req diff --git a/core/src/services/huggingface/error.rs b/core/src/services/huggingface/error.rs index 4e5361ece07d..c17cd2ee0d3f 100644 --- a/core/src/services/huggingface/error.rs +++ b/core/src/services/huggingface/error.rs @@ -17,6 +17,7 @@ use std::fmt::Debug; +use bytes::Buf; use http::Response; use http::StatusCode; use serde::Deserialize; @@ -39,9 +40,9 @@ impl Debug for HuggingfaceError { } } -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/huggingface/lister.rs b/core/src/services/huggingface/lister.rs index 836b62481a41..8c2189c8b0a6 100644 --- a/core/src/services/huggingface/lister.rs +++ b/core/src/services/huggingface/lister.rs @@ -17,6 +17,8 @@ use std::sync::Arc; +use bytes::Buf; + use super::core::HuggingfaceCore; use super::core::HuggingfaceStatus; use super::error::parse_error; @@ -49,9 +51,9 @@ impl oio::PageList for HuggingfaceLister { return Err(error); } - let bytes = response.into_body().bytes().await?; - let decoded_response = serde_json::from_slice::>(&bytes) - .map_err(new_json_deserialize_error)?; + let bytes = response.into_body(); + let decoded_response: Vec = + serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; ctx.done = true; diff --git a/core/src/services/huggingface/mod.rs b/core/src/services/huggingface/mod.rs index 3a692fa45e53..73149c15ad3f 100644 --- a/core/src/services/huggingface/mod.rs +++ b/core/src/services/huggingface/mod.rs @@ -22,3 +22,4 @@ pub use backend::HuggingfaceConfig; mod core; mod error; mod lister; +mod reader; diff --git a/core/src/services/huggingface/reader.rs b/core/src/services/huggingface/reader.rs new file mode 100644 index 000000000000..d7caca047748 --- /dev/null +++ b/core/src/services/huggingface/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use http::StatusCode;
+
+use super::core::HuggingfaceCore;
+use super::error::parse_error;
+use crate::raw::*;
+
+pub struct HuggingfaceReader {
+    core: Arc<HuggingfaceCore>,
+
+    path: String,
+    op: OpRead,
+}
+
+impl HuggingfaceReader {
+    pub fn new(core: Arc<HuggingfaceCore>, path: &str, op: OpRead) -> Self {
+        HuggingfaceReader {
+            core,
+            path: path.to_string(),
+            op,
+        }
+    }
+}
+
+impl oio::Read for HuggingfaceReader {
+    async fn read_at(&self, offset: u64, limit: usize) -> crate::Result<oio::Buffer> {
+        let range = BytesRange::new(offset, Some(limit as u64));
+
+        let resp = self.core.hf_resolve(&self.path, range, &self.op).await?;
+
+        let status = resp.status();
+
+        match status {
+            StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()),
+            StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()),
+            _ => Err(parse_error(resp).await?),
+        }
+    }
+}
diff --git a/core/src/services/icloud/backend.rs b/core/src/services/icloud/backend.rs
index 8f6628ae0e0f..188147b76934 100644
--- a/core/src/services/icloud/backend.rs
+++ b/core/src/services/icloud/backend.rs
@@ -21,12 +21,12 @@ use std::fmt::Formatter;
 use std::sync::Arc;

 use async_trait::async_trait;
-use http::StatusCode;
 use serde::Deserialize;
 use tokio::sync::Mutex;

 use super::core::*;
 use crate::raw::*;
+use crate::services::icloud::reader::IcloudReader;
 use crate::*;

 /// Config for icloud services support.
@@ -267,7 +267,7 @@ pub struct IcloudBackend {
 #[async_trait]
 impl Accessor for IcloudBackend {
-    type Reader = IncomingAsyncBody;
+    type Reader = IcloudReader;
     type BlockingReader = ();
     type Writer = ();
     type BlockingWriter = ();
@@ -312,23 +312,9 @@ impl Accessor for IcloudBackend {
     }

     async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> {
-        let resp = self.core.read(path, &args).await?;
-        let status = resp.status();
-
-        match status {
-            StatusCode::OK | StatusCode::PARTIAL_CONTENT => {
-                let size = parse_content_length(resp.headers())?;
-                let range = parse_content_range(resp.headers())?;
-                Ok((
-                    RpRead::new().with_size(size).with_range(range),
-                    resp.into_body(),
-                ))
-            }
-            StatusCode::RANGE_NOT_SATISFIABLE => {
-                resp.into_body().consume().await?;
-                Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty()))
-            }
-            _ => Err(parse_error(resp).await?),
-        }
+        Ok((
+            RpRead::default(),
+            IcloudReader::new(self.core.clone(), path, args),
+        ))
     }
 }
diff --git a/core/src/services/icloud/core.rs b/core/src/services/icloud/core.rs
index f98006563172..26c2aed890d9 100644
--- a/core/src/services/icloud/core.rs
+++ b/core/src/services/icloud/core.rs
@@ -78,6 +78,12 @@ pub struct SessionData {
     docws_url: String,
 }

+impl Default for SessionData {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 impl SessionData {
     pub fn new() -> SessionData {
         Self {
@@ -192,9 +198,9 @@ impl IcloudSigner {
         // Update SessionData cookies. We need to obtain the `X-APPLE-WEBAUTH-USER` cookie to get files.
         self.update(&resp)?;

-        let bs = resp.into_body().bytes().await?;
+        let bs = resp.into_body();
         let auth_info: IcloudWebservicesResponse =
-            serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?;
+            serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?;

         // Check if we have extra challenge to take.
         if auth_info.hsa_challenge_required && !auth_info.hsa_trusted_browser {
@@ -269,7 +275,7 @@ impl IcloudSigner {
     }

     /// Update signer's data after request sent out.
-    fn update(&mut self, resp: &Response<IncomingAsyncBody>) -> Result<()> {
+    fn update(&mut self, resp: &Response<oio::Buffer>) -> Result<()> {
         if let Some(account_country) = parse_header_to_str(resp.headers(), ACCOUNT_COUNTRY_HEADER)?
         {
             self.data.account_country = Some(account_country.to_string());
@@ -306,10 +312,7 @@ impl IcloudSigner {
     /// - Init the signer if it's not initiated.
     /// - Sign the request.
     /// - Update the session data if needed.
-    pub async fn send(
-        &mut self,
-        mut req: Request<AsyncBody>,
-    ) -> Result<Response<IncomingAsyncBody>> {
+    pub async fn send(&mut self, mut req: Request<AsyncBody>) -> Result<Response<oio::Buffer>> {
         self.sign(&mut req)?;
         let resp = self.client.send(req).await?;
@@ -359,7 +362,7 @@ impl IcloudCore {
             return Err(parse_error(resp).await?);
         }

-        let body = resp.into_body().bytes().await?;
+        let body = resp.into_body();
         let drive_node: Vec<IcloudRoot> =
             serde_json::from_slice(body.chunk()).map_err(new_json_deserialize_error)?;
         Ok(drive_node[0].clone())
@@ -369,8 +372,9 @@ impl IcloudCore {
         &self,
         id: &str,
         zone: &str,
+        range: BytesRange,
         args: OpRead,
-    ) -> Result<Response<IncomingAsyncBody>> {
+    ) -> Result<Response<oio::Buffer>> {
         let mut signer = self.signer.lock().await;

         let uri = format!(
@@ -389,7 +393,7 @@ impl IcloudCore {
             return Err(parse_error(resp).await?);
         }

-        let body = resp.into_body().bytes().await?;
+        let body = resp.into_body();
         let object: IcloudObject =
             serde_json::from_slice(body.chunk()).map_err(new_json_deserialize_error)?;

@@ -401,8 +405,7 @@ impl IcloudCore {
             req = req.header(IF_MATCH, if_match);
         }

-        let range = args.range();
-        if !range.is_full() {
+        if !range.is_full() {
             req = req.header(header::RANGE, range.to_header())
         }
         if let Some(if_none_match) = args.if_none_match() {
@@ -418,7 +421,12 @@ impl IcloudCore {
         Ok(resp)
     }

-    pub async fn read(&self, path: &str, args: &OpRead) -> Result<Response<IncomingAsyncBody>> {
+    pub async fn read(
+        &self,
+        path: &str,
+        range: BytesRange,
+        args: &OpRead,
+    ) -> Result<Response<oio::Buffer>> {
         let path = build_rooted_abs_path(&self.root, path);

         let base = get_basename(&path);
@@ -429,7 +437,7 @@ impl IcloudCore {

         if let Some(docwsid) = path_id.strip_prefix("FILE::com.apple.CloudDocs::") {
             Ok(self
-                .get_file(docwsid, "com.apple.CloudDocs", args.clone())
+                .get_file(docwsid, "com.apple.CloudDocs", range, args.clone())
                .await?)
} else { Err(Error::new( @@ -516,7 +524,7 @@ impl PathQuery for IcloudPathQuery { return Err(parse_error(resp).await?); } - let body = resp.into_body().bytes().await?; + let body = resp.into_body(); let root: Vec = serde_json::from_slice(body.chunk()).map_err(new_json_deserialize_error)?; @@ -557,16 +565,16 @@ impl PathQuery for IcloudPathQuery { return Err(parse_error(resp).await?); } - let body = resp.into_body().bytes().await?; + let body = resp.into_body(); let create_folder: IcloudCreateFolder = serde_json::from_slice(body.chunk()).map_err(new_json_deserialize_error)?; Ok(create_folder.destination_drivews_id) } } -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let mut kind = match parts.status.as_u16() { 421 | 450 | 500 => ErrorKind::NotFound, @@ -711,9 +719,10 @@ pub struct IcloudCreateFolder { #[cfg(test)] mod tests { + use pretty_assertions::assert_eq; + use super::IcloudRoot; use super::IcloudWebservicesResponse; - use pretty_assertions::assert_eq; #[test] fn test_parse_icloud_drive_root_json() { diff --git a/core/src/services/icloud/mod.rs b/core/src/services/icloud/mod.rs index d76c934ddde2..9366484735e8 100644 --- a/core/src/services/icloud/mod.rs +++ b/core/src/services/icloud/mod.rs @@ -19,3 +19,4 @@ mod backend; pub use backend::IcloudBuilder as Icloud; mod core; +mod reader; diff --git a/core/src/services/icloud/reader.rs b/core/src/services/icloud/reader.rs new file mode 100644 index 000000000000..f001cb001a78 --- /dev/null +++ b/core/src/services/icloud/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
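+
+//! Range-based reader for the iCloud Drive service.
+//!
+//! `read_at(offset, limit)` delegates to `IcloudCore::read`, which resolves
+//! the path to a `docwsid` before issuing the ranged download.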
+ +use std::sync::Arc; + +use http::StatusCode; + +use super::core::parse_error; +use super::core::IcloudCore; +use crate::raw::*; + +pub struct IcloudReader { + core: Arc, + + path: String, + op: OpRead, +} + +impl IcloudReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + IcloudReader { + core, + path: path.to_string(), + op, + } + } +} + +impl oio::Read for IcloudReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.read(&self.path, range, &self.op).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/ipfs/backend.rs b/core/src/services/ipfs/backend.rs index 964a1cef1923..d489832174b7 100644 --- a/core/src/services/ipfs/backend.rs +++ b/core/src/services/ipfs/backend.rs @@ -30,6 +30,7 @@ use prost::Message; use super::error::parse_error; use super::ipld::PBNode; use crate::raw::*; +use crate::services::ipfs::reader::IpfsReader; use crate::*; /// IPFS file system support based on [IPFS HTTP Gateway](https://docs.ipfs.tech/concepts/ipfs-gateway/). @@ -161,7 +162,7 @@ impl Debug for IpfsBackend { #[async_trait] impl Accessor for IpfsBackend { - type Reader = IncomingAsyncBody; + type Reader = IpfsReader; type Writer = (); type Lister = oio::PageLister; type BlockingReader = (); @@ -176,8 +177,6 @@ impl Accessor for IpfsBackend { stat: true, read: true, - read_can_next: true, - read_with_range: true, list: true, @@ -339,14 +338,7 @@ impl Accessor for IpfsBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.ipfs_get(path, args.range()).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), - _ => Err(parse_error(resp).await?), - } + Ok((RpRead::default(), IpfsReader::new(self.clone(), path, args))) } async fn list(&self, path: &str, _: OpList) -> Result<(RpList, Self::Lister)> { @@ -356,7 +348,7 @@ impl Accessor for IpfsBackend { } impl IpfsBackend { - async fn ipfs_get(&self, path: &str, range: BytesRange) -> Result> { + pub async fn ipfs_get(&self, path: &str, range: BytesRange) -> Result> { let p = build_rooted_abs_path(&self.root, path); let url = format!("{}{}", self.endpoint, percent_encode_path(&p)); @@ -374,7 +366,7 @@ impl IpfsBackend { self.client.send(req).await } - async fn ipfs_head(&self, path: &str) -> Result> { + async fn ipfs_head(&self, path: &str) -> Result> { let p = build_rooted_abs_path(&self.root, path); let url = format!("{}{}", self.endpoint, percent_encode_path(&p)); @@ -388,7 +380,7 @@ impl IpfsBackend { self.client.send(req).await } - async fn ipfs_list(&self, path: &str) -> Result> { + async fn ipfs_list(&self, path: &str) -> Result> { let p = build_rooted_abs_path(&self.root, path); let url = format!("{}{}", self.endpoint, percent_encode_path(&p)); @@ -431,7 +423,7 @@ impl oio::PageList for DirStream { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let pb_node = PBNode::decode(bs).map_err(|e| { Error::new(ErrorKind::Unexpected, "deserialize protobuf from response").set_source(e) })?; diff --git a/core/src/services/ipfs/error.rs b/core/src/services/ipfs/error.rs index 1470b209899e..99177e9b388c 100644 --- 
a/core/src/services/ipfs/error.rs +++ b/core/src/services/ipfs/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::Response; use http::StatusCode; @@ -24,9 +25,9 @@ use crate::ErrorKind; use crate::Result; /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/ipfs/mod.rs b/core/src/services/ipfs/mod.rs index 4bd46822fbb3..85aeb674f001 100644 --- a/core/src/services/ipfs/mod.rs +++ b/core/src/services/ipfs/mod.rs @@ -20,3 +20,4 @@ pub use backend::IpfsBuilder as Ipfs; mod error; mod ipld; +mod reader; diff --git a/core/src/services/ipfs/reader.rs b/core/src/services/ipfs/reader.rs new file mode 100644 index 000000000000..75b1cd1c197b --- /dev/null +++ b/core/src/services/ipfs/reader.rs @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use http::StatusCode; + +use super::error::parse_error; +use crate::raw::*; +use crate::services::ipfs::backend::IpfsBackend; + +pub struct IpfsReader { + core: IpfsBackend, + + path: String, + _op: OpRead, +} + +impl IpfsReader { + pub fn new(core: IpfsBackend, path: &str, op: OpRead) -> Self { + IpfsReader { + core, + path: path.to_string(), + _op: op, + } + } +} + +impl oio::Read for IpfsReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.ipfs_get(&self.path, range).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/ipmfs/backend.rs b/core/src/services/ipmfs/backend.rs index 4d5f0c6d64c2..7a5674d8d31c 100644 --- a/core/src/services/ipmfs/backend.rs +++ b/core/src/services/ipmfs/backend.rs @@ -21,6 +21,7 @@ use std::str; use std::sync::Arc; use async_trait::async_trait; +use bytes::Buf; use bytes::Bytes; use http::Request; use http::Response; @@ -31,6 +32,7 @@ use super::error::parse_error; use super::lister::IpmfsLister; use super::writer::IpmfsWriter; use crate::raw::*; +use crate::services::ipmfs::reader::IpmfsReader; use crate::*; /// IPFS Mutable File System (IPMFS) backend. 
@@ -63,7 +65,7 @@ impl IpmfsBackend { #[async_trait] impl Accessor for IpmfsBackend { - type Reader = IncomingAsyncBody; + type Reader = IpmfsReader; type Writer = oio::OneShotWriter; type Lister = oio::PageLister; type BlockingReader = (); @@ -78,8 +80,6 @@ impl Accessor for IpmfsBackend { stat: true, read: true, - read_can_next: true, - read_with_range: true, write: true, delete: true, @@ -98,10 +98,7 @@ impl Accessor for IpmfsBackend { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(RpCreateDir::default()) - } + StatusCode::CREATED | StatusCode::OK => Ok(RpCreateDir::default()), _ => Err(parse_error(resp).await?), } } @@ -118,10 +115,10 @@ impl Accessor for IpmfsBackend { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let res: IpfsStatResponse = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; let mode = match res.file_type.as_str() { "file" => EntryMode::FILE, @@ -139,14 +136,10 @@ impl Accessor for IpmfsBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.ipmfs_read(path, args.range()).await?; - - let status = resp.status(); - - match status { - StatusCode::OK => Ok((RpRead::new(), resp.into_body())), - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + IpmfsReader::new(self.clone(), path, args), + )) } async fn write(&self, path: &str, _: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -162,10 +155,7 @@ impl Accessor for IpmfsBackend { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(RpDelete::default()) - } + StatusCode::OK => Ok(RpDelete::default()), _ => Err(parse_error(resp).await?), } } @@ -177,7 +167,7 @@ impl Accessor for IpmfsBackend { } impl IpmfsBackend { - async fn ipmfs_stat(&self, path: &str) -> Result> { + async fn ipmfs_stat(&self, path: &str) -> Result> { let p = build_rooted_abs_path(&self.root, path); let url = format!( @@ -194,11 +184,7 @@ impl IpmfsBackend { self.client.send(req).await } - async fn ipmfs_read( - &self, - path: &str, - range: BytesRange, - ) -> Result> { + pub async fn ipmfs_read(&self, path: &str, range: BytesRange) -> Result> { let p = build_rooted_abs_path(&self.root, path); let mut url = format!( @@ -207,9 +193,7 @@ impl IpmfsBackend { percent_encode_path(&p) ); - if let Some(offset) = range.offset() { - write!(url, "&offset={offset}").expect("write into string must succeed") - } + write!(url, "&offset={}", range.offset()).expect("write into string must succeed"); if let Some(count) = range.size() { write!(url, "&count={count}").expect("write into string must succeed") } @@ -222,7 +206,7 @@ impl IpmfsBackend { self.client.send(req).await } - async fn ipmfs_rm(&self, path: &str) -> Result> { + async fn ipmfs_rm(&self, path: &str) -> Result> { let p = build_rooted_abs_path(&self.root, path); let url = format!( @@ -239,7 +223,7 @@ impl IpmfsBackend { self.client.send(req).await } - pub(crate) async fn ipmfs_ls(&self, path: &str) -> Result> { + pub(crate) async fn ipmfs_ls(&self, path: &str) -> Result> { let p = build_rooted_abs_path(&self.root, path); let url = format!( @@ -256,7 +240,7 @@ impl IpmfsBackend { self.client.send(req).await } - async fn ipmfs_mkdir(&self, path: &str) -> Result> { + async fn ipmfs_mkdir(&self, path: &str) -> Result> { let p = 
build_rooted_abs_path(&self.root, path); let url = format!( @@ -274,11 +258,7 @@ impl IpmfsBackend { } /// Support write from reader. - pub async fn ipmfs_write( - &self, - path: &str, - body: Bytes, - ) -> Result> { + pub async fn ipmfs_write(&self, path: &str, body: Bytes) -> Result> { let p = build_rooted_abs_path(&self.root, path); let url = format!( diff --git a/core/src/services/ipmfs/error.rs b/core/src/services/ipmfs/error.rs index df6e7f4752d7..ee07566e1bdf 100644 --- a/core/src/services/ipmfs/error.rs +++ b/core/src/services/ipmfs/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::Response; use http::StatusCode; use serde::Deserialize; @@ -45,9 +46,9 @@ struct IpfsError { /// > (if no error, check the daemon logs). /// /// ref: https://docs.ipfs.tech/reference/kubo/rpc/#http-status-codes -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let ipfs_error = de::from_slice::(&bs).ok(); diff --git a/core/src/services/ipmfs/lister.rs b/core/src/services/ipmfs/lister.rs index 81bff72dca52..0ba252da6255 100644 --- a/core/src/services/ipmfs/lister.rs +++ b/core/src/services/ipmfs/lister.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use bytes::Buf; use http::StatusCode; use serde::Deserialize; @@ -51,9 +52,9 @@ impl oio::PageList for IpmfsLister { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let entries_body: IpfsLsResponse = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; // Mark dir stream has been consumed. ctx.done = true; diff --git a/core/src/services/ipmfs/mod.rs b/core/src/services/ipmfs/mod.rs index 2a7ce5c0ad71..2e511206ca52 100644 --- a/core/src/services/ipmfs/mod.rs +++ b/core/src/services/ipmfs/mod.rs @@ -21,4 +21,5 @@ pub use builder::IpmfsBuilder as Ipmfs; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/ipmfs/reader.rs b/core/src/services/ipmfs/reader.rs new file mode 100644 index 000000000000..3aaac81a3e7d --- /dev/null +++ b/core/src/services/ipmfs/reader.rs @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
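+
+//! Range-based reader for the IPMFS service.
+//!
+//! `read_at(offset, limit)` forwards the requested window to the backend's
+//! read endpoint as `offset`/`count` query arguments via
+//! `IpmfsBackend::ipmfs_read`.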
+
+use http::StatusCode;
+
+use super::error::parse_error;
+use crate::raw::*;
+use crate::services::ipmfs::backend::IpmfsBackend;
+
+pub struct IpmfsReader {
+    core: IpmfsBackend,
+
+    path: String,
+    _op: OpRead,
+}
+
+impl IpmfsReader {
+    pub fn new(core: IpmfsBackend, path: &str, op: OpRead) -> Self {
+        IpmfsReader {
+            core,
+            path: path.to_string(),
+            _op: op,
+        }
+    }
+}
+
+impl oio::Read for IpmfsReader {
+    async fn read_at(&self, offset: u64, limit: usize) -> crate::Result<oio::Buffer> {
+        let range = BytesRange::new(offset, Some(limit as u64));
+
+        let resp = self.core.ipmfs_read(&self.path, range).await?;
+
+        let status = resp.status();
+
+        match status {
+            StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()),
+            StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()),
+            _ => Err(parse_error(resp).await?),
+        }
+    }
+}
diff --git a/core/src/services/ipmfs/writer.rs b/core/src/services/ipmfs/writer.rs
index 53f79b840c4f..f27fe5c77b3a 100644
--- a/core/src/services/ipmfs/writer.rs
+++ b/core/src/services/ipmfs/writer.rs
@@ -20,7 +20,6 @@ use http::StatusCode;

 use super::backend::IpmfsBackend;
 use super::error::parse_error;
-
 use crate::raw::*;
 use crate::*;

@@ -43,10 +42,7 @@ impl oio::OneShotWrite for IpmfsWriter {
         let status = resp.status();

         match status {
-            StatusCode::CREATED | StatusCode::OK => {
-                resp.into_body().consume().await?;
-                Ok(())
-            }
+            StatusCode::CREATED | StatusCode::OK => Ok(()),
             _ => Err(parse_error(resp).await?),
         }
     }
diff --git a/core/src/services/koofr/backend.rs b/core/src/services/koofr/backend.rs
index a7f8fc35901c..859be7dadef9 100644
--- a/core/src/services/koofr/backend.rs
+++ b/core/src/services/koofr/backend.rs
@@ -21,6 +21,7 @@ use std::fmt::Formatter;
 use std::sync::Arc;

 use async_trait::async_trait;
+use bytes::Buf;
 use http::StatusCode;
 use log::debug;
 use serde::Deserialize;
@@ -35,6 +36,7 @@ use super::lister::KoofrLister;
 use super::writer::KoofrWriter;
 use super::writer::KoofrWriters;
 use crate::raw::*;
+use crate::services::koofr::reader::KoofrReader;
 use crate::*;

 /// Config for Koofr services support.
@@ -235,7 +237,7 @@ pub struct KoofrBackend { #[async_trait] impl Accessor for KoofrBackend { - type Reader = IncomingAsyncBody; + type Reader = KoofrReader; type Writer = KoofrWriters; type Lister = oio::PageLister; type BlockingReader = (); @@ -286,9 +288,10 @@ impl Accessor for KoofrBackend { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let file: File = serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + let file: File = + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; let mode = if file.ty == "dir" { EntryMode::DIR @@ -308,22 +311,11 @@ impl Accessor for KoofrBackend { } } - async fn read(&self, path: &str, _args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.get(path).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - _ => Err(parse_error(resp).await?), - } + async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + Ok(( + RpRead::default(), + KoofrReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, _args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -372,11 +364,7 @@ impl Accessor for KoofrBackend { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - - Ok(RpCopy::default()) - } + StatusCode::OK => Ok(RpCopy::default()), _ => Err(parse_error(resp).await?), } } @@ -401,11 +389,7 @@ impl Accessor for KoofrBackend { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - - Ok(RpRename::default()) - } + StatusCode::OK => Ok(RpRename::default()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/koofr/core.rs b/core/src/services/koofr/core.rs index 441c3ffb0142..dc6372d2cae9 100644 --- a/core/src/services/koofr/core.rs +++ b/core/src/services/koofr/core.rs @@ -20,6 +20,7 @@ use std::fmt::Debug; use std::fmt::Formatter; use std::sync::Arc; +use bytes::Buf; use bytes::Bytes; use http::header; use http::request; @@ -67,7 +68,7 @@ impl Debug for KoofrCore { impl KoofrCore { #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } @@ -90,10 +91,10 @@ impl KoofrCore { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let resp: MountsResponse = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; for mount in resp.mounts { if mount.is_primary { @@ -137,9 +138,9 @@ impl KoofrCore { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let resp: TokenResponse = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; signer.token = resp.token; @@ -220,7 +221,7 @@ impl KoofrCore { } } - pub async fn info(&self, path: &str) -> Result> { + pub async fn info(&self, path: &str) -> Result> { let mount_id = self.get_mount_id().await?; let url = format!( @@ -241,7 +242,7 @@ impl KoofrCore { self.send(req).await } - pub async fn get(&self, path: &str) -> Result> { 
+ pub async fn get(&self, path: &str, range: BytesRange) -> Result> { let path = build_rooted_abs_path(&self.root, path); let mount_id = self.get_mount_id().await?; @@ -253,7 +254,7 @@ impl KoofrCore { percent_encode_path(&path) ); - let req = Request::get(url); + let req = Request::get(url).header(header::RANGE, range.to_header()); let req = self.sign(req).await?; @@ -264,7 +265,7 @@ impl KoofrCore { self.send(req).await } - pub async fn put(&self, path: &str, bs: Bytes) -> Result> { + pub async fn put(&self, path: &str, bs: Bytes) -> Result> { let path = build_rooted_abs_path(&self.root, path); let filename = get_basename(&path); @@ -300,7 +301,7 @@ impl KoofrCore { self.send(req).await } - pub async fn remove(&self, path: &str) -> Result> { + pub async fn remove(&self, path: &str) -> Result> { let path = build_rooted_abs_path(&self.root, path); let mount_id = self.get_mount_id().await?; @@ -323,7 +324,7 @@ impl KoofrCore { self.send(req).await } - pub async fn copy(&self, from: &str, to: &str) -> Result> { + pub async fn copy(&self, from: &str, to: &str) -> Result> { let from = build_rooted_abs_path(&self.root, from); let to = build_rooted_abs_path(&self.root, to); @@ -355,7 +356,7 @@ impl KoofrCore { self.send(req).await } - pub async fn move_object(&self, from: &str, to: &str) -> Result> { + pub async fn move_object(&self, from: &str, to: &str) -> Result> { let from = build_rooted_abs_path(&self.root, from); let to = build_rooted_abs_path(&self.root, to); @@ -387,7 +388,7 @@ impl KoofrCore { self.send(req).await } - pub async fn list(&self, path: &str) -> Result> { + pub async fn list(&self, path: &str) -> Result> { let path = build_rooted_abs_path(&self.root, path); let mount_id = self.get_mount_id().await?; diff --git a/core/src/services/koofr/error.rs b/core/src/services/koofr/error.rs index c3104c781fd8..d3ee3ff3ae8c 100644 --- a/core/src/services/koofr/error.rs +++ b/core/src/services/koofr/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::Response; use crate::raw::*; @@ -23,9 +24,9 @@ use crate::ErrorKind; use crate::Result; /// Parse error response into Error. 
-pub async fn parse_error(resp: Response<IncomingAsyncBody>) -> Result<Error> {
-    let (parts, body) = resp.into_parts();
-    let bs = body.bytes().await?;
+pub async fn parse_error(resp: Response<oio::Buffer>) -> Result<Error> {
+    let (parts, mut body) = resp.into_parts();
+    let bs = body.copy_to_bytes(body.remaining());

     let (kind, retryable) = match parts.status.as_u16() {
         403 => (ErrorKind::PermissionDenied, false),
@@ -53,7 +54,6 @@ pub async fn parse_error(resp: Response<IncomingAsyncBody>) -> Result<Error> {

 #[cfg(test)]
 mod test {
-    use futures::stream;
     use http::StatusCode;

     use super::*;
@@ -64,10 +64,7 @@ mod test {

         for res in err_res {
             let bs = bytes::Bytes::from(res.0);
-            let body = IncomingAsyncBody::new(
-                Box::new(oio::into_stream(stream::iter(vec![Ok(bs.clone())]))),
-                None,
-            );
+            let body = oio::Buffer::from(bs);

             let resp = Response::builder().status(res.2).body(body).unwrap();
             let err = parse_error(resp).await;
diff --git a/core/src/services/koofr/lister.rs b/core/src/services/koofr/lister.rs
index 8d56e65f5be8..fdb7487fe483 100644
--- a/core/src/services/koofr/lister.rs
+++ b/core/src/services/koofr/lister.rs
@@ -17,6 +17,8 @@

 use std::sync::Arc;

+use bytes::Buf;
+
 use super::core::KoofrCore;
 use super::core::ListResponse;
 use super::error::parse_error;
@@ -57,10 +59,10 @@ impl oio::PageList for KoofrLister {
             }
         }

-        let bs = resp.into_body().bytes().await?;
+        let bs = resp.into_body();

         let response: ListResponse =
-            serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?;
+            serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?;

         for file in response.files {
             let path = build_abs_path(&normalize_root(&self.path), &file.name);
diff --git a/core/src/services/koofr/mod.rs b/core/src/services/koofr/mod.rs
index 445d69fbb82e..fce486b246b9 100644
--- a/core/src/services/koofr/mod.rs
+++ b/core/src/services/koofr/mod.rs
@@ -22,4 +22,5 @@ pub use backend::KoofrConfig;
 mod core;
 mod error;
 mod lister;
+mod reader;
 mod writer;
diff --git a/core/src/services/koofr/reader.rs b/core/src/services/koofr/reader.rs
new file mode 100644
index 000000000000..94c48ea93b20
--- /dev/null
+++ b/core/src/services/koofr/reader.rs
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
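+
+//! Range-based reader for the Koofr service.
+//!
+//! `read_at(offset, limit)` issues a ranged `GET` through `KoofrCore::get`,
+//! which attaches a `Range` header built from the requested `BytesRange`.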
+ +use std::sync::Arc; + +use http::StatusCode; + +use super::core::KoofrCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct KoofrReader { + core: Arc, + + path: String, + _op: OpRead, +} + +impl KoofrReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + KoofrReader { + core, + path: path.to_string(), + _op: op, + } + } +} + +impl oio::Read for KoofrReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.get(&self.path, range).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/koofr/writer.rs b/core/src/services/koofr/writer.rs index 97c2dbdbc9b8..ff8039e8afd2 100644 --- a/core/src/services/koofr/writer.rs +++ b/core/src/services/koofr/writer.rs @@ -47,10 +47,7 @@ impl oio::OneShotWrite for KoofrWriter { let status = resp.status(); match status { - StatusCode::OK | StatusCode::CREATED => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK | StatusCode::CREATED => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/libsql/backend.rs b/core/src/services/libsql/backend.rs index c000a9ddd2ec..2fbbf47606aa 100644 --- a/core/src/services/libsql/backend.rs +++ b/core/src/services/libsql/backend.rs @@ -20,6 +20,7 @@ use std::fmt::Debug; use std::str; use async_trait::async_trait; +use bytes::Buf; use bytes::Bytes; use hrana_client_proto::pipeline::ClientMsg; use hrana_client_proto::pipeline::Response; @@ -309,9 +310,9 @@ impl Adapter { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let resp: ServerMsg = serde_json::from_slice(&bs).map_err(|e| { + let resp: ServerMsg = serde_json::from_reader(bs.reader()).map_err(|e| { Error::new(ErrorKind::Unexpected, "deserialize json from response").set_source(e) })?; diff --git a/core/src/services/libsql/error.rs b/core/src/services/libsql/error.rs index a2a80a02ec10..59804a6e4fd5 100644 --- a/core/src/services/libsql/error.rs +++ b/core/src/services/libsql/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::Response; use http::StatusCode; @@ -24,9 +25,9 @@ use crate::ErrorKind; use crate::Result; /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/memory/backend.rs b/core/src/services/memory/backend.rs index 03c59ee297dc..47220e0db047 100644 --- a/core/src/services/memory/backend.rs +++ b/core/src/services/memory/backend.rs @@ -22,13 +22,11 @@ use std::sync::Arc; use std::sync::Mutex; use async_trait::async_trait; - -use crate::raw::adapters::typed_kv; -use crate::*; - use serde::Deserialize; use self::raw::ConfigDeserializer; +use crate::raw::adapters::typed_kv; +use crate::*; ///Config for memory. 
#[derive(Default, Deserialize)] diff --git a/core/src/services/mod.rs b/core/src/services/mod.rs index 71cd71b8084f..62acbb0cc6d6 100644 --- a/core/src/services/mod.rs +++ b/core/src/services/mod.rs @@ -85,9 +85,9 @@ pub use gridfs::Gridfs; #[cfg(feature = "services-hdfs")] mod hdfs; #[cfg(feature = "services-hdfs")] -pub use hdfs::Hdfs; +pub use self::hdfs::Hdfs; #[cfg(feature = "services-hdfs")] -pub use hdfs::HdfsConfig; +pub use self::hdfs::HdfsConfig; #[cfg(feature = "services-http")] mod http; diff --git a/core/src/services/obs/backend.rs b/core/src/services/obs/backend.rs index 91bfa8893de3..e3700eccbc81 100644 --- a/core/src/services/obs/backend.rs +++ b/core/src/services/obs/backend.rs @@ -32,6 +32,7 @@ use super::error::parse_error; use super::lister::ObsLister; use super::writer::ObsWriter; use crate::raw::*; +use crate::services::obs::reader::ObsReader; use crate::services::obs::writer::ObsWriters; use crate::*; @@ -248,7 +249,7 @@ pub struct ObsBackend { #[async_trait] impl Accessor for ObsBackend { - type Reader = IncomingAsyncBody; + type Reader = ObsReader; type Writer = ObsWriters; type Lister = oio::PageLister; type BlockingReader = (); @@ -266,8 +267,7 @@ impl Accessor for ObsBackend { stat_with_if_none_match: true, read: true, - read_can_next: true, - read_with_range: true, + read_with_if_match: true, read_with_if_none_match: true, @@ -323,25 +323,10 @@ impl Accessor for ObsBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.obs_get_object(path, &args).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + ObsReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -380,10 +365,7 @@ impl Accessor for ObsBackend { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(RpCopy::default()) - } + StatusCode::OK => Ok(RpCopy::default()), _ => Err(parse_error(resp).await?), } } @@ -391,7 +373,10 @@ impl Accessor for ObsBackend { async fn presign(&self, path: &str, args: OpPresign) -> Result { let mut req = match args.operation() { PresignOperation::Stat(v) => self.core.obs_head_object_request(path, v)?, - PresignOperation::Read(v) => self.core.obs_get_object_request(path, v)?, + PresignOperation::Read(v) => { + self.core + .obs_get_object_request(path, BytesRange::default(), v)? + } PresignOperation::Write(v) => { self.core .obs_put_object_request(path, None, v, AsyncBody::Empty)? 
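
Every `*Reader` introduced by this patch follows the same shape: `oio::Read::read_at(offset, limit)` turns its arguments into a `BytesRange`, issues one ranged GET through the service core, and maps the response status (200/206 yield the body, 416 yields an empty buffer, anything else is parsed as an error). The following is a minimal, self-contained sketch of that mapping; `range_header` and `handle_ranged_get` are illustrative helpers written for this note, not opendal APIs, and `Buffer` stands in for the real `oio::Buffer`.

// Sketch only: simplified stand-ins for the pattern shared by the new readers.
type Buffer = Vec<u8>; // stand-in for oio::Buffer

// How BytesRange::new(offset, Some(limit)) renders as an HTTP Range header:
// an inclusive byte range such as `bytes=8192-12287`.
fn range_header(offset: u64, limit: u64) -> String {
    assert!(limit > 0, "an empty range has no Range-header form");
    format!("bytes={}-{}", offset, offset + limit - 1)
}

// The status handling every read_at impl in this patch repeats.
fn handle_ranged_get(status: u16, body: Buffer) -> Result<Buffer, String> {
    match status {
        200 | 206 => Ok(body),    // full or partial content: return the bytes
        416 => Ok(Buffer::new()), // range not satisfiable: read past EOF => empty
        other => Err(format!("unexpected status {other}")),
    }
}

fn main() {
    // Reading 4 KiB at offset 8 KiB asks for `bytes=8192-12287`.
    assert_eq!(range_header(8192, 4096), "bytes=8192-12287");
    assert!(handle_ranged_get(416, Buffer::new()).unwrap().is_empty());
}

Note that 416 is treated as success, matching the readers below: a ranged read that starts at or past end-of-file simply returns an empty buffer instead of an error.
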
diff --git a/core/src/services/obs/core.rs b/core/src/services/obs/core.rs
index 344f104561de..ca01ee4135c5 100644
--- a/core/src/services/obs/core.rs
+++ b/core/src/services/obs/core.rs
@@ -95,7 +95,7 @@ impl ObsCore {
     }
 
     #[inline]
-    pub async fn send(&self, req: Request<AsyncBody>) -> Result<Response<IncomingAsyncBody>> {
+    pub async fn send(&self, req: Request<AsyncBody>) -> Result<Response<oio::Buffer>> {
         self.client.send(req).await
     }
 }
@@ -104,16 +104,22 @@ impl ObsCore {
     pub async fn obs_get_object(
         &self,
         path: &str,
+        range: BytesRange,
         args: &OpRead,
-    ) -> Result<Response<IncomingAsyncBody>> {
-        let mut req = self.obs_get_object_request(path, args)?;
+    ) -> Result<Response<oio::Buffer>> {
+        let mut req = self.obs_get_object_request(path, range, args)?;
 
         self.sign(&mut req).await?;
 
         self.send(req).await
     }
 
-    pub fn obs_get_object_request(&self, path: &str, args: &OpRead) -> Result<Request<AsyncBody>> {
+    pub fn obs_get_object_request(
+        &self,
+        path: &str,
+        range: BytesRange,
+        args: &OpRead,
+    ) -> Result<Request<AsyncBody>> {
         let p = build_abs_path(&self.root, path);
 
         let url = format!("{}/{}", self.endpoint, percent_encode_path(&p));
@@ -124,8 +130,7 @@ impl ObsCore {
             req = req.header(IF_MATCH, if_match);
         }
 
-        let range = args.range();
         if !range.is_full() {
             req = req.header(http::header::RANGE, range.to_header())
         }
@@ -173,7 +178,7 @@ impl ObsCore {
         &self,
         path: &str,
         args: &OpStat,
-    ) -> Result<Response<IncomingAsyncBody>> {
+    ) -> Result<Response<oio::Buffer>> {
         let mut req = self.obs_head_object_request(path, args)?;
 
         self.sign(&mut req).await?;
@@ -206,7 +211,7 @@ impl ObsCore {
         Ok(req)
     }
 
-    pub async fn obs_delete_object(&self, path: &str) -> Result<Response<IncomingAsyncBody>> {
+    pub async fn obs_delete_object(&self, path: &str) -> Result<Response<oio::Buffer>> {
         let p = build_abs_path(&self.root, path);
 
         let url = format!("{}/{}", self.endpoint, percent_encode_path(&p));
@@ -258,11 +263,7 @@ impl ObsCore {
         Ok(req)
     }
 
-    pub async fn obs_copy_object(
-        &self,
-        from: &str,
-        to: &str,
-    ) -> Result<Response<IncomingAsyncBody>> {
+    pub async fn obs_copy_object(&self, from: &str, to: &str) -> Result<Response<oio::Buffer>> {
         let source = build_abs_path(&self.root, from);
         let target = build_abs_path(&self.root, to);
 
@@ -285,7 +286,7 @@ impl ObsCore {
         next_marker: &str,
         delimiter: &str,
         limit: Option<usize>,
-    ) -> Result<Response<IncomingAsyncBody>> {
+    ) -> Result<Response<oio::Buffer>> {
         let p = build_abs_path(&self.root, path);
 
         let mut queries = vec![];
@@ -320,7 +321,7 @@ impl ObsCore {
         &self,
         path: &str,
         content_type: Option<&str>,
-    ) -> Result<Response<IncomingAsyncBody>> {
+    ) -> Result<Response<oio::Buffer>> {
         let p = build_abs_path(&self.root, path);
 
         let url = format!("{}/{}?uploads", self.endpoint, percent_encode_path(&p));
@@ -344,7 +345,7 @@ impl ObsCore {
         part_number: usize,
         size: Option<u64>,
         body: AsyncBody,
-    ) -> Result<Response<IncomingAsyncBody>> {
+    ) -> Result<Response<oio::Buffer>> {
         let p = build_abs_path(&self.root, path);
 
         let url = format!(
@@ -374,7 +375,7 @@ impl ObsCore {
         path: &str,
         upload_id: &str,
         parts: Vec<CompleteMultipartUploadRequestPart>,
-    ) -> Result<Response<IncomingAsyncBody>> {
+    ) -> Result<Response<oio::Buffer>> {
         let p = build_abs_path(&self.root, path);
         let url = format!(
             "{}/{}?uploadId={}",
@@ -407,7 +408,7 @@ impl ObsCore {
         &self,
         path: &str,
         upload_id: &str,
-    ) -> Result<Response<IncomingAsyncBody>> {
+    ) -> Result<Response<oio::Buffer>> {
         let p = build_abs_path(&self.root, path);
 
         let url = format!(
diff --git a/core/src/services/obs/error.rs b/core/src/services/obs/error.rs
index 0c2081312bc9..034def4d2aa4 100644
--- a/core/src/services/obs/error.rs
+++ b/core/src/services/obs/error.rs
@@ -38,9 +38,9 @@ struct ObsError {
 }
 
 /// Parse error response into Error.
-pub async fn parse_error(resp: Response<IncomingAsyncBody>) -> Result<Error> {
-    let (parts, body) = resp.into_parts();
-    let bs = body.bytes().await?;
+pub async fn parse_error(resp: Response<oio::Buffer>) -> Result<Error> {
+    let (parts, mut body) = resp.into_parts();
+    let bs = body.copy_to_bytes(body.remaining());
 
     let (kind, retryable) = match parts.status {
         StatusCode::NOT_FOUND => (ErrorKind::NotFound, false),
diff --git a/core/src/services/obs/lister.rs b/core/src/services/obs/lister.rs
index e38ebe75197f..819283a1b86b 100644
--- a/core/src/services/obs/lister.rs
+++ b/core/src/services/obs/lister.rs
@@ -58,7 +58,7 @@ impl oio::PageList for ObsLister {
             return Err(parse_error(resp).await?);
         }
 
-        let bs = resp.into_body().bytes().await?;
+        let bs = resp.into_body();
 
         let output: ListObjectsOutput =
             de::from_reader(bs.reader()).map_err(new_xml_deserialize_error)?;
diff --git a/core/src/services/obs/mod.rs b/core/src/services/obs/mod.rs
index f8a78290f0b5..93eb1328daca 100644
--- a/core/src/services/obs/mod.rs
+++ b/core/src/services/obs/mod.rs
@@ -21,4 +21,5 @@ pub use backend::ObsBuilder as Obs;
 mod core;
 mod error;
 mod lister;
+mod reader;
 mod writer;
diff --git a/core/src/services/obs/reader.rs b/core/src/services/obs/reader.rs
new file mode 100644
index 000000000000..fb220e2eeb9d
--- /dev/null
+++ b/core/src/services/obs/reader.rs
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+ +use std::sync::Arc; + +use http::StatusCode; + +use super::core::ObsCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct ObsReader { + core: Arc, + + path: String, + op: OpRead, +} + +impl ObsReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + ObsReader { + core, + path: path.to_string(), + op, + } + } +} + +impl oio::Read for ObsReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self + .core + .obs_get_object(&self.path, range, &self.op) + .await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/obs/writer.rs b/core/src/services/obs/writer.rs index 7df2e7cbcd04..e738bd04f270 100644 --- a/core/src/services/obs/writer.rs +++ b/core/src/services/obs/writer.rs @@ -57,10 +57,7 @@ impl oio::MultipartWrite for ObsWriter { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED | StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -75,7 +72,7 @@ impl oio::MultipartWrite for ObsWriter { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let result: InitiateMultipartUploadResult = quick_xml::de::from_reader(bytes::Buf::reader(bs)) @@ -115,8 +112,6 @@ impl oio::MultipartWrite for ObsWriter { })? .to_string(); - resp.into_body().consume().await?; - Ok(MultipartPart { part_number, etag }) } _ => Err(parse_error(resp).await?), @@ -140,11 +135,7 @@ impl oio::MultipartWrite for ObsWriter { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -157,10 +148,7 @@ impl oio::MultipartWrite for ObsWriter { match resp.status() { // Obs returns code 204 No Content if abort succeeds. 
// Reference: https://support.huaweicloud.com/intl/en-us/api-obs/obs_04_0103.html - StatusCode::NO_CONTENT => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::NO_CONTENT => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/onedrive/backend.rs b/core/src/services/onedrive/backend.rs index b63288c38c14..40f26967d044 100644 --- a/core/src/services/onedrive/backend.rs +++ b/core/src/services/onedrive/backend.rs @@ -18,6 +18,7 @@ use std::fmt::Debug; use async_trait::async_trait; +use bytes::Buf; use bytes::Bytes; use http::header; use http::Request; @@ -32,6 +33,7 @@ use super::graph_model::OnedriveGetItemBody; use super::lister::OnedriveLister; use super::writer::OneDriveWriter; use crate::raw::*; +use crate::services::onedrive::reader::OnedriveReader; use crate::*; #[derive(Clone)] @@ -62,7 +64,7 @@ impl Debug for OnedriveBackend { #[async_trait] impl Accessor for OnedriveBackend { - type Reader = IncomingAsyncBody; + type Reader = OnedriveReader; type Writer = oio::OneShotWriter; type Lister = oio::PageLister; type BlockingReader = (); @@ -120,9 +122,9 @@ impl Accessor for OnedriveBackend { let status = resp.status(); if status.is_success() { - let bytes = resp.into_body().bytes().await?; - let decoded_response = serde_json::from_slice::(&bytes) - .map_err(new_json_deserialize_error)?; + let bytes = resp.into_body(); + let decoded_response: OnedriveGetItemBody = + serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; let entry_mode: EntryMode = match decoded_response.item_type { ItemType::Folder { .. } => EntryMode::DIR, @@ -149,22 +151,11 @@ impl Accessor for OnedriveBackend { } } - async fn read(&self, path: &str, _args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.onedrive_get_content(path).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - _ => Err(parse_error(resp).await?), - } + async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + Ok(( + RpRead::default(), + OnedriveReader::new(self.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -199,7 +190,7 @@ impl Accessor for OnedriveBackend { impl OnedriveBackend { pub(crate) const BASE_URL: &'static str = "https://graph.microsoft.com/v1.0/me"; - async fn onedrive_get_stat(&self, path: &str) -> Result> { + async fn onedrive_get_stat(&self, path: &str) -> Result> { let path = build_rooted_abs_path(&self.root, path); let url: String = format!( "https://graph.microsoft.com/v1.0/me/drive/root:{}{}", @@ -222,7 +213,7 @@ impl OnedriveBackend { pub(crate) async fn onedrive_get_next_list_page( &self, url: &str, - ) -> Result> { + ) -> Result> { let mut req = Request::get(url); let auth_header_content = format!("Bearer {}", self.access_token); @@ -235,7 +226,11 @@ impl OnedriveBackend { self.client.send(req).await } - async fn onedrive_get_content(&self, path: &str) -> Result> { + pub async fn onedrive_get_content( + &self, + path: &str, + range: BytesRange, + ) -> Result> { let path = build_rooted_abs_path(&self.root, path); let url: String = format!( "https://graph.microsoft.com/v1.0/me/drive/root:{}{}", @@ -243,7 +238,7 @@ impl OnedriveBackend { ":/content" ); - let mut req = Request::get(&url); + let mut req = 
Request::get(&url).header(header::RANGE, range.to_header()); let auth_header_content = format!("Bearer {}", self.access_token); req = req.header(header::AUTHORIZATION, auth_header_content); @@ -261,7 +256,7 @@ impl OnedriveBackend { size: Option, args: &OpWrite, body: AsyncBody, - ) -> Result> { + ) -> Result> { let url = format!( "https://graph.microsoft.com/v1.0/me/drive/root:{}:/content", percent_encode_path(path) @@ -293,7 +288,7 @@ impl OnedriveBackend { chunk_end: usize, total_len: usize, body: AsyncBody, - ) -> Result> { + ) -> Result> { let mut req = Request::put(url); let auth_header_content = format!("Bearer {}", self.access_token); @@ -318,7 +313,7 @@ impl OnedriveBackend { &self, url: &str, body: OneDriveUploadSessionCreationRequestBody, - ) -> Result> { + ) -> Result> { let mut req = Request::post(url); let auth_header_content = format!("Bearer {}", self.access_token); @@ -337,7 +332,7 @@ impl OnedriveBackend { &self, url: &str, body: CreateDirPayload, - ) -> Result> { + ) -> Result> { let mut req = Request::post(url); let auth_header_content = format!("Bearer {}", self.access_token); @@ -351,7 +346,7 @@ impl OnedriveBackend { self.client.send(req).await } - pub(crate) async fn onedrive_delete(&self, path: &str) -> Result> { + pub(crate) async fn onedrive_delete(&self, path: &str) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( "https://graph.microsoft.com/v1.0/me/drive/root:/{}", diff --git a/core/src/services/onedrive/error.rs b/core/src/services/onedrive/error.rs index 60de6ae0d0a9..30a90f7a4f92 100644 --- a/core/src/services/onedrive/error.rs +++ b/core/src/services/onedrive/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::Response; use http::StatusCode; @@ -24,9 +25,9 @@ use crate::ErrorKind; use crate::Result; /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/onedrive/lister.rs b/core/src/services/onedrive/lister.rs index 32fdc426f7b9..591d44e9f1f3 100644 --- a/core/src/services/onedrive/lister.rs +++ b/core/src/services/onedrive/lister.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. 
+use bytes::Buf; + use super::backend::OnedriveBackend; use super::error::parse_error; use super::graph_model::GraphApiOnedriveListResponse; @@ -76,9 +78,9 @@ impl oio::PageList for OnedriveLister { return Err(error); } - let bytes = resp.into_body().bytes().await?; - let decoded_response = serde_json::from_slice::(&bytes) - .map_err(new_json_deserialize_error)?; + let bytes = resp.into_body(); + let decoded_response: GraphApiOnedriveListResponse = + serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; if let Some(next_link) = decoded_response.next_link { ctx.token = next_link; diff --git a/core/src/services/onedrive/mod.rs b/core/src/services/onedrive/mod.rs index 672fb6c7e1e7..b5211c55e4a0 100644 --- a/core/src/services/onedrive/mod.rs +++ b/core/src/services/onedrive/mod.rs @@ -24,4 +24,5 @@ pub use builder::OnedriveBuilder as Onedrive; pub use builder::OnedriveConfig; mod lister; +mod reader; mod writer; diff --git a/core/src/services/onedrive/reader.rs b/core/src/services/onedrive/reader.rs new file mode 100644 index 000000000000..355789936a7d --- /dev/null +++ b/core/src/services/onedrive/reader.rs @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use http::StatusCode; + +use super::error::parse_error; +use crate::raw::*; +use crate::services::onedrive::backend::OnedriveBackend; + +pub struct OnedriveReader { + core: OnedriveBackend, + + path: String, + _op: OpRead, +} + +impl OnedriveReader { + pub fn new(core: OnedriveBackend, path: &str, op: OpRead) -> Self { + OnedriveReader { + core, + path: path.to_string(), + _op: op, + } + } +} + +impl oio::Read for OnedriveReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.onedrive_get_content(&self.path, range).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/onedrive/writer.rs b/core/src/services/onedrive/writer.rs index 60988f20008b..8efb65b5c849 100644 --- a/core/src/services/onedrive/writer.rs +++ b/core/src/services/onedrive/writer.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. 
+use bytes::Buf; use bytes::Bytes; use http::StatusCode; @@ -68,10 +69,7 @@ impl OneDriveWriter { match status { // Typical response code: 201 Created // Reference: https://learn.microsoft.com/en-us/onedrive/developer/rest-api/api/driveitem_put_content?view=odsp-graph-online#response - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED | StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -113,9 +111,7 @@ impl OneDriveWriter { match status { // Typical response code: 202 Accepted // Reference: https://learn.microsoft.com/en-us/onedrive/developer/rest-api/api/driveitem_put_content?view=odsp-graph-online#response - StatusCode::ACCEPTED | StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - } + StatusCode::ACCEPTED | StatusCode::CREATED | StatusCode::OK => {} _ => return Err(parse_error(resp).await?), } @@ -149,9 +145,9 @@ impl OneDriveWriter { match status { // Reference: https://learn.microsoft.com/en-us/onedrive/developer/rest-api/api/driveitem_createuploadsession?view=odsp-graph-online#response StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let result: OneDriveUploadSessionCreationResponseBody = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; Ok(result) } _ => Err(parse_error(resp).await?), diff --git a/core/src/services/oss/backend.rs b/core/src/services/oss/backend.rs index 2b5583d34550..843b8ebc1051 100644 --- a/core/src/services/oss/backend.rs +++ b/core/src/services/oss/backend.rs @@ -35,6 +35,7 @@ use super::error::parse_error; use super::lister::OssLister; use super::writer::OssWriter; use crate::raw::*; +use crate::services::oss::reader::OssReader; use crate::services::oss::writer::OssWriters; use crate::*; @@ -376,7 +377,7 @@ pub struct OssBackend { #[async_trait] impl Accessor for OssBackend { - type Reader = IncomingAsyncBody; + type Reader = OssReader; type Writer = OssWriters; type Lister = oio::PageLister; type BlockingReader = (); @@ -394,8 +395,7 @@ impl Accessor for OssBackend { stat_with_if_none_match: true, read: true, - read_can_next: true, - read_with_range: true, + read_with_if_match: true, read_with_if_none_match: true, @@ -456,34 +456,10 @@ impl Accessor for OssBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self - .core - .oss_get_object( - path, - args.range(), - args.if_match(), - args.if_none_match(), - args.override_content_disposition(), - ) - .await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + OssReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -502,10 +478,7 @@ impl Accessor for OssBackend { let resp = self.core.oss_delete_object(path).await?; let status = resp.status(); match status { - StatusCode::NO_CONTENT | StatusCode::NOT_FOUND => { - resp.into_body().consume().await?; - 
Ok(RpDelete::default()) - } + StatusCode::NO_CONTENT | StatusCode::NOT_FOUND => Ok(RpDelete::default()), _ => Err(parse_error(resp).await?), } } @@ -526,10 +499,7 @@ impl Accessor for OssBackend { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(RpCopy::default()) - } + StatusCode::OK => Ok(RpCopy::default()), _ => Err(parse_error(resp).await?), } } @@ -541,14 +511,10 @@ impl Accessor for OssBackend { self.core .oss_head_object_request(path, true, v.if_match(), v.if_none_match())? } - PresignOperation::Read(v) => self.core.oss_get_object_request( - path, - v.range(), - true, - v.if_match(), - v.if_none_match(), - v.override_content_disposition(), - )?, + PresignOperation::Read(v) => { + self.core + .oss_get_object_request(path, BytesRange::default(), true, v)? + } PresignOperation::Write(v) => { self.core .oss_put_object_request(path, None, v, AsyncBody::Empty, true)? @@ -595,7 +561,7 @@ impl Accessor for OssBackend { let status = resp.status(); if let StatusCode::OK = status { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let result: DeleteObjectsResult = quick_xml::de::from_reader(bs.reader()).map_err(new_xml_deserialize_error)?; diff --git a/core/src/services/oss/core.rs b/core/src/services/oss/core.rs index 66d4654b6e64..6d3924ab68b7 100644 --- a/core/src/services/oss/core.rs +++ b/core/src/services/oss/core.rs @@ -126,7 +126,7 @@ impl OssCore { } #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } @@ -238,9 +238,7 @@ impl OssCore { path: &str, range: BytesRange, is_presign: bool, - if_match: Option<&str>, - if_none_match: Option<&str>, - override_content_disposition: Option<&str>, + args: &OpRead, ) -> Result> { let p = build_abs_path(&self.root, path); let endpoint = self.get_endpoint(is_presign); @@ -248,7 +246,7 @@ impl OssCore { // Add query arguments to the URL based on response overrides let mut query_args = Vec::new(); - if let Some(override_content_disposition) = override_content_disposition { + if let Some(override_content_disposition) = args.override_content_disposition() { query_args.push(format!( "{}={}", constants::RESPONSE_CONTENT_DISPOSITION, @@ -270,10 +268,10 @@ impl OssCore { req = req.header("x-oss-range-behavior", "standard"); } - if let Some(if_match) = if_match { + if let Some(if_match) = args.if_match() { req = req.header(IF_MATCH, if_match) } - if let Some(if_none_match) = if_none_match { + if let Some(if_none_match) = args.if_none_match() { req = req.header(IF_NONE_MATCH, if_none_match); } @@ -370,18 +368,9 @@ impl OssCore { &self, path: &str, range: BytesRange, - if_match: Option<&str>, - if_none_match: Option<&str>, - override_content_disposition: Option<&str>, - ) -> Result> { - let mut req = self.oss_get_object_request( - path, - range, - false, - if_match, - if_none_match, - override_content_disposition, - )?; + args: &OpRead, + ) -> Result> { + let mut req = self.oss_get_object_request(path, range, false, args)?; self.sign(&mut req).await?; self.send(req).await } @@ -391,7 +380,7 @@ impl OssCore { path: &str, if_match: Option<&str>, if_none_match: Option<&str>, - ) -> Result> { + ) -> Result> { let mut req = self.oss_head_object_request(path, false, if_match, if_none_match)?; self.sign(&mut req).await?; @@ -404,18 +393,14 @@ impl OssCore { size: Option, args: &OpWrite, body: AsyncBody, - ) -> Result> { + ) -> Result> { let mut req = self.oss_put_object_request(path, size, 
args, body, false)?; self.sign(&mut req).await?; self.send(req).await } - pub async fn oss_copy_object( - &self, - from: &str, - to: &str, - ) -> Result> { + pub async fn oss_copy_object(&self, from: &str, to: &str) -> Result> { let source = build_abs_path(&self.root, from); let target = build_abs_path(&self.root, to); @@ -447,23 +432,20 @@ impl OssCore { delimiter: &str, limit: Option, start_after: Option, - ) -> Result> { + ) -> Result> { let mut req = self.oss_list_object_request(path, token, delimiter, limit, start_after)?; self.sign(&mut req).await?; self.send(req).await } - pub async fn oss_delete_object(&self, path: &str) -> Result> { + pub async fn oss_delete_object(&self, path: &str) -> Result> { let mut req = self.oss_delete_object_request(path)?; self.sign(&mut req).await?; self.send(req).await } - pub async fn oss_delete_objects( - &self, - paths: Vec, - ) -> Result> { + pub async fn oss_delete_objects(&self, paths: Vec) -> Result> { let url = format!("{}/?delete", self.endpoint); let req = Request::post(&url); @@ -509,7 +491,7 @@ impl OssCore { content_disposition: Option<&str>, cache_control: Option<&str>, is_presign: bool, - ) -> Result> { + ) -> Result> { let path = build_abs_path(&self.root, path); let endpoint = self.get_endpoint(is_presign); let url = format!("{}/{}?uploads", endpoint, percent_encode_path(&path)); @@ -540,7 +522,7 @@ impl OssCore { is_presign: bool, size: u64, body: AsyncBody, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let endpoint = self.get_endpoint(is_presign); @@ -565,7 +547,7 @@ impl OssCore { upload_id: &str, is_presign: bool, parts: Vec, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let endpoint = self.get_endpoint(is_presign); let url = format!( @@ -600,7 +582,7 @@ impl OssCore { &self, path: &str, upload_id: &str, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( diff --git a/core/src/services/oss/error.rs b/core/src/services/oss/error.rs index ecead41c4f76..e1d8fcf2a200 100644 --- a/core/src/services/oss/error.rs +++ b/core/src/services/oss/error.rs @@ -37,9 +37,9 @@ struct OssError { } /// Parse error response into Error. 
-pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/oss/lister.rs b/core/src/services/oss/lister.rs index 28919cfa80e9..0022597a7eed 100644 --- a/core/src/services/oss/lister.rs +++ b/core/src/services/oss/lister.rs @@ -76,7 +76,7 @@ impl oio::PageList for OssLister { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let output: ListObjectsOutput = de::from_reader(bs.reader()) .map_err(|e| Error::new(ErrorKind::Unexpected, "deserialize xml").set_source(e))?; diff --git a/core/src/services/oss/mod.rs b/core/src/services/oss/mod.rs index 3954a9885c0d..03b2f9612a51 100644 --- a/core/src/services/oss/mod.rs +++ b/core/src/services/oss/mod.rs @@ -21,4 +21,5 @@ pub use backend::OssBuilder as Oss; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/oss/reader.rs b/core/src/services/oss/reader.rs new file mode 100644 index 000000000000..4efb4f6dfe96 --- /dev/null +++ b/core/src/services/oss/reader.rs @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use http::StatusCode; + +use super::core::OssCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct OssReader { + core: Arc, + + path: String, + op: OpRead, +} + +impl OssReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + OssReader { + core, + path: path.to_string(), + op, + } + } +} + +impl oio::Read for OssReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self + .core + .oss_get_object(&self.path, range, &self.op) + .await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/oss/writer.rs b/core/src/services/oss/writer.rs index 066efe6a7c13..f55c7d7af8ee 100644 --- a/core/src/services/oss/writer.rs +++ b/core/src/services/oss/writer.rs @@ -56,10 +56,7 @@ impl oio::MultipartWrite for OssWriter { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED | StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -80,7 +77,7 @@ impl oio::MultipartWrite for OssWriter { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let result: InitiateMultipartUploadResult = quick_xml::de::from_reader(bytes::Buf::reader(bs)) @@ -120,8 +117,6 @@ impl oio::MultipartWrite for OssWriter { })? .to_string(); - resp.into_body().consume().await?; - Ok(oio::MultipartPart { part_number, etag }) } _ => Err(parse_error(resp).await?), @@ -145,11 +140,7 @@ impl oio::MultipartWrite for OssWriter { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -161,10 +152,7 @@ impl oio::MultipartWrite for OssWriter { .await?; match resp.status() { // OSS returns code 204 if abort succeeds. - StatusCode::NO_CONTENT => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::NO_CONTENT => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/pcloud/backend.rs b/core/src/services/pcloud/backend.rs index 8283ae884610..aa6fade91fdc 100644 --- a/core/src/services/pcloud/backend.rs +++ b/core/src/services/pcloud/backend.rs @@ -21,6 +21,7 @@ use std::fmt::Formatter; use std::sync::Arc; use async_trait::async_trait; +use bytes::Buf; use http::StatusCode; use log::debug; use serde::Deserialize; @@ -32,6 +33,7 @@ use super::lister::PcloudLister; use super::writer::PcloudWriter; use super::writer::PcloudWriters; use crate::raw::*; +use crate::services::pcloud::reader::PcloudReader; use crate::*; /// Config for backblaze Pcloud services support. 
@@ -228,7 +230,7 @@ pub struct PcloudBackend { #[async_trait] impl Accessor for PcloudBackend { - type Reader = IncomingAsyncBody; + type Reader = PcloudReader; type Writer = PcloudWriters; type Lister = oio::PageLister; type BlockingReader = (); @@ -245,7 +247,6 @@ impl Accessor for PcloudBackend { create_dir: true, read: true, - read_can_next: true, write: true, @@ -273,9 +274,9 @@ impl Accessor for PcloudBackend { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let resp: StatResponse = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; let result = resp.result; if result == 2010 || result == 2055 || result == 2002 { return Err(Error::new(ErrorKind::NotFound, &format!("{resp:?}"))); @@ -295,24 +296,13 @@ impl Accessor for PcloudBackend { } } - async fn read(&self, path: &str, _args: OpRead) -> Result<(RpRead, Self::Reader)> { + async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { let link = self.core.get_file_link(path).await?; - let resp = self.core.download(&link).await?; - - let status = resp.status(); - - match status { - StatusCode::OK => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + PcloudReader::new(self.core.clone(), &link, args), + )) } async fn write(&self, path: &str, _args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -334,9 +324,9 @@ impl Accessor for PcloudBackend { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let resp: PcloudError = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; let result = resp.result; // pCloud returns 2005 or 2009 if the file or folder is not found @@ -368,9 +358,9 @@ impl Accessor for PcloudBackend { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let resp: PcloudError = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; let result = resp.result; if result == 2009 || result == 2010 || result == 2055 || result == 2002 { return Err(Error::new(ErrorKind::NotFound, &format!("{resp:?}"))); @@ -398,9 +388,9 @@ impl Accessor for PcloudBackend { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let resp: PcloudError = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; let result = resp.result; if result == 2009 || result == 2010 || result == 2055 || result == 2002 { return Err(Error::new(ErrorKind::NotFound, &format!("{resp:?}"))); diff --git a/core/src/services/pcloud/core.rs b/core/src/services/pcloud/core.rs index 21d3dfedd780..9bdaaa5c6f45 100644 --- a/core/src/services/pcloud/core.rs +++ b/core/src/services/pcloud/core.rs @@ -18,7 +18,9 @@ use std::fmt::Debug; use std::fmt::Formatter; +use bytes::Buf; use bytes::Bytes; +use http::header; use http::Request; use http::Response; use http::StatusCode; @@ -55,7 +57,7 @@ impl Debug for PcloudCore { impl PcloudCore { #[inline] - pub async fn send(&self, 
req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } } @@ -84,9 +86,9 @@ impl PcloudCore { let status = resp.status(); match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let resp: GetFileLinkResponse = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; let result = resp.result; if result == 2010 || result == 2055 || result == 2002 { return Err(Error::new(ErrorKind::NotFound, &format!("{resp:?}"))); @@ -108,11 +110,12 @@ impl PcloudCore { } } - pub async fn download(&self, url: &str) -> Result> { + pub async fn download(&self, url: &str, range: BytesRange) -> Result> { let req = Request::get(url); // set body let req = req + .header(header::RANGE, range.to_header()) .body(AsyncBody::Empty) .map_err(new_request_build_error)?; @@ -132,9 +135,9 @@ impl PcloudCore { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let resp: PcloudError = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; let result = resp.result; if result == 2010 || result == 2055 || result == 2002 { return Err(Error::new(ErrorKind::NotFound, &format!("{resp:?}"))); @@ -153,10 +156,7 @@ impl PcloudCore { Ok(()) } - pub async fn create_folder_if_not_exists( - &self, - path: &str, - ) -> Result> { + pub async fn create_folder_if_not_exists(&self, path: &str) -> Result> { let url = format!( "{}/createfolderifnotexists?path=/{}&username={}&password={}", self.endpoint, @@ -175,7 +175,7 @@ impl PcloudCore { self.send(req).await } - pub async fn rename_file(&self, from: &str, to: &str) -> Result> { + pub async fn rename_file(&self, from: &str, to: &str) -> Result> { let from = build_abs_path(&self.root, from); let to = build_abs_path(&self.root, to); @@ -198,7 +198,7 @@ impl PcloudCore { self.send(req).await } - pub async fn rename_folder(&self, from: &str, to: &str) -> Result> { + pub async fn rename_folder(&self, from: &str, to: &str) -> Result> { let from = build_abs_path(&self.root, from); let to = build_abs_path(&self.root, to); let url = format!( @@ -220,7 +220,7 @@ impl PcloudCore { self.send(req).await } - pub async fn delete_folder(&self, path: &str) -> Result> { + pub async fn delete_folder(&self, path: &str) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( @@ -241,7 +241,7 @@ impl PcloudCore { self.send(req).await } - pub async fn delete_file(&self, path: &str) -> Result> { + pub async fn delete_file(&self, path: &str) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( @@ -262,7 +262,7 @@ impl PcloudCore { self.send(req).await } - pub async fn copy_file(&self, from: &str, to: &str) -> Result> { + pub async fn copy_file(&self, from: &str, to: &str) -> Result> { let from = build_abs_path(&self.root, from); let to = build_abs_path(&self.root, to); @@ -285,7 +285,7 @@ impl PcloudCore { self.send(req).await } - pub async fn copy_folder(&self, from: &str, to: &str) -> Result> { + pub async fn copy_folder(&self, from: &str, to: &str) -> Result> { let from = build_abs_path(&self.root, from); let to = build_abs_path(&self.root, to); @@ -308,7 +308,7 @@ impl PcloudCore { self.send(req).await } - pub async fn stat(&self, path: &str) -> Result> { + pub async fn stat(&self, path: &str) -> Result> { let path = 
build_abs_path(&self.root, path); let path = path.trim_end_matches('/'); @@ -331,7 +331,7 @@ impl PcloudCore { self.send(req).await } - pub async fn upload_file(&self, path: &str, bs: Bytes) -> Result> { + pub async fn upload_file(&self, path: &str, bs: Bytes) -> Result> { let path = build_abs_path(&self.root, path); let (name, path) = (get_basename(&path), get_parent(&path).trim_end_matches('/')); @@ -355,7 +355,7 @@ impl PcloudCore { self.send(req).await } - pub async fn list_folder(&self, path: &str) -> Result> { + pub async fn list_folder(&self, path: &str) -> Result> { let path = build_abs_path(&self.root, path); let path = normalize_root(&path); diff --git a/core/src/services/pcloud/error.rs b/core/src/services/pcloud/error.rs index e59bcb672cc8..2f840969101e 100644 --- a/core/src/services/pcloud/error.rs +++ b/core/src/services/pcloud/error.rs @@ -18,6 +18,7 @@ use std::fmt::Debug; use std::fmt::Formatter; +use bytes::Buf; use http::Response; use serde::Deserialize; @@ -43,9 +44,9 @@ impl Debug for PcloudError { } /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let message = String::from_utf8_lossy(&bs).into_owned(); let mut err = Error::new(ErrorKind::Unexpected, &message); @@ -57,7 +58,6 @@ pub async fn parse_error(resp: Response) -> Result { #[cfg(test)] mod test { - use futures::stream; use http::StatusCode; use super::*; @@ -70,9 +70,9 @@ mod test { Invalid link - + This link was generated for another IP address. Try previous step again. - + "#, ErrorKind::Unexpected, StatusCode::GONE, @@ -80,10 +80,7 @@ mod test { for res in err_res { let bs = bytes::Bytes::from(res.0); - let body = IncomingAsyncBody::new( - Box::new(oio::into_stream(stream::iter(vec![Ok(bs.clone())]))), - None, - ); + let body = oio::Buffer::from(bs); let resp = Response::builder().status(res.2).body(body).unwrap(); let err = parse_error(resp).await; diff --git a/core/src/services/pcloud/lister.rs b/core/src/services/pcloud/lister.rs index eb4a0c670bf7..18d706529d07 100644 --- a/core/src/services/pcloud/lister.rs +++ b/core/src/services/pcloud/lister.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use bytes::Buf; use http::StatusCode; use super::core::*; @@ -48,10 +49,10 @@ impl oio::PageList for PcloudLister { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let resp: ListFolderResponse = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + let resp: ListFolderResponse = serde_json::from_reader(bs.clone().reader()) + .map_err(new_json_deserialize_error)?; let result = resp.result; if result == 2005 { @@ -85,7 +86,7 @@ impl oio::PageList for PcloudLister { return Err(Error::new( ErrorKind::Unexpected, - &String::from_utf8_lossy(&bs), + &String::from_utf8_lossy(&bs.to_bytes()), )); } _ => Err(parse_error(resp).await?), diff --git a/core/src/services/pcloud/mod.rs b/core/src/services/pcloud/mod.rs index 50fb5e5f53c3..5ec313a14f26 100644 --- a/core/src/services/pcloud/mod.rs +++ b/core/src/services/pcloud/mod.rs @@ -22,4 +22,5 @@ pub use backend::PcloudConfig; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/pcloud/reader.rs b/core/src/services/pcloud/reader.rs new file mode 100644 index 000000000000..317073a670c7 --- /dev/null +++ 
b/core/src/services/pcloud/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use http::StatusCode; + +use super::core::PcloudCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct PcloudReader { + core: Arc, + + link: String, + _op: OpRead, +} + +impl PcloudReader { + pub fn new(core: Arc, link: &str, op: OpRead) -> Self { + PcloudReader { + core, + link: link.to_string(), + _op: op, + } + } +} + +impl oio::Read for PcloudReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.download(&self.link, range).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/pcloud/writer.rs b/core/src/services/pcloud/writer.rs index a19ba154c394..d4d3bd53cade 100644 --- a/core/src/services/pcloud/writer.rs +++ b/core/src/services/pcloud/writer.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use bytes::Buf; use bytes::Bytes; use http::StatusCode; @@ -49,9 +50,9 @@ impl oio::OneShotWrite for PcloudWriter { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let resp: PcloudError = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; let result = resp.result; if result != 0 { diff --git a/core/src/services/s3/backend.rs b/core/src/services/s3/backend.rs index a9853a82f3e3..ef090f66db6e 100644 --- a/core/src/services/s3/backend.rs +++ b/core/src/services/s3/backend.rs @@ -45,6 +45,7 @@ use super::core::*; use super::error::parse_error; use super::error::parse_s3_error_code; use super::lister::S3Lister; +use super::reader::S3Reader; use super::writer::S3Writer; use super::writer::S3Writers; use crate::raw::*; @@ -992,7 +993,7 @@ pub struct S3Backend { #[cfg_attr(not(target_arch = "wasm32"), async_trait)] #[cfg_attr(target_arch = "wasm32", async_trait(?Send))] impl Accessor for S3Backend { - type Reader = IncomingAsyncBody; + type Reader = S3Reader; type Writer = S3Writers; type Lister = oio::PageLister; type BlockingReader = (); @@ -1013,8 +1014,7 @@ impl Accessor for S3Backend { stat_with_override_content_type: !self.core.disable_stat_with_override, read: true, - read_can_next: true, - read_with_range: true, + read_with_if_match: true, read_with_if_none_match: true, read_with_override_cache_control: true, @@ -1073,25 +1073,10 @@ impl Accessor for S3Backend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, 
Self::Reader)> { - let resp = self.core.s3_get_object(path, args).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + S3Reader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -1135,13 +1120,7 @@ impl Accessor for S3Backend { let status = resp.status(); match status { - StatusCode::OK => { - // According to the documentation, when using copy_object, a 200 error may occur and we need to detect it. - // https://docs.aws.amazon.com/AmazonS3/latest/API/API_CopyObject.html#API_CopyObject_RequestSyntax - resp.into_body().consume().await?; - - Ok(RpCopy::default()) - } + StatusCode::OK => Ok(RpCopy::default()), _ => Err(parse_error(resp).await?), } } @@ -1152,7 +1131,10 @@ impl Accessor for S3Backend { // We will not send this request out, just for signing. let mut req = match op { PresignOperation::Stat(v) => self.core.s3_head_object_request(path, v)?, - PresignOperation::Read(v) => self.core.s3_get_object_request(path, v)?, + PresignOperation::Read(v) => { + self.core + .s3_get_object_request(path, BytesRange::default(), &v)? + } PresignOperation::Write(_) => self.core.s3_put_object_request( path, None, @@ -1190,7 +1172,7 @@ impl Accessor for S3Backend { let status = resp.status(); if let StatusCode::OK = status { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let result: DeleteObjectsResult = quick_xml::de::from_reader(bs.reader()).map_err(new_xml_deserialize_error)?; diff --git a/core/src/services/s3/core.rs b/core/src/services/s3/core.rs index 7a271bec8e88..4f93f08fea37 100644 --- a/core/src/services/s3/core.rs +++ b/core/src/services/s3/core.rs @@ -182,7 +182,7 @@ impl S3Core { } #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } @@ -300,7 +300,12 @@ impl S3Core { Ok(req) } - pub fn s3_get_object_request(&self, path: &str, args: OpRead) -> Result> { + pub fn s3_get_object_request( + &self, + path: &str, + range: BytesRange, + args: &OpRead, + ) -> Result> { let p = build_abs_path(&self.root, path); // Construct headers to add to the request @@ -335,7 +340,6 @@ impl S3Core { let mut req = Request::get(&url); - let range = args.range(); if !range.is_full() { req = req.header(http::header::RANGE, range.to_header()); } @@ -361,9 +365,10 @@ impl S3Core { pub async fn s3_get_object( &self, path: &str, - args: OpRead, - ) -> Result> { - let mut req = self.s3_get_object_request(path, args)?; + range: BytesRange, + args: &OpRead, + ) -> Result> { + let mut req = self.s3_get_object_request(path, range, args)?; self.sign(&mut req).await?; @@ -413,11 +418,7 @@ impl S3Core { Ok(req) } - pub async fn s3_head_object( - &self, - path: &str, - args: OpStat, - ) -> Result> { + pub async fn s3_head_object(&self, path: &str, args: OpStat) -> Result> { let mut req = self.s3_head_object_request(path, args)?; self.sign(&mut req).await?; @@ -425,7 +426,7 @@ impl S3Core { self.send(req).await } - pub async fn s3_delete_object(&self, path: &str) -> 
Result> { + pub async fn s3_delete_object(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path); let url = format!("{}/{}", self.endpoint, percent_encode_path(&p)); @@ -439,11 +440,7 @@ impl S3Core { self.send(req).await } - pub async fn s3_copy_object( - &self, - from: &str, - to: &str, - ) -> Result> { + pub async fn s3_copy_object(&self, from: &str, to: &str) -> Result> { let from = build_abs_path(&self.root, from); let to = build_abs_path(&self.root, to); @@ -508,7 +505,7 @@ impl S3Core { delimiter: &str, limit: Option, start_after: Option, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let mut url = format!("{}?list-type=2", self.endpoint); @@ -553,7 +550,7 @@ impl S3Core { &self, path: &str, args: &OpWrite, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!("{}/{}?uploads", self.endpoint, percent_encode_path(&p)); @@ -625,7 +622,7 @@ impl S3Core { path: &str, upload_id: &str, parts: Vec, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( @@ -661,7 +658,7 @@ impl S3Core { &self, path: &str, upload_id: &str, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( @@ -678,10 +675,7 @@ impl S3Core { self.send(req).await } - pub async fn s3_delete_objects( - &self, - paths: Vec, - ) -> Result> { + pub async fn s3_delete_objects(&self, paths: Vec) -> Result> { let url = format!("{}/?delete", self.endpoint); let req = Request::post(&url); diff --git a/core/src/services/s3/error.rs b/core/src/services/s3/error.rs index f683554877f7..388324b9efe5 100644 --- a/core/src/services/s3/error.rs +++ b/core/src/services/s3/error.rs @@ -36,9 +36,9 @@ struct S3Error { } /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (mut kind, mut retryable) = match parts.status.as_u16() { 403 => (ErrorKind::PermissionDenied, false), diff --git a/core/src/services/s3/lister.rs b/core/src/services/s3/lister.rs index 1e1c25b1d702..2a86ae4c2d7d 100644 --- a/core/src/services/s3/lister.rs +++ b/core/src/services/s3/lister.rs @@ -81,7 +81,7 @@ impl oio::PageList for S3Lister { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let output: ListObjectsOutput = de::from_reader(bs.reader()).map_err(new_xml_deserialize_error)?; diff --git a/core/src/services/s3/mod.rs b/core/src/services/s3/mod.rs index 4e2f283cb215..01a986b6bb14 100644 --- a/core/src/services/s3/mod.rs +++ b/core/src/services/s3/mod.rs @@ -22,4 +22,5 @@ pub use backend::S3Config; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/s3/reader.rs b/core/src/services/s3/reader.rs new file mode 100644 index 000000000000..91edf6f43d03 --- /dev/null +++ b/core/src/services/s3/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use http::StatusCode; + +use super::core::*; +use super::error::*; +use crate::raw::*; +use crate::*; + +pub struct S3Reader { + core: Arc, + + path: String, + op: OpRead, +} + +impl S3Reader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + S3Reader { + core, + path: path.to_string(), + op, + } + } +} + +impl oio::Read for S3Reader { + async fn read_at(&self, offset: u64, limit: usize) -> Result { + let range = BytesRange::new(offset, Some(limit as u64)); + let resp = self.core.s3_get_object(&self.path, range, &self.op).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/s3/writer.rs b/core/src/services/s3/writer.rs index 0f6d8194566f..43ef3b37ba20 100644 --- a/core/src/services/s3/writer.rs +++ b/core/src/services/s3/writer.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use bytes::Buf; use http::StatusCode; use super::core::*; @@ -56,10 +57,7 @@ impl oio::MultipartWrite for S3Writer { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED | StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -74,11 +72,10 @@ impl oio::MultipartWrite for S3Writer { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let result: InitiateMultipartUploadResult = - quick_xml::de::from_reader(bytes::Buf::reader(bs)) - .map_err(new_xml_deserialize_error)?; + quick_xml::de::from_reader(bs.reader()).map_err(new_xml_deserialize_error)?; Ok(result.upload_id) } @@ -117,8 +114,6 @@ impl oio::MultipartWrite for S3Writer { })? .to_string(); - resp.into_body().consume().await?; - Ok(oio::MultipartPart { part_number, etag }) } _ => Err(parse_error(resp).await?), @@ -142,11 +137,7 @@ impl oio::MultipartWrite for S3Writer { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -158,10 +149,7 @@ impl oio::MultipartWrite for S3Writer { .await?; match resp.status() { // s3 returns code 204 if abort succeeds. 
- StatusCode::NO_CONTENT => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::NO_CONTENT => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/seafile/backend.rs b/core/src/services/seafile/backend.rs index 5bf5879c14ee..3529e2e50fd7 100644 --- a/core/src/services/seafile/backend.rs +++ b/core/src/services/seafile/backend.rs @@ -21,7 +21,6 @@ use std::fmt::Formatter; use std::sync::Arc; use async_trait::async_trait; -use http::StatusCode; use log::debug; use serde::Deserialize; use tokio::sync::RwLock; @@ -29,12 +28,12 @@ use tokio::sync::RwLock; use super::core::parse_dir_detail; use super::core::parse_file_detail; use super::core::SeafileCore; -use super::error::parse_error; use super::lister::SeafileLister; use super::writer::SeafileWriter; use super::writer::SeafileWriters; use crate::raw::*; use crate::services::seafile::core::SeafileSigner; +use crate::services::seafile::reader::SeafileReader; use crate::*; /// Config for backblaze seafile services support. @@ -256,7 +255,7 @@ pub struct SeafileBackend { #[async_trait] impl Accessor for SeafileBackend { - type Reader = IncomingAsyncBody; + type Reader = SeafileReader; type Writer = SeafileWriters; type Lister = oio::PageLister; type BlockingReader = (); @@ -271,7 +270,6 @@ impl Accessor for SeafileBackend { stat: true, read: true, - read_can_next: true, write: true, write_can_empty: true, @@ -303,22 +301,11 @@ impl Accessor for SeafileBackend { metadata.map(RpStat::new) } - async fn read(&self, path: &str, _args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.download_file(path).await?; - - let status = resp.status(); - - match status { - StatusCode::OK => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - _ => Err(parse_error(resp).await?), - } + async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + Ok(( + RpRead::default(), + SeafileReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/services/seafile/core.rs b/core/src/services/seafile/core.rs index c8af7e17014e..69fc94f3d562 100644 --- a/core/src/services/seafile/core.rs +++ b/core/src/services/seafile/core.rs @@ -19,6 +19,7 @@ use std::fmt::Debug; use std::fmt::Formatter; use std::sync::Arc; +use bytes::Buf; use bytes::Bytes; use http::header; use http::Request; @@ -64,7 +65,7 @@ impl Debug for SeafileCore { impl SeafileCore { #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } @@ -96,9 +97,10 @@ impl SeafileCore { match status { StatusCode::OK => { - let resp_body = &resp.into_body().bytes().await?; - let auth_response = serde_json::from_slice::(resp_body) - .map_err(new_json_deserialize_error)?; + let resp_body = resp.into_body(); + let auth_response: AuthTokenResponse = + serde_json::from_reader(resp_body.reader()) + .map_err(new_json_deserialize_error)?; signer.auth_info = AuthInfo { token: auth_response.token, repo_id: "".to_string(), @@ -125,9 +127,9 @@ impl SeafileCore { match status { StatusCode::OK => { - let resp_body = &resp.into_body().bytes().await?; - let list_library_response = - serde_json::from_slice::>(resp_body) + let resp_body = resp.into_body(); + let list_library_response: Vec = + serde_json::from_reader(resp_body.reader()) 
.map_err(new_json_deserialize_error)?; for library in list_library_response { @@ -174,8 +176,8 @@ impl SeafileCore { match status { StatusCode::OK => { - let resp_body = &resp.into_body().bytes().await?; - let upload_url = serde_json::from_slice::(resp_body) + let resp_body = resp.into_body(); + let upload_url = serde_json::from_reader(resp_body.reader()) .map_err(new_json_deserialize_error)?; Ok(upload_url) } @@ -205,8 +207,8 @@ impl SeafileCore { match status { StatusCode::OK => { - let resp_body = &resp.into_body().bytes().await?; - let download_url = serde_json::from_slice::(resp_body) + let resp_body = resp.into_body(); + let download_url = serde_json::from_reader(resp_body.reader()) .map_err(new_json_deserialize_error)?; Ok(download_url) @@ -216,22 +218,21 @@ impl SeafileCore { } /// download file - pub async fn download_file(&self, path: &str) -> Result> { + pub async fn download_file( + &self, + path: &str, + range: BytesRange, + ) -> Result> { let download_url = self.get_download_url(path).await?; let req = Request::get(download_url); let req = req + .header(header::RANGE, range.to_header()) .body(AsyncBody::Empty) .map_err(new_request_build_error)?; - let resp = self.send(req).await?; - let status = resp.status(); - - match status { - StatusCode::OK => Ok(resp), - _ => Err(parse_error(resp).await?), - } + self.send(req).await } /// file detail @@ -256,8 +257,8 @@ impl SeafileCore { match status { StatusCode::OK => { - let resp_body = &resp.into_body().bytes().await?; - let file_detail = serde_json::from_slice::(resp_body) + let resp_body = resp.into_body(); + let file_detail: FileDetail = serde_json::from_reader(resp_body.reader()) .map_err(new_json_deserialize_error)?; Ok(file_detail) } @@ -287,8 +288,8 @@ impl SeafileCore { match status { StatusCode::OK => { - let resp_body = &resp.into_body().bytes().await?; - let dir_detail = serde_json::from_slice::(resp_body) + let resp_body = resp.into_body(); + let dir_detail: DirDetail = serde_json::from_reader(resp_body.reader()) .map_err(new_json_deserialize_error)?; Ok(dir_detail) } diff --git a/core/src/services/seafile/error.rs b/core/src/services/seafile/error.rs index 05a0fed99919..1a9a4d5537cb 100644 --- a/core/src/services/seafile/error.rs +++ b/core/src/services/seafile/error.rs @@ -32,9 +32,9 @@ struct SeafileError { } /// Parse error response into Error. 
-pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, _retryable) = match parts.status.as_u16() { 400 => (ErrorKind::InvalidInput, false), @@ -57,7 +57,6 @@ pub async fn parse_error(resp: Response) -> Result { #[cfg(test)] mod test { - use futures::stream; use http::StatusCode; use super::*; @@ -79,10 +78,7 @@ mod test { for res in err_res { let bs = bytes::Bytes::from(res.0); - let body = IncomingAsyncBody::new( - Box::new(oio::into_stream(stream::iter(vec![Ok(bs.clone())]))), - None, - ); + let body = oio::Buffer::from(bs); let resp = Response::builder().status(res.2).body(body).unwrap(); let err = parse_error(resp).await; diff --git a/core/src/services/seafile/lister.rs b/core/src/services/seafile/lister.rs index ff687981ffe9..c07e75d7c245 100644 --- a/core/src/services/seafile/lister.rs +++ b/core/src/services/seafile/lister.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use bytes::Buf; use http::header; use http::Request; use http::StatusCode; @@ -69,8 +70,8 @@ impl oio::PageList for SeafileLister { match status { StatusCode::OK => { - let resp_body = &resp.into_body().bytes().await?; - let infos = serde_json::from_slice::>(resp_body) + let resp_body = resp.into_body(); + let infos: Vec = serde_json::from_reader(resp_body.reader()) .map_err(new_json_deserialize_error)?; for info in infos { diff --git a/core/src/services/seafile/mod.rs b/core/src/services/seafile/mod.rs index dc9e1ccce078..63a112ee7b5c 100644 --- a/core/src/services/seafile/mod.rs +++ b/core/src/services/seafile/mod.rs @@ -22,4 +22,5 @@ pub use backend::SeafileConfig; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/seafile/reader.rs b/core/src/services/seafile/reader.rs new file mode 100644 index 000000000000..8db38dba88d5 --- /dev/null +++ b/core/src/services/seafile/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
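Editor's note: the lister hunk above shows the companion pattern: since `oio::Buffer` implements `bytes::Buf`, JSON and XML payloads are deserialized straight off the buffer through `Buf::reader()`. A self-contained sketch (field names invented; assumes the `bytes`, `serde`, and `serde_json` crates):

```rust
use bytes::{Buf, Bytes};
use serde::Deserialize;

#[derive(Deserialize)]
struct Entry {
    // Hypothetical fields; seafile's real `Info` type has more.
    name: String,
    size: u64,
}

fn main() -> Result<(), serde_json::Error> {
    // Stand-in for `resp.into_body()`: anything implementing `Buf`.
    let body = Bytes::from_static(br#"[{"name":"a.txt","size":3}]"#);

    // `Buf::reader()` adapts the buffer into `std::io::Read`, so serde
    // pulls from it without first copying into a `Vec<u8>`.
    let entries: Vec<Entry> = serde_json::from_reader(body.reader())?;
    assert_eq!(entries[0].name, "a.txt");
    assert_eq!(entries[0].size, 3);
    Ok(())
}
```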
+
+use std::sync::Arc;
+
+use http::StatusCode;
+
+use super::core::SeafileCore;
+use super::error::parse_error;
+use crate::raw::*;
+
+pub struct SeafileReader {
+    core: Arc<SeafileCore>,
+
+    path: String,
+    _op: OpRead,
+}
+
+impl SeafileReader {
+    pub fn new(core: Arc<SeafileCore>, path: &str, op: OpRead) -> Self {
+        SeafileReader {
+            core,
+            path: path.to_string(),
+            _op: op,
+        }
+    }
+}
+
+impl oio::Read for SeafileReader {
+    async fn read_at(&self, offset: u64, limit: usize) -> crate::Result<oio::Buffer> {
+        let range = BytesRange::new(offset, Some(limit as u64));
+
+        let resp = self.core.download_file(&self.path, range).await?;
+
+        let status = resp.status();
+
+        match status {
+            StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()),
+            StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()),
+            _ => Err(parse_error(resp).await?),
+        }
+    }
+}
diff --git a/core/src/services/seafile/writer.rs b/core/src/services/seafile/writer.rs
index dccba7283072..ad7cc98b2e6a 100644
--- a/core/src/services/seafile/writer.rs
+++ b/core/src/services/seafile/writer.rs
@@ -80,10 +80,7 @@ impl oio::OneShotWrite for SeafileWriter {
         let status = resp.status();

         match status {
-            StatusCode::OK => {
-                resp.into_body().consume().await?;
-                Ok(())
-            }
+            StatusCode::OK => Ok(()),
             _ => Err(parse_error(resp).await?),
         }
     }
diff --git a/core/src/services/sftp/backend.rs b/core/src/services/sftp/backend.rs
index beb843110be6..a855523a64c0 100644
--- a/core/src/services/sftp/backend.rs
+++ b/core/src/services/sftp/backend.rs
@@ -20,28 +20,28 @@ use std::fmt::Debug;
 use std::fmt::Formatter;
 use std::path::Path;
 use std::path::PathBuf;
-use std::pin::Pin;

 use async_trait::async_trait;
-use futures::StreamExt;
+use bb8::{PooledConnection, RunError};
 use log::debug;
 use openssh::KnownHosts;
 use openssh::SessionBuilder;
-use openssh_sftp_client::file::TokioCompatFile;
 use openssh_sftp_client::Sftp;
 use openssh_sftp_client::SftpOptions;
 use serde::Deserialize;
+use tokio::sync::OnceCell;

 use super::error::is_not_found;
 use super::error::is_sftp_protocol_error;
 use super::error::parse_sftp_error;
 use super::error::parse_ssh_error;
 use super::lister::SftpLister;
+use super::reader::SftpReader;
 use super::writer::SftpWriter;
 use crate::raw::*;
 use crate::*;

-/// Config for Sftpservices support.
+/// Config for Sftp services support.
 #[derive(Default, Deserialize)]
 #[serde(default)]
 #[non_exhaustive]
@@ -71,7 +71,7 @@ impl Debug for SftpConfig {
 /// SFTP services support. (only works on unix)
 ///
-/// If you are interested in working on windows, pl ease refer to [this](https://github.com/apache/opendal/issues/2963) issue.
+/// If you are interested in working on windows, please refer to [this](https://github.com/apache/opendal/issues/2963) issue.
 /// Welcome to leave your comments or make contributions.
 ///
 /// Warning: Maximum number of file holdings is depending on the remote system configuration.
@@ -211,7 +211,8 @@ impl Builder for SftpBuilder {
             key: self.config.key.clone(),
             known_hosts_strategy,
             copyable: self.config.enable_copy,
-            client: tokio::sync::OnceCell::new(),
+
+            client: OnceCell::new(),
         })
     }
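Editor's note: the sftp hunks that follow replace the single cached session with a `bb8` connection pool. A stripped-down, self-contained model of the same three `ManageConnection` hooks and the `get_owned` flow (a dummy connection type stands in for `Sftp`; assumes the `bb8`, `async-trait`, and `tokio` crates):

```rust
use async_trait::async_trait;

/// Stand-in for an `Sftp` session.
struct Conn;

struct Manager;

#[async_trait]
impl bb8::ManageConnection for Manager {
    type Connection = Conn;
    type Error = std::io::Error;

    // Called whenever the pool needs a fresh connection.
    async fn connect(&self) -> Result<Conn, Self::Error> {
        Ok(Conn)
    }

    // Called before handing out a pooled connection.
    async fn is_valid(&self, _conn: &mut Conn) -> Result<(), Self::Error> {
        Ok(())
    }

    // `false` means connections are always eligible for reuse.
    fn has_broken(&self, _conn: &mut Conn) -> bool {
        false
    }
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let pool = bb8::Pool::builder().max_size(64).build(Manager).await?;

    // `get_owned` returns a `PooledConnection<'static, Manager>`, which is
    // what lets a reader hold a connection across await points.
    let _conn = pool.get_owned().await?;
    Ok(())
}
```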
@@ -224,14 +225,87 @@ }
 /// Backend is used to serve `Accessor` support for sftp.
+#[derive(Clone)]
 pub struct SftpBackend {
+    copyable: bool,
     endpoint: String,
     root: String,
     user: Option<String>,
     key: Option<String>,
     known_hosts_strategy: KnownHosts,
-    copyable: bool,
-    client: tokio::sync::OnceCell<Sftp>,
+
+    client: OnceCell<bb8::Pool<Manager>>,
+}
+
+pub struct Manager {
+    endpoint: String,
+    root: String,
+    user: Option<String>,
+    key: Option<String>,
+    known_hosts_strategy: KnownHosts,
+}
+
+#[async_trait]
+impl bb8::ManageConnection for Manager {
+    type Connection = Sftp;
+    type Error = Error;
+
+    async fn connect(&self) -> std::result::Result<Self::Connection, Self::Error> {
+        let mut session = SessionBuilder::default();
+
+        if let Some(user) = &self.user {
+            session.user(user.clone());
+        }
+
+        if let Some(key) = &self.key {
+            session.keyfile(key);
+        }
+
+        session.known_hosts_check(self.known_hosts_strategy.clone());
+
+        let session = session
+            .connect(&self.endpoint)
+            .await
+            .map_err(parse_ssh_error)?;
+
+        let sftp = Sftp::from_session(session, SftpOptions::default())
+            .await
+            .map_err(parse_sftp_error)?;
+
+        if !self.root.is_empty() {
+            let mut fs = sftp.fs();
+
+            let paths = Path::new(&self.root).components();
+            let mut current = PathBuf::new();
+            for p in paths {
+                current.push(p);
+                let res = fs.create_dir(p).await;
+
+                if let Err(e) = res {
+                    // ignore error if dir already exists
+                    if !is_sftp_protocol_error(&e) {
+                        return Err(parse_sftp_error(e));
+                    }
+                }
+                fs.set_cwd(&current);
+            }
+        }
+
+        debug!("sftp connection created at {}", self.root);
+        Ok(sftp)
+    }
+
+    // Check whether a pooled connection is still valid by stat-ing the root path.
+    async fn is_valid(&self, conn: &mut Self::Connection) -> std::result::Result<(), Self::Error> {
+        let _ = conn.fs().metadata("./").await.map_err(parse_sftp_error)?;
+
+        Ok(())
+    }
+
+    /// Always allow reusing the connection.
+    fn has_broken(&self, _: &mut Self::Connection) -> bool {
+        false
+    }
 }

 impl Debug for SftpBackend {
@@ -240,9 +314,36 @@ impl Debug for SftpBackend {
     }
 }

+impl SftpBackend {
+    pub async fn connect(&self) -> Result<PooledConnection<'static, Manager>> {
+        let client = self
+            .client
+            .get_or_try_init(|| async {
+                bb8::Pool::builder()
+                    .max_size(64)
+                    .build(Manager {
+                        endpoint: self.endpoint.clone(),
+                        root: self.root.clone(),
+                        user: self.user.clone(),
+                        key: self.key.clone(),
+                        known_hosts_strategy: self.known_hosts_strategy.clone(),
+                    })
+                    .await
+            })
+            .await?;
+
+        client.get_owned().await.map_err(|err| match err {
+            RunError::User(err) => err,
+            RunError::TimedOut => {
+                Error::new(ErrorKind::Unexpected, "connection request: timeout").set_temporary()
+            }
+        })
+    }
+}
+
 #[async_trait]
 impl Accessor for SftpBackend {
-    type Reader = oio::TokioReader<Pin<Box<TokioCompatFile>>>;
+    type Reader = SftpReader;
     type Writer = SftpWriter;
     type Lister = Option<SftpLister>;
     type BlockingReader = ();
@@ -257,7 +358,6 @@
                 stat: true,

                 read: true,
-                read_can_seek: true,

                 write: true,
                 write_can_multi: true,
@@ -311,26 +411,10 @@ }

     async fn read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> {
-        let client = self.connect().await?;
-
-        let mut fs = client.fs();
-        fs.set_cwd(&self.root);
-        let path = fs.canonicalize(path).await.map_err(parse_sftp_error)?;
-
-        let f = client
-            .open(path.as_path())
-            .await
-            .map_err(parse_sftp_error)?;
-
-        // Sorry for the ugly code...
-        //
-        // - `f` is a openssh file.
-        // - `TokioCompatFile::new(f)` makes it implements tokio AsyncRead + AsyncSeek for openssh File.
-        // - `Box::pin(x)` to make sure this reader implements `Unpin`, since `TokioCompatFile` is not.
-        // - `oio::TokioReader::new(x)` makes it a `oio::TokioReader` which implements `oio::Read`.
- let r = oio::TokioReader::new(Box::pin(TokioCompatFile::new(f))); - - Ok((RpRead::new(), r)) + Ok(( + RpRead::default(), + SftpReader::new(self.clone(), self.root.clone(), path.to_owned()), + )) } async fn write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -363,54 +447,17 @@ impl Accessor for SftpBackend { let mut fs = client.fs(); fs.set_cwd(&self.root); - if path.ends_with('/') { - let file_path = format!("./{}", path); - let mut dir = match fs.open_dir(&file_path).await { - Ok(dir) => dir, - Err(e) => { - if is_not_found(&e) { - return Ok(RpDelete::default()); - } else { - return Err(parse_sftp_error(e)); - } - } - } - .read_dir() - .boxed(); - - while let Some(file) = dir.next().await { - let file = file.map_err(parse_sftp_error)?; - let file_name = file.filename().to_str(); - if file_name == Some(".") || file_name == Some("..") { - continue; - } - let file_path = Path::new(&self.root).join(file.filename()); - self.delete( - file_path.to_str().ok_or(Error::new( - ErrorKind::Unexpected, - "unable to convert file path to str", - ))?, - OpDelete::default(), - ) - .await?; - } - - match fs.remove_dir(path).await { - Err(e) if !is_not_found(&e) => { - return Err(parse_sftp_error(e)); - } - _ => {} - } + let res = if path.ends_with('/') { + fs.remove_dir(path).await } else { - match fs.remove_file(path).await { - Err(e) if !is_not_found(&e) => { - return Err(parse_sftp_error(e)); - } - _ => {} - } + fs.remove_file(path).await }; - Ok(RpDelete::default()) + match res { + Ok(()) => Ok(RpDelete::default()), + Err(e) if is_not_found(&e) => Ok(RpDelete::default()), + Err(e) => Err(parse_sftp_error(e)), + } } async fn list(&self, path: &str, _: OpList) -> Result<(RpList, Self::Lister)> { @@ -475,71 +522,3 @@ impl Accessor for SftpBackend { Ok(RpRename::default()) } } - -impl SftpBackend { - async fn connect(&self) -> Result<&Sftp> { - let sftp = self - .client - .get_or_try_init(|| { - Box::pin(connect_sftp( - self.endpoint.as_str(), - self.root.clone(), - self.user.clone(), - self.key.clone(), - self.known_hosts_strategy.clone(), - )) - }) - .await?; - - Ok(sftp) - } -} - -async fn connect_sftp( - endpoint: &str, - root: String, - user: Option, - key: Option, - known_hosts_strategy: KnownHosts, -) -> Result { - let mut session = SessionBuilder::default(); - - if let Some(user) = user { - session.user(user); - } - - if let Some(key) = &key { - session.keyfile(key); - } - - session.known_hosts_check(known_hosts_strategy); - - let session = session.connect(&endpoint).await.map_err(parse_ssh_error)?; - - let sftp = Sftp::from_session(session, SftpOptions::default()) - .await - .map_err(parse_sftp_error)?; - - if !root.is_empty() { - let mut fs = sftp.fs(); - - let paths = Path::new(&root).components(); - let mut current = PathBuf::new(); - for p in paths { - current.push(p); - let res = fs.create_dir(p).await; - - if let Err(e) = res { - // ignore error if dir already exists - if !is_sftp_protocol_error(&e) { - return Err(parse_sftp_error(e)); - } - } - fs.set_cwd(¤t); - } - } - - debug!("sftp connection created at {}", root); - - Ok(sftp) -} diff --git a/core/src/services/sftp/error.rs b/core/src/services/sftp/error.rs index 4c6fd31beaad..9fd947b21b49 100644 --- a/core/src/services/sftp/error.rs +++ b/core/src/services/sftp/error.rs @@ -34,7 +34,14 @@ pub fn parse_sftp_error(e: SftpClientError) -> Error { _ => ErrorKind::Unexpected, }; - Error::new(kind, "sftp error").set_source(e) + let mut err = Error::new(kind, "sftp error").set_source(e); + + // Mark error as 
temporary if it's unexpected.
+    if kind == ErrorKind::Unexpected {
+        err = err.set_temporary();
+    }
+
+    err
 }

 pub fn parse_ssh_error(e: SshError) -> Error {
diff --git a/core/src/services/sftp/mod.rs b/core/src/services/sftp/mod.rs
index fcc74afc2951..001898171b0f 100644
--- a/core/src/services/sftp/mod.rs
+++ b/core/src/services/sftp/mod.rs
@@ -21,5 +21,6 @@ pub use backend::SftpConfig;
 mod backend;
 mod error;
 mod lister;
+mod reader;
 mod utils;
 mod writer;
diff --git a/core/src/services/sftp/reader.rs b/core/src/services/sftp/reader.rs
new file mode 100644
index 000000000000..6f421023fe2d
--- /dev/null
+++ b/core/src/services/sftp/reader.rs
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use super::backend::SftpBackend;
+use super::error::parse_sftp_error;
+use crate::raw::*;
+use crate::*;
+use bytes::BytesMut;
+use std::io::SeekFrom;
+use tokio::io::AsyncSeekExt;
+
+pub struct SftpReader {
+    inner: SftpBackend,
+    root: String,
+    path: String,
+}
+
+impl SftpReader {
+    pub fn new(inner: SftpBackend, root: String, path: String) -> Self {
+        Self { inner, root, path }
+    }
+}
+
+impl oio::Read for SftpReader {
+    async fn read_at(&self, offset: u64, limit: usize) -> Result<oio::Buffer> {
+        let client = self.inner.connect().await?;
+
+        let mut fs = client.fs();
+        fs.set_cwd(&self.root);
+
+        let path = fs
+            .canonicalize(&self.path)
+            .await
+            .map_err(parse_sftp_error)?;
+
+        let mut f = client
+            .open(path.as_path())
+            .await
+            .map_err(parse_sftp_error)?;
+
+        f.seek(SeekFrom::Start(offset))
+            .await
+            .map_err(new_std_io_error)?;
+
+        let mut size = limit;
+        if size == 0 {
+            return Ok(oio::Buffer::new());
+        }
+
+        let mut buf = BytesMut::with_capacity(limit);
+        while size > 0 {
+            let len = buf.len();
+            if let Some(bytes) = f
+                .read(size as u32, buf.split_off(len))
+                .await
+                .map_err(parse_sftp_error)?
+ { + size -= bytes.len(); + buf.unsplit(bytes); + } else { + break; + } + } + Ok(oio::Buffer::from(buf.freeze())) + } +} diff --git a/core/src/services/sftp/writer.rs b/core/src/services/sftp/writer.rs index 9bee111e2440..d06505c92da6 100644 --- a/core/src/services/sftp/writer.rs +++ b/core/src/services/sftp/writer.rs @@ -22,7 +22,7 @@ use openssh_sftp_client::file::File; use openssh_sftp_client::file::TokioCompatFile; use tokio::io::AsyncWriteExt; -use crate::raw::{new_std_io_error, oio}; +use crate::raw::*; use crate::*; pub struct SftpWriter { diff --git a/core/src/services/supabase/backend.rs b/core/src/services/supabase/backend.rs index 99f04723ad90..9619e9824eb4 100644 --- a/core/src/services/supabase/backend.rs +++ b/core/src/services/supabase/backend.rs @@ -26,6 +26,7 @@ use super::core::*; use super::error::parse_error; use super::writer::*; use crate::raw::*; +use crate::services::supabase::reader::SupabaseReader; use crate::*; /// [Supabase](https://supabase.com/) service support @@ -156,7 +157,7 @@ pub struct SupabaseBackend { #[async_trait] impl Accessor for SupabaseBackend { - type Reader = IncomingAsyncBody; + type Reader = SupabaseReader; type Writer = oio::OneShotWriter; // todo: implement Lister to support list and scan type Lister = (); @@ -204,14 +205,10 @@ impl Accessor for SupabaseBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.supabase_get_object(path, args.range()).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + SupabaseReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/services/supabase/core.rs b/core/src/services/supabase/core.rs index ccdb2e1abad0..628af9e601b4 100644 --- a/core/src/services/supabase/core.rs +++ b/core/src/services/supabase/core.rs @@ -221,7 +221,7 @@ impl SupabaseCore { // core utils impl SupabaseCore { - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.http_client.send(req).await } @@ -229,7 +229,7 @@ impl SupabaseCore { &self, path: &str, range: BytesRange, - ) -> Result> { + ) -> Result> { let mut req = if self.key.is_some() { self.supabase_get_object_auth_request(path, range)? } else { @@ -239,7 +239,7 @@ impl SupabaseCore { self.send(req).await } - pub async fn supabase_head_object(&self, path: &str) -> Result> { + pub async fn supabase_head_object(&self, path: &str) -> Result> { let mut req = if self.key.is_some() { self.supabase_head_object_auth_request(path)? } else { @@ -249,10 +249,7 @@ impl SupabaseCore { self.send(req).await } - pub async fn supabase_get_object_info( - &self, - path: &str, - ) -> Result> { + pub async fn supabase_get_object_info(&self, path: &str) -> Result> { let mut req = if self.key.is_some() { self.supabase_get_object_info_auth_request(path)? 
} else { @@ -262,7 +259,7 @@ impl SupabaseCore { self.send(req).await } - pub async fn supabase_delete_object(&self, path: &str) -> Result> { + pub async fn supabase_delete_object(&self, path: &str) -> Result> { let mut req = self.supabase_delete_object_request(path)?; self.sign(&mut req)?; self.send(req).await diff --git a/core/src/services/supabase/error.rs b/core/src/services/supabase/error.rs index 58288959b001..4e4602be1ebb 100644 --- a/core/src/services/supabase/error.rs +++ b/core/src/services/supabase/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::Response; use http::StatusCode; use serde::Deserialize; @@ -35,9 +36,9 @@ struct SupabaseError { } /// Parse the supabase error type to the OpenDAL error type -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); // Check HTTP status code first/ let (mut kind, mut retryable) = match parts.status.as_u16() { diff --git a/core/src/services/supabase/mod.rs b/core/src/services/supabase/mod.rs index 89aeada88983..5abaac6b8575 100644 --- a/core/src/services/supabase/mod.rs +++ b/core/src/services/supabase/mod.rs @@ -19,4 +19,5 @@ mod backend; pub use backend::SupabaseBuilder as Supabase; mod core; mod error; +mod reader; mod writer; diff --git a/core/src/services/supabase/reader.rs b/core/src/services/supabase/reader.rs new file mode 100644 index 000000000000..7554b57bd90c --- /dev/null +++ b/core/src/services/supabase/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
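Editor's note: the `SupabaseReader` below follows the same contract as every reader in this patch: `read_at(&self, offset, limit)` names an absolute range on each call, so no cursor is stored on the reader. A toy model of that contract (types invented for illustration):

```rust
/// Toy stand-in for `oio::Read`: positional reads, no seek state.
trait ReadAt {
    fn read_at(&self, offset: u64, limit: usize) -> Vec<u8>;
}

struct InMemory(Vec<u8>);

impl ReadAt for InMemory {
    fn read_at(&self, offset: u64, limit: usize) -> Vec<u8> {
        let start = (offset as usize).min(self.0.len());
        let end = (start + limit).min(self.0.len());
        // Reads past the end shrink to an empty buffer instead of failing.
        self.0[start..end].to_vec()
    }
}

fn main() {
    let r = InMemory(b"range based read".to_vec());
    // `read_at` takes `&self`, so two reads never fight over a shared cursor.
    assert_eq!(r.read_at(6, 5), b"based");
    assert!(r.read_at(100, 5).is_empty());
}
```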
+ +use std::sync::Arc; + +use http::StatusCode; + +use super::core::SupabaseCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct SupabaseReader { + core: Arc, + + path: String, + _op: OpRead, +} + +impl SupabaseReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + SupabaseReader { + core, + path: path.to_string(), + _op: op, + } + } +} + +impl oio::Read for SupabaseReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.supabase_get_object(&self.path, range).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/supabase/writer.rs b/core/src/services/supabase/writer.rs index e5a753942eb5..38be282c3a66 100644 --- a/core/src/services/supabase/writer.rs +++ b/core/src/services/supabase/writer.rs @@ -22,7 +22,6 @@ use http::StatusCode; use super::core::*; use super::error::parse_error; - use crate::raw::*; use crate::*; @@ -57,10 +56,7 @@ impl oio::OneShotWrite for SupabaseWriter { let resp = self.core.send(req).await?; match resp.status() { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/swift/backend.rs b/core/src/services/swift/backend.rs index 393e3038729b..c3325bee39f2 100644 --- a/core/src/services/swift/backend.rs +++ b/core/src/services/swift/backend.rs @@ -29,6 +29,7 @@ use super::error::parse_error; use super::lister::SwiftLister; use super::writer::SwiftWriter; use crate::raw::*; +use crate::services::swift::reader::SwiftReader; use crate::*; /// [OpenStack Swift](https://docs.openstack.org/api-ref/object-store/#)'s REST API support. 
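Editor's note: looking back at the SFTP reader's fill loop, the `buf.split_off(len)` / `buf.unsplit(bytes)` pair deserves a gloss. A self-contained round-trip with just the `bytes` crate, showing that both halves share one allocation and nothing is dropped:

```rust
use bytes::{BufMut, BytesMut};

fn main() {
    let mut buf = BytesMut::with_capacity(16);
    buf.put_slice(b"hello");

    // `split_off(len)` keeps the filled prefix in `buf` and returns the
    // empty tail; both views point into the same allocation.
    let mut tail = buf.split_off(buf.len());
    assert!(tail.is_empty());

    // Pretend an I/O call filled the tail, as `file.read(..)` does for sftp.
    tail.put_slice(b", world");

    // `unsplit` merges contiguous halves back together without copying.
    buf.unsplit(tail);
    assert_eq!(&buf[..], &b"hello, world"[..]);
}
```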
@@ -190,7 +191,7 @@ pub struct SwiftBackend { #[async_trait] impl Accessor for SwiftBackend { - type Reader = IncomingAsyncBody; + type Reader = SwiftReader; type Writer = oio::OneShotWriter; type Lister = oio::PageLister; type BlockingReader = (); @@ -205,8 +206,6 @@ impl Accessor for SwiftBackend { stat: true, read: true, - read_can_next: true, - read_with_range: true, write: true, write_can_empty: true, @@ -235,23 +234,10 @@ impl Accessor for SwiftBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.swift_read(path, args).await?; - - match resp.status() { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + SwiftReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -293,10 +279,7 @@ impl Accessor for SwiftBackend { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(RpCopy::default()) - } + StatusCode::CREATED | StatusCode::OK => Ok(RpCopy::default()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/swift/core.rs b/core/src/services/swift/core.rs index 1eb0f3d2a94e..c1cd294e97c9 100644 --- a/core/src/services/swift/core.rs +++ b/core/src/services/swift/core.rs @@ -44,7 +44,7 @@ impl Debug for SwiftCore { } impl SwiftCore { - pub async fn swift_delete(&self, path: &str) -> Result> { + pub async fn swift_delete(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( @@ -71,7 +71,7 @@ impl SwiftCore { delimiter: &str, limit: Option, marker: &str, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); // The delimiter is used to disable recursive listing. 
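Editor's note: the status handling in these readers is deliberately uniform. Factored into a plain function over `http::StatusCode` (a sketch, not code from this patch), the contract reads:

```rust
use http::StatusCode;

/// How every ranged GET in this patch classifies its response: full or
/// partial content yields the body, an unsatisfiable range yields an
/// empty buffer, and anything else goes through `parse_error`.
fn classify(status: StatusCode) -> &'static str {
    match status {
        StatusCode::OK | StatusCode::PARTIAL_CONTENT => "body",
        StatusCode::RANGE_NOT_SATISFIABLE => "empty buffer",
        _ => "error",
    }
}

fn main() {
    assert_eq!(classify(StatusCode::PARTIAL_CONTENT), "body");
    assert_eq!(classify(StatusCode::RANGE_NOT_SATISFIABLE), "empty buffer");
    assert_eq!(classify(StatusCode::NOT_FOUND), "error");
}
```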
@@ -107,7 +107,7 @@ impl SwiftCore { path: &str, length: u64, body: AsyncBody, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( @@ -127,9 +127,12 @@ impl SwiftCore { self.client.send(req).await } - pub async fn swift_read(&self, path: &str, arg: OpRead) -> Result> { - let range = arg.range(); - + pub async fn swift_read( + &self, + path: &str, + range: BytesRange, + _arg: &OpRead, + ) -> Result> { let p = build_abs_path(&self.root, path) .trim_end_matches('/') .to_string(); @@ -146,7 +149,7 @@ impl SwiftCore { req = req.header("X-Auth-Token", &self.token); if !range.is_full() { - req = req.header("Range", &range.to_header()); + req = req.header(header::RANGE, range.to_header()); } let req = req @@ -156,11 +159,7 @@ impl SwiftCore { self.client.send(req).await } - pub async fn swift_copy( - &self, - src_p: &str, - dst_p: &str, - ) -> Result> { + pub async fn swift_copy(&self, src_p: &str, dst_p: &str) -> Result> { // NOTE: current implementation is limited to same container and root let src_p = format!( @@ -197,7 +196,7 @@ impl SwiftCore { self.client.send(req).await } - pub async fn swift_get_metadata(&self, path: &str) -> Result> { + pub async fn swift_get_metadata(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( diff --git a/core/src/services/swift/error.rs b/core/src/services/swift/error.rs index 836787ec0029..6c956b20f442 100644 --- a/core/src/services/swift/error.rs +++ b/core/src/services/swift/error.rs @@ -34,9 +34,9 @@ struct ErrorResponse { p: String, } -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/swift/lister.rs b/core/src/services/swift/lister.rs index 79b8fdfa6f92..703780d71f79 100644 --- a/core/src/services/swift/lister.rs +++ b/core/src/services/swift/lister.rs @@ -17,6 +17,8 @@ use std::sync::Arc; +use bytes::Buf; + use super::core::*; use super::error::parse_error; use crate::raw::*; @@ -55,9 +57,9 @@ impl oio::PageList for SwiftLister { return Err(error); } - let bytes = response.into_body().bytes().await?; + let bytes = response.into_body(); let decoded_response: Vec = - serde_json::from_slice(&bytes).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; // Update token and done based on resp. if let Some(entry) = decoded_response.last() { diff --git a/core/src/services/swift/mod.rs b/core/src/services/swift/mod.rs index 9cfeb92e07a0..c4d08c8c44cb 100644 --- a/core/src/services/swift/mod.rs +++ b/core/src/services/swift/mod.rs @@ -21,4 +21,5 @@ pub use backend::SwiftBuilder as Swift; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/swift/reader.rs b/core/src/services/swift/reader.rs new file mode 100644 index 000000000000..885655761e73 --- /dev/null +++ b/core/src/services/swift/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use http::StatusCode; + +use super::core::SwiftCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct SwiftReader { + core: Arc, + + path: String, + op: OpRead, +} + +impl SwiftReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + SwiftReader { + core, + path: path.to_string(), + op, + } + } +} + +impl oio::Read for SwiftReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.swift_read(&self.path, range, &self.op).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/swift/writer.rs b/core/src/services/swift/writer.rs index 6004580c7e5b..d4ab3ca297ef 100644 --- a/core/src/services/swift/writer.rs +++ b/core/src/services/swift/writer.rs @@ -22,7 +22,6 @@ use http::StatusCode; use super::core::SwiftCore; use super::error::parse_error; - use crate::raw::*; use crate::*; @@ -47,10 +46,7 @@ impl oio::OneShotWrite for SwiftWriter { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED | StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/upyun/backend.rs b/core/src/services/upyun/backend.rs index c198bb32b50c..718f04d1dd68 100644 --- a/core/src/services/upyun/backend.rs +++ b/core/src/services/upyun/backend.rs @@ -33,6 +33,7 @@ use super::writer::UpyunWriter; use super::writer::UpyunWriters; use crate::raw::*; use crate::services::upyun::core::UpyunSigner; +use crate::services::upyun::reader::UpyunReader; use crate::*; /// Config for backblaze upyun services support. 
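Editor's note: upyun's `download_file` below gains the same `BytesRange` parameter, and each reader builds that range from `(offset, limit)`. The header arithmetic, as a hypothetical mirror of `BytesRange::to_header` (HTTP ranges are inclusive on both ends, hence the `- 1`):

```rust
/// Hypothetical equivalent of `BytesRange::new(offset, Some(limit)).to_header()`.
/// Assumes `limit > 0`; the real readers return early on empty reads.
fn range_header(offset: u64, limit: u64) -> String {
    format!("bytes={}-{}", offset, offset + limit - 1)
}

fn main() {
    // A 4 KiB read at offset 1024 asks for bytes 1024 through 5119.
    assert_eq!(range_header(1024, 4096), "bytes=1024-5119");
    // The first byte alone:
    assert_eq!(range_header(0, 1), "bytes=0-0");
}
```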
@@ -233,7 +234,7 @@ pub struct UpyunBackend { #[async_trait] impl Accessor for UpyunBackend { - type Reader = IncomingAsyncBody; + type Reader = UpyunReader; type Writer = UpyunWriters; type Lister = oio::PageLister; type BlockingReader = (); @@ -250,7 +251,6 @@ impl Accessor for UpyunBackend { create_dir: true, read: true, - read_can_next: true, write: true, write_can_empty: true, @@ -297,22 +297,11 @@ impl Accessor for UpyunBackend { } } - async fn read(&self, path: &str, _args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.download_file(path).await?; - - let status = resp.status(); - - match status { - StatusCode::OK => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - _ => Err(parse_error(resp).await?), - } + async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + Ok(( + RpRead::default(), + UpyunReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -348,11 +337,7 @@ impl Accessor for UpyunBackend { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - - Ok(RpCopy::default()) - } + StatusCode::OK => Ok(RpCopy::default()), _ => Err(parse_error(resp).await?), } } @@ -363,11 +348,7 @@ impl Accessor for UpyunBackend { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - - Ok(RpRename::default()) - } + StatusCode::OK => Ok(RpRename::default()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/upyun/core.rs b/core/src/services/upyun/core.rs index 49b430768f71..1fcaf429db96 100644 --- a/core/src/services/upyun/core.rs +++ b/core/src/services/upyun/core.rs @@ -82,7 +82,7 @@ impl Debug for UpyunCore { impl UpyunCore { #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } @@ -104,7 +104,11 @@ impl UpyunCore { } impl UpyunCore { - pub async fn download_file(&self, path: &str) -> Result> { + pub async fn download_file( + &self, + path: &str, + range: BytesRange, + ) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( @@ -116,6 +120,7 @@ impl UpyunCore { let req = Request::get(url); let mut req = req + .header(header::RANGE, range.to_header()) .body(AsyncBody::Empty) .map_err(new_request_build_error)?; @@ -124,7 +129,7 @@ impl UpyunCore { self.send(req).await } - pub async fn info(&self, path: &str) -> Result> { + pub async fn info(&self, path: &str) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( @@ -185,7 +190,7 @@ impl UpyunCore { Ok(req) } - pub async fn delete(&self, path: &str) -> Result> { + pub async fn delete(&self, path: &str) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( @@ -205,7 +210,7 @@ impl UpyunCore { self.send(req).await } - pub async fn copy(&self, from: &str, to: &str) -> Result> { + pub async fn copy(&self, from: &str, to: &str) -> Result> { let from = format!("/{}/{}", self.bucket, build_abs_path(&self.root, from)); let to = build_abs_path(&self.root, to); @@ -233,7 +238,7 @@ impl UpyunCore { self.send(req).await } - pub async fn move_object(&self, from: &str, to: &str) -> Result> { + pub async fn move_object(&self, from: &str, to: &str) -> Result> { let from = format!("/{}/{}", self.bucket, 
build_abs_path(&self.root, from)); let to = build_abs_path(&self.root, to); @@ -261,7 +266,7 @@ impl UpyunCore { self.send(req).await } - pub async fn create_dir(&self, path: &str) -> Result> { + pub async fn create_dir(&self, path: &str) -> Result> { let path = build_abs_path(&self.root, path); let path = path[..path.len() - 1].to_string(); @@ -290,7 +295,7 @@ impl UpyunCore { &self, path: &str, args: &OpWrite, - ) -> Result> { + ) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( @@ -364,7 +369,7 @@ impl UpyunCore { &self, path: &str, upload_id: &str, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( @@ -393,7 +398,7 @@ impl UpyunCore { path: &str, iter: &str, limit: Option, - ) -> Result> { + ) -> Result> { let path = build_abs_path(&self.root, path); let url = format!( diff --git a/core/src/services/upyun/error.rs b/core/src/services/upyun/error.rs index 51f4934b11b2..d3eec02d7b9b 100644 --- a/core/src/services/upyun/error.rs +++ b/core/src/services/upyun/error.rs @@ -35,9 +35,9 @@ struct UpyunError { } /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status.as_u16() { 403 => (ErrorKind::PermissionDenied, false), @@ -67,7 +67,6 @@ pub async fn parse_error(resp: Response) -> Result { #[cfg(test)] mod test { - use futures::stream; use http::StatusCode; use super::*; @@ -89,10 +88,7 @@ mod test { for res in err_res { let bs = bytes::Bytes::from(res.0); - let body = IncomingAsyncBody::new( - Box::new(oio::into_stream(stream::iter(vec![Ok(bs.clone())]))), - None, - ); + let body = oio::Buffer::from(bs); let resp = Response::builder().status(res.2).body(body).unwrap(); let err = parse_error(resp).await; diff --git a/core/src/services/upyun/lister.rs b/core/src/services/upyun/lister.rs index 547facbf7508..9d6a756d2d49 100644 --- a/core/src/services/upyun/lister.rs +++ b/core/src/services/upyun/lister.rs @@ -17,6 +17,8 @@ use std::sync::Arc; +use bytes::Buf; + use super::core::ListObjectsResponse; use super::core::UpyunCore; use super::error::parse_error; @@ -66,10 +68,10 @@ impl oio::PageList for UpyunLister { } } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let response = serde_json::from_slice::(&bs) - .map_err(new_json_deserialize_error)?; + let response: ListObjectsResponse = + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; // ref https://help.upyun.com/knowledge-base/rest_api/#e88eb7e58f96e79baee5bd95e69687e4bbb6e58897e8a1a8 // when iter is "g2gCZAAEbmV4dGQAA2VvZg", it means the list is done. diff --git a/core/src/services/upyun/mod.rs b/core/src/services/upyun/mod.rs index 039f2aa22944..a390359a7140 100644 --- a/core/src/services/upyun/mod.rs +++ b/core/src/services/upyun/mod.rs @@ -22,4 +22,5 @@ pub use backend::UpyunConfig; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/upyun/reader.rs b/core/src/services/upyun/reader.rs new file mode 100644 index 000000000000..e2c129cf4f75 --- /dev/null +++ b/core/src/services/upyun/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use http::StatusCode; + +use super::core::UpyunCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct UpyunReader { + core: Arc, + + path: String, + _op: OpRead, +} + +impl UpyunReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + UpyunReader { + core, + path: path.to_string(), + _op: op, + } + } +} + +impl oio::Read for UpyunReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.download_file(&self.path, range).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/upyun/writer.rs b/core/src/services/upyun/writer.rs index 833fff34f3ba..fc05fbf818ea 100644 --- a/core/src/services/upyun/writer.rs +++ b/core/src/services/upyun/writer.rs @@ -51,10 +51,7 @@ impl oio::MultipartWrite for UpyunWriter { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -98,14 +95,10 @@ impl oio::MultipartWrite for UpyunWriter { let status = resp.status(); match status { - StatusCode::NO_CONTENT | StatusCode::CREATED => { - resp.into_body().consume().await?; - - Ok(oio::MultipartPart { - part_number, - etag: "".to_string(), - }) - } + StatusCode::NO_CONTENT | StatusCode::CREATED => Ok(oio::MultipartPart { + part_number, + etag: "".to_string(), + }), _ => Err(parse_error(resp).await?), } } @@ -119,11 +112,7 @@ impl oio::MultipartWrite for UpyunWriter { let status = resp.status(); match status { - StatusCode::NO_CONTENT => { - resp.into_body().consume().await?; - - Ok(()) - } + StatusCode::NO_CONTENT => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/vercel_artifacts/backend.rs b/core/src/services/vercel_artifacts/backend.rs index 27ef788e19bc..b2e633df283d 100644 --- a/core/src/services/vercel_artifacts/backend.rs +++ b/core/src/services/vercel_artifacts/backend.rs @@ -26,6 +26,7 @@ use http::StatusCode; use super::error::parse_error; use super::writer::VercelArtifactsWriter; use crate::raw::*; +use crate::services::vercel_artifacts::reader::VercelArtifactsReader; use crate::*; #[doc = include_str!("docs.md")] @@ -45,7 +46,7 @@ impl Debug for VercelArtifactsBackend { #[async_trait] impl Accessor for VercelArtifactsBackend { - type Reader = IncomingAsyncBody; + type Reader = VercelArtifactsReader; type Writer = oio::OneShotWriter; type Lister = (); type BlockingReader = (); @@ -59,7 +60,6 @@ impl Accessor for VercelArtifactsBackend { stat: true, read: true, - read_can_next: true, 
write: true, @@ -85,15 +85,10 @@ impl Accessor for VercelArtifactsBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.vercel_artifacts_get(path, args).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), - - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + VercelArtifactsReader::new(self.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -109,11 +104,12 @@ impl Accessor for VercelArtifactsBackend { } impl VercelArtifactsBackend { - async fn vercel_artifacts_get( + pub async fn vercel_artifacts_get( &self, hash: &str, - args: OpRead, - ) -> Result> { + range: BytesRange, + _: &OpRead, + ) -> Result> { let url: String = format!( "https://api.vercel.com/v8/artifacts/{}", percent_encode_path(hash) @@ -121,8 +117,8 @@ impl VercelArtifactsBackend { let mut req = Request::get(&url); - if !args.range().is_full() { - req = req.header(header::RANGE, args.range().to_header()); + if !range.is_full() { + req = req.header(header::RANGE, range.to_header()); } let auth_header_content = format!("Bearer {}", self.access_token); @@ -140,7 +136,7 @@ impl VercelArtifactsBackend { hash: &str, size: u64, body: AsyncBody, - ) -> Result> { + ) -> Result> { let url = format!( "https://api.vercel.com/v8/artifacts/{}", percent_encode_path(hash) @@ -158,7 +154,7 @@ impl VercelArtifactsBackend { self.client.send(req).await } - pub async fn vercel_artifacts_stat(&self, hash: &str) -> Result> { + pub async fn vercel_artifacts_stat(&self, hash: &str) -> Result> { let url = format!( "https://api.vercel.com/v8/artifacts/{}", percent_encode_path(hash) diff --git a/core/src/services/vercel_artifacts/error.rs b/core/src/services/vercel_artifacts/error.rs index 60de6ae0d0a9..30a90f7a4f92 100644 --- a/core/src/services/vercel_artifacts/error.rs +++ b/core/src/services/vercel_artifacts/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::Response; use http::StatusCode; @@ -24,9 +25,9 @@ use crate::ErrorKind; use crate::Result; /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/vercel_artifacts/mod.rs b/core/src/services/vercel_artifacts/mod.rs index 3bfddabca7af..656bbfdc5df3 100644 --- a/core/src/services/vercel_artifacts/mod.rs +++ b/core/src/services/vercel_artifacts/mod.rs @@ -18,6 +18,7 @@ mod backend; mod builder; mod error; +mod reader; mod writer; pub use builder::VercelArtifactsBuilder as VercelArtifacts; diff --git a/core/src/services/vercel_artifacts/reader.rs b/core/src/services/vercel_artifacts/reader.rs new file mode 100644 index 000000000000..1e80edb23244 --- /dev/null +++ b/core/src/services/vercel_artifacts/reader.rs @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use http::StatusCode; + +use super::error::parse_error; +use crate::raw::*; +use crate::services::vercel_artifacts::backend::VercelArtifactsBackend; + +pub struct VercelArtifactsReader { + core: VercelArtifactsBackend, + + path: String, + op: OpRead, +} + +impl VercelArtifactsReader { + pub fn new(core: VercelArtifactsBackend, path: &str, op: OpRead) -> Self { + VercelArtifactsReader { + core, + path: path.to_string(), + op, + } + } +} + +impl oio::Read for VercelArtifactsReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self + .core + .vercel_artifacts_get(&self.path, range, &self.op) + .await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/vercel_artifacts/writer.rs b/core/src/services/vercel_artifacts/writer.rs index e48b8fe5bb78..b5d804dab14e 100644 --- a/core/src/services/vercel_artifacts/writer.rs +++ b/core/src/services/vercel_artifacts/writer.rs @@ -50,10 +50,7 @@ impl oio::OneShotWrite for VercelArtifactsWriter { let status = resp.status(); match status { - StatusCode::OK | StatusCode::ACCEPTED => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK | StatusCode::ACCEPTED => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/vercel_blob/backend.rs b/core/src/services/vercel_blob/backend.rs index 4f63fc0362a7..d1bc66618894 100644 --- a/core/src/services/vercel_blob/backend.rs +++ b/core/src/services/vercel_blob/backend.rs @@ -21,6 +21,7 @@ use std::fmt::Formatter; use std::sync::Arc; use async_trait::async_trait; +use bytes::Buf; use http::StatusCode; use log::debug; use serde::Deserialize; @@ -33,6 +34,7 @@ use super::lister::VercelBlobLister; use super::writer::VercelBlobWriter; use super::writer::VercelBlobWriters; use crate::raw::*; +use crate::services::vercel_blob::reader::VercelBlobReader; use crate::*; /// Config for backblaze VercelBlob services support. 
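Editor's note: the vercel_blob hunk below repeats the structural change every backend gets: `Accessor::read` does no I/O anymore; it returns `RpRead::default()` plus a reader value, and the first request is only sent inside `read_at`. Schematically, with invented types:

```rust
struct LazyReader {
    path: String,
}

impl LazyReader {
    // Constructing the reader is pure: nothing is sent here.
    fn new(path: &str) -> Self {
        LazyReader { path: path.to_string() }
    }

    // Each positioned read issues exactly one (simulated) ranged request.
    fn read_at(&self, offset: u64, limit: u64) -> String {
        format!("GET {} Range: bytes={}-{}", self.path, offset, offset + limit - 1)
    }
}

fn main() {
    let r = LazyReader::new("/repo/a.txt"); // cheap and infallible
    assert_eq!(r.read_at(0, 10), "GET /repo/a.txt Range: bytes=0-9");
}
```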
@@ -178,7 +180,7 @@ pub struct VercelBlobBackend { #[async_trait] impl Accessor for VercelBlobBackend { - type Reader = IncomingAsyncBody; + type Reader = VercelBlobReader; type Writer = VercelBlobWriters; type Lister = oio::PageLister; type BlockingReader = (); @@ -193,8 +195,6 @@ impl Accessor for VercelBlobBackend { stat: true, read: true, - read_can_next: true, - read_with_range: true, write: true, write_can_empty: true, @@ -220,9 +220,10 @@ impl Accessor for VercelBlobBackend { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let resp: Blob = serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + let resp: Blob = + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; parse_blob(&resp).map(RpStat::new) } @@ -231,21 +232,10 @@ impl Accessor for VercelBlobBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.download(path, args).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + VercelBlobReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -267,11 +257,7 @@ impl Accessor for VercelBlobBackend { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - - Ok(RpCopy::default()) - } + StatusCode::OK => Ok(RpCopy::default()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/vercel_blob/core.rs b/core/src/services/vercel_blob/core.rs index 9fea54d9f9eb..7ab84b11b6c0 100644 --- a/core/src/services/vercel_blob/core.rs +++ b/core/src/services/vercel_blob/core.rs @@ -18,6 +18,7 @@ use std::fmt::Debug; use std::fmt::Formatter; +use bytes::Buf; use bytes::Bytes; use http::header; use http::request; @@ -73,7 +74,7 @@ impl Debug for VercelBlobCore { impl VercelBlobCore { #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } @@ -83,7 +84,12 @@ impl VercelBlobCore { } impl VercelBlobCore { - pub async fn download(&self, path: &str, args: OpRead) -> Result> { + pub async fn download( + &self, + path: &str, + range: BytesRange, + _: &OpRead, + ) -> Result> { let p = build_abs_path(&self.root, path); // Vercel blob use an unguessable random id url to download the file // So we use list to get the url of the file and then use it to download the file @@ -98,9 +104,8 @@ impl VercelBlobCore { let mut req = Request::get(url); - let range = args.range(); if !range.is_full() { - req = req.header(http::header::RANGE, range.to_header()); + req = req.header(header::RANGE, range.to_header()); } // Set body @@ -179,7 +184,7 @@ impl VercelBlobCore { } } - pub async fn head(&self, path: &str) -> Result> { + pub async fn head(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path); let resp = self.list(&p, Some(1)).await?; @@ -205,7 +210,7 @@ impl VercelBlobCore { self.send(req).await } - pub async fn copy(&self, from: &str, to: &str) -> Result> { + pub async fn copy(&self, from: &str, to: &str) -> Result> { let from = build_abs_path(&self.root, from); let resp = self.list(&from, 
Some(1)).await?; @@ -265,10 +270,10 @@ impl VercelBlobCore { return Err(parse_error(resp).await?); } - let body = resp.into_body().bytes().await?; + let body = resp.into_body(); let resp: ListResponse = - serde_json::from_slice(&body).map_err(new_json_deserialize_error)?; + serde_json::from_reader(body.reader()).map_err(new_json_deserialize_error)?; Ok(resp) } @@ -277,7 +282,7 @@ impl VercelBlobCore { &self, path: &str, args: &OpWrite, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( @@ -311,7 +316,7 @@ impl VercelBlobCore { part_number: usize, size: u64, body: AsyncBody, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( @@ -340,7 +345,7 @@ impl VercelBlobCore { path: &str, upload_id: &str, parts: Vec, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let url = format!( diff --git a/core/src/services/vercel_blob/error.rs b/core/src/services/vercel_blob/error.rs index 2f8f5ef600f0..842b919c1629 100644 --- a/core/src/services/vercel_blob/error.rs +++ b/core/src/services/vercel_blob/error.rs @@ -40,9 +40,9 @@ struct VercelBlobErrorDetail { } /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status.as_u16() { 403 => (ErrorKind::PermissionDenied, false), @@ -68,7 +68,6 @@ pub async fn parse_error(resp: Response) -> Result { #[cfg(test)] mod test { - use futures::stream; use http::StatusCode; use super::*; @@ -87,11 +86,7 @@ mod test { )]; for res in err_res { - let bs = bytes::Bytes::from(res.0); - let body = IncomingAsyncBody::new( - Box::new(oio::into_stream(stream::iter(vec![Ok(bs.clone())]))), - None, - ); + let body = oio::Buffer::from(res.0.as_bytes().to_vec()); let resp = Response::builder().status(res.2).body(body).unwrap(); let err = parse_error(resp).await; diff --git a/core/src/services/vercel_blob/mod.rs b/core/src/services/vercel_blob/mod.rs index 2804d3fe7357..050f922abba5 100644 --- a/core/src/services/vercel_blob/mod.rs +++ b/core/src/services/vercel_blob/mod.rs @@ -22,4 +22,5 @@ pub use backend::VercelBlobConfig; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/vercel_blob/reader.rs b/core/src/services/vercel_blob/reader.rs new file mode 100644 index 000000000000..b43f4ec66f59 --- /dev/null +++ b/core/src/services/vercel_blob/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use http::StatusCode; + +use super::core::VercelBlobCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct VercelBlobReader { + core: Arc, + + path: String, + op: OpRead, +} + +impl VercelBlobReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + VercelBlobReader { + core, + path: path.to_string(), + op, + } + } +} + +impl oio::Read for VercelBlobReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.download(&self.path, range, &self.op).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/vercel_blob/writer.rs b/core/src/services/vercel_blob/writer.rs index c3bcbf87cc63..23938ed8a5f8 100644 --- a/core/src/services/vercel_blob/writer.rs +++ b/core/src/services/vercel_blob/writer.rs @@ -17,6 +17,7 @@ use std::sync::Arc; +use bytes::Buf; use http::StatusCode; use super::core::InitiateMultipartUploadResponse; @@ -53,10 +54,7 @@ impl oio::MultipartWrite for VercelBlobWriter { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -71,10 +69,10 @@ impl oio::MultipartWrite for VercelBlobWriter { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let resp = serde_json::from_slice::(&bs) - .map_err(new_json_deserialize_error)?; + let resp: InitiateMultipartUploadResponse = + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; Ok(resp.upload_id) } @@ -100,10 +98,10 @@ impl oio::MultipartWrite for VercelBlobWriter { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let resp = serde_json::from_slice::(&bs) - .map_err(new_json_deserialize_error)?; + let resp: UploadPartResponse = + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; Ok(oio::MultipartPart { part_number, @@ -131,11 +129,7 @@ impl oio::MultipartWrite for VercelBlobWriter { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index ed358b943f5c..998a478d540f 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -31,6 +31,7 @@ use super::error::parse_error; use super::lister::WebdavLister; use super::writer::WebdavWriter; use crate::raw::*; +use crate::services::webdav::reader::WebdavReader; use crate::*; /// Config for [WebDAV](https://datatracker.ietf.org/doc/html/rfc4918) backend support. 
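A pattern repeated throughout these hunks: response bodies are now `oio::Buffer`, which implements `bytes::Buf`, so JSON decoding switches from `serde_json::from_slice` on an awaited byte vector to a synchronous `serde_json::from_reader` over the buffer. A standalone sketch of the same idea using plain `bytes` types (the `Blob` struct here is a hypothetical example, not the service's real message type):

use bytes::{Buf, Bytes};
use serde::Deserialize;

#[derive(Deserialize)]
struct Blob {
    url: String,
}

fn decode(body: Bytes) -> serde_json::Result<Blob> {
    // `Buf::reader()` adapts any `Buf` into `std::io::Read`, so the JSON can
    // be decoded without first copying the body into a contiguous Vec<u8>.
    serde_json::from_reader(body.reader())
}

fn main() {
    let blob = decode(Bytes::from_static(br#"{"url":"https://example.com/x"}"#)).unwrap();
    assert_eq!(blob.url, "https://example.com/x");
}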
@@ -237,7 +238,7 @@ impl Debug for WebdavBackend { #[async_trait] impl Accessor for WebdavBackend { - type Reader = IncomingAsyncBody; + type Reader = WebdavReader; type Writer = oio::OneShotWriter; type Lister = oio::PageLister; type BlockingReader = (); @@ -252,8 +253,6 @@ impl Accessor for WebdavBackend { stat: true, read: true, - read_can_next: true, - read_with_range: true, write: true, write_can_empty: true, @@ -285,23 +284,10 @@ impl Accessor for WebdavBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.core.webdav_get(path, args).await?; - let status = resp.status(); - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + WebdavReader::new(self.core.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/services/webdav/core.rs b/core/src/services/webdav/core.rs index 9f24e5f0609f..301f57202564 100644 --- a/core/src/services/webdav/core.rs +++ b/core/src/services/webdav/core.rs @@ -116,9 +116,9 @@ impl WebdavCore { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let result: Multistatus = deserialize_multistatus(&bs)?; + let result: Multistatus = deserialize_multistatus(&bs.to_bytes())?; let propfind_resp = result.response.first().ok_or_else(|| { Error::new( ErrorKind::NotFound, @@ -133,8 +133,9 @@ impl WebdavCore { pub async fn webdav_get( &self, path: &str, - args: OpRead, - ) -> Result> { + range: BytesRange, + _: &OpRead, + ) -> Result> { let path = build_rooted_abs_path(&self.root, path); let url: String = format!("{}{}", self.endpoint, percent_encode_path(&path)); @@ -144,7 +145,6 @@ impl WebdavCore { req = req.header(header::AUTHORIZATION, auth.clone()) } - let range = args.range(); if !range.is_full() { req = req.header(header::RANGE, range.to_header()); } @@ -162,7 +162,7 @@ impl WebdavCore { size: Option, args: &OpWrite, body: AsyncBody, - ) -> Result> { + ) -> Result> { let path = build_rooted_abs_path(&self.root, path); let url = format!("{}{}", self.endpoint, percent_encode_path(&path)); @@ -189,7 +189,7 @@ impl WebdavCore { self.client.send(req).await } - pub async fn webdav_delete(&self, path: &str) -> Result> { + pub async fn webdav_delete(&self, path: &str) -> Result> { let path = build_rooted_abs_path(&self.root, path); let url = format!("{}{}", self.endpoint, percent_encode_path(&path)); @@ -206,7 +206,7 @@ impl WebdavCore { self.client.send(req).await } - pub async fn webdav_copy(&self, from: &str, to: &str) -> Result> { + pub async fn webdav_copy(&self, from: &str, to: &str) -> Result> { // Check if source file exists. let _ = self.webdav_stat(from).await?; // Make sure target's dir is exist. @@ -234,7 +234,7 @@ impl WebdavCore { self.client.send(req).await } - pub async fn webdav_move(&self, from: &str, to: &str) -> Result> { + pub async fn webdav_move(&self, from: &str, to: &str) -> Result> { // Check if source file exists. let _ = self.webdav_stat(from).await?; // Make sure target's dir is exist. 
@@ -262,11 +262,7 @@ impl WebdavCore { self.client.send(req).await } - pub async fn webdav_list( - &self, - path: &str, - args: &OpList, - ) -> Result> { + pub async fn webdav_list(&self, path: &str, args: &OpList) -> Result> { let path = build_rooted_abs_path(&self.root, path); let url = format!("{}{}", self.endpoint, percent_encode_path(&path)); @@ -357,7 +353,7 @@ impl WebdavCore { // The MKCOL method can only be performed on a deleted or non-existent resource. // This error means the directory already exists which is allowed by create_dir. | StatusCode::METHOD_NOT_ALLOWED => { - resp.into_body().consume().await?; + Ok(()) } _ => Err(parse_error(resp).await?), diff --git a/core/src/services/webdav/error.rs b/core/src/services/webdav/error.rs index 967dc19f52a6..930c136a13c6 100644 --- a/core/src/services/webdav/error.rs +++ b/core/src/services/webdav/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::Response; use http::StatusCode; @@ -24,9 +25,9 @@ use crate::ErrorKind; use crate::Result; /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), diff --git a/core/src/services/webdav/lister.rs b/core/src/services/webdav/lister.rs index f1749d522dd2..d94914dfe181 100644 --- a/core/src/services/webdav/lister.rs +++ b/core/src/services/webdav/lister.rs @@ -56,7 +56,7 @@ impl oio::PageList for WebdavLister { }; let bs = if resp.status().is_success() { - resp.into_body().bytes().await? + resp.into_body() } else if resp.status() == StatusCode::NOT_FOUND && self.path.ends_with('/') { ctx.done = true; return Ok(()); @@ -64,7 +64,7 @@ impl oio::PageList for WebdavLister { return Err(parse_error(resp).await?); }; - let result: Multistatus = deserialize_multistatus(&bs)?; + let result: Multistatus = deserialize_multistatus(&bs.to_bytes())?; for res in result.response { let mut path = res diff --git a/core/src/services/webdav/mod.rs b/core/src/services/webdav/mod.rs index 1e18871a98c8..455fbe3d2ddd 100644 --- a/core/src/services/webdav/mod.rs +++ b/core/src/services/webdav/mod.rs @@ -22,4 +22,5 @@ pub use backend::WebdavConfig; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/webdav/reader.rs b/core/src/services/webdav/reader.rs new file mode 100644 index 000000000000..b7d6426b7d7d --- /dev/null +++ b/core/src/services/webdav/reader.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use http::StatusCode; + +use super::core::WebdavCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct WebdavReader { + core: Arc, + + path: String, + op: OpRead, +} + +impl WebdavReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + WebdavReader { + core, + path: path.to_string(), + op, + } + } +} + +impl oio::Read for WebdavReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + let resp = self.core.webdav_get(&self.path, range, &self.op).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/webdav/writer.rs b/core/src/services/webdav/writer.rs index 525928adcd52..f4755f54fa64 100644 --- a/core/src/services/webdav/writer.rs +++ b/core/src/services/webdav/writer.rs @@ -22,7 +22,6 @@ use http::StatusCode; use super::core::*; use super::error::parse_error; - use crate::raw::*; use crate::*; @@ -54,10 +53,7 @@ impl oio::OneShotWrite for WebdavWriter { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK | StatusCode::NO_CONTENT => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED | StatusCode::OK | StatusCode::NO_CONTENT => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/webhdfs/backend.rs b/core/src/services/webhdfs/backend.rs index 7944b03e3c2f..a9d3dd7c0d50 100644 --- a/core/src/services/webhdfs/backend.rs +++ b/core/src/services/webhdfs/backend.rs @@ -19,6 +19,7 @@ use core::fmt::Debug; use std::collections::HashMap; use async_trait::async_trait; +use bytes::Buf; use http::header::CONTENT_LENGTH; use http::header::CONTENT_TYPE; use http::Request; @@ -29,7 +30,6 @@ use serde::Deserialize; use tokio::sync::OnceCell; use super::error::parse_error; -use super::error::parse_error_msg; use super::lister::WebhdfsLister; use super::message::BooleanResp; use super::message::FileStatusType; @@ -37,6 +37,7 @@ use super::message::FileStatusWrapper; use super::writer::WebhdfsWriter; use super::writer::WebhdfsWriters; use crate::raw::*; +use crate::services::webhdfs::reader::WebhdfsReader; use crate::*; const WEBHDFS_DEFAULT_ENDPOINT: &str = "http://127.0.0.1:9870"; @@ -267,10 +268,10 @@ impl WebhdfsBackend { return Err(parse_error(resp).await?); } - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let resp = - serde_json::from_slice::(&bs).map_err(new_json_deserialize_error)?; + let resp: LocationResponse = + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; let mut req = Request::put(&resp.location); @@ -307,9 +308,9 @@ impl WebhdfsBackend { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let resp: LocationResponse = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; Ok(resp.location) } @@ -321,7 +322,7 @@ impl WebhdfsBackend { &self, from: &str, to: &str, - ) -> Result> { + ) -> Result> { let from = build_abs_path(&self.root, from); let to = build_rooted_abs_path(&self.root, to); @@ -408,17 +409,7 @@ impl WebhdfsBackend { } if !range.is_full() { - // 
Webhdfs does not support read from end - if range.offset().is_none() && range.size().is_some() { - return Err(Error::new( - ErrorKind::Unsupported, - "webhdfs doesn't support read with suffix range", - )); - }; - - if let Some(offset) = range.offset() { - url += &format!("&offset={offset}"); - } + url += &format!("&offset={}", range.offset()); if let Some(size) = range.size() { url += &format!("&length={size}") } @@ -431,10 +422,7 @@ impl WebhdfsBackend { Ok(req) } - pub async fn webhdfs_list_status_request( - &self, - path: &str, - ) -> Result> { + pub async fn webhdfs_list_status_request(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path); let mut url = format!( "{}/webhdfs/v1/{}?op=LISTSTATUS", @@ -455,7 +443,7 @@ impl WebhdfsBackend { &self, path: &str, start_after: &str, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let mut url = format!( @@ -476,11 +464,11 @@ impl WebhdfsBackend { self.client.send(req).await } - async fn webhdfs_read_file( + pub async fn webhdfs_read_file( &self, path: &str, range: BytesRange, - ) -> Result> { + ) -> Result> { let req = self.webhdfs_open_request(path, &range).await?; self.client.send(req).await } @@ -488,7 +476,7 @@ impl WebhdfsBackend { pub(super) async fn webhdfs_get_file_status( &self, path: &str, - ) -> Result> { + ) -> Result> { let p = build_abs_path(&self.root, path); let mut url = format!( "{}/webhdfs/v1/{}?op=GETFILESTATUS", @@ -507,7 +495,7 @@ impl WebhdfsBackend { self.client.send(req).await } - pub async fn webhdfs_delete(&self, path: &str) -> Result> { + pub async fn webhdfs_delete(&self, path: &str) -> Result> { let p = build_abs_path(&self.root, path); let mut url = format!( "{}/webhdfs/v1/{}?op=DELETE&recursive=false", @@ -529,9 +517,9 @@ impl WebhdfsBackend { let resp = self.webhdfs_get_file_status("/").await?; match resp.status() { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let file_status = serde_json::from_slice::(&bs) + let file_status = serde_json::from_reader::<_, FileStatusWrapper>(bs.reader()) .map_err(new_json_deserialize_error)? .file_status; @@ -553,7 +541,7 @@ impl WebhdfsBackend { #[async_trait] impl Accessor for WebhdfsBackend { - type Reader = IncomingAsyncBody; + type Reader = WebhdfsReader; type Writer = WebhdfsWriters; type Lister = oio::PageLister; type BlockingReader = (); @@ -568,8 +556,6 @@ impl Accessor for WebhdfsBackend { stat: true, read: true, - read_can_next: true, - read_with_range: true, write: true, write_can_append: true, @@ -599,9 +585,9 @@ impl Accessor for WebhdfsBackend { // the redirection should be done automatically. match status { StatusCode::CREATED | StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let resp = serde_json::from_slice::(&bs) + let resp = serde_json::from_reader::<_, BooleanResp>(bs.reader()) .map_err(new_json_deserialize_error)?; if resp.boolean { @@ -627,9 +613,9 @@ impl Accessor for WebhdfsBackend { let status = resp.status(); match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); - let file_status = serde_json::from_slice::(&bs) + let file_status = serde_json::from_reader::<_, FileStatusWrapper>(bs.reader()) .map_err(new_json_deserialize_error)? 
.file_status; @@ -650,34 +636,10 @@ impl Accessor for WebhdfsBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let range = args.range(); - let resp = self.webhdfs_read_file(path, range).await?; - match resp.status() { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - // WebHDFS will returns 403 when range is outside of the end. - StatusCode::FORBIDDEN => { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; - let s = String::from_utf8_lossy(&bs); - if s.contains("out of the range") { - Ok((RpRead::new(), IncomingAsyncBody::empty())) - } else { - Err(parse_error_msg(parts, &s)?) - } - } - StatusCode::RANGE_NOT_SATISFIABLE => { - resp.into_body().consume().await?; - Ok((RpRead::new().with_size(Some(0)), IncomingAsyncBody::empty())) - } - _ => Err(parse_error(resp).await?), - } + Ok(( + RpRead::default(), + WebhdfsReader::new(self.clone(), path, args), + )) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -696,10 +658,7 @@ impl Accessor for WebhdfsBackend { let resp = self.webhdfs_delete(path).await?; match resp.status() { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(RpDelete::default()) - } + StatusCode::OK => Ok(RpDelete::default()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/webhdfs/error.rs b/core/src/services/webhdfs/error.rs index d243b84811d1..971997a7be8b 100644 --- a/core/src/services/webhdfs/error.rs +++ b/core/src/services/webhdfs/error.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::response::Parts; use http::Response; use http::StatusCode; @@ -39,9 +40,9 @@ struct WebHdfsError { java_class_name: String, } -pub(super) async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub(super) async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let s = String::from_utf8_lossy(&bs); parse_error_msg(parts, &s) } @@ -79,7 +80,6 @@ pub(super) fn parse_error_msg(parts: Parts, body: &str) -> Result { #[cfg(test)] mod tests { use bytes::Buf; - use futures::stream; use serde_json::from_reader; use super::*; @@ -99,10 +99,7 @@ mod tests { } "#, ); - let body = IncomingAsyncBody::new( - Box::new(oio::into_stream(stream::iter(vec![Ok(ill_args.clone())]))), - None, - ); + let body = oio::Buffer::from(ill_args.clone()); let resp = Response::builder() .status(StatusCode::BAD_REQUEST) .body(body) diff --git a/core/src/services/webhdfs/lister.rs b/core/src/services/webhdfs/lister.rs index cc7db189f257..fe758c4142a4 100644 --- a/core/src/services/webhdfs/lister.rs +++ b/core/src/services/webhdfs/lister.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use bytes::Buf; use http::StatusCode; use super::backend::WebhdfsBackend; @@ -45,8 +46,8 @@ impl oio::PageList for WebhdfsLister { StatusCode::OK => { ctx.done = true; - let bs = resp.into_body().bytes().await?; - serde_json::from_slice::(&bs) + let bs = resp.into_body(); + serde_json::from_reader::<_, FileStatusesWrapper>(bs.reader()) .map_err(new_json_deserialize_error)? 
.file_statuses
.file_status
@@ -64,10 +65,10 @@
             .await?;
         match resp.status() {
             StatusCode::OK => {
-                let bs = resp.into_body().bytes().await?;
-                let directory_listing = serde_json::from_slice::<DirectoryListingWrapper>(&bs)
-                    .map_err(new_json_deserialize_error)?
-                    .directory_listing;
+                let bs = resp.into_body();
+                let res: DirectoryListingWrapper =
+                    serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?;
+                let directory_listing = res.directory_listing;
                 let file_statuses = directory_listing.partial_listing.file_statuses.file_status;
 
                 if directory_listing.remaining_entries == 0 {
diff --git a/core/src/services/webhdfs/mod.rs b/core/src/services/webhdfs/mod.rs
index 3bfbbc77dd14..5118af268109 100644
--- a/core/src/services/webhdfs/mod.rs
+++ b/core/src/services/webhdfs/mod.rs
@@ -21,4 +21,5 @@ pub use backend::WebhdfsBuilder as Webhdfs;
 mod error;
 mod lister;
 mod message;
+mod reader;
 mod writer;
diff --git a/core/src/services/webhdfs/reader.rs b/core/src/services/webhdfs/reader.rs
new file mode 100644
index 000000000000..76786398f878
--- /dev/null
+++ b/core/src/services/webhdfs/reader.rs
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use bytes::Buf;
+use http::StatusCode;
+
+use super::error::parse_error;
+use super::error::parse_error_msg;
+use crate::raw::*;
+use crate::services::webhdfs::backend::WebhdfsBackend;
+
+pub struct WebhdfsReader {
+    core: WebhdfsBackend,
+
+    path: String,
+    _op: OpRead,
+}
+
+impl WebhdfsReader {
+    pub fn new(core: WebhdfsBackend, path: &str, op: OpRead) -> Self {
+        WebhdfsReader {
+            core,
+            path: path.to_string(),
+            _op: op,
+        }
+    }
+}
+
+impl oio::Read for WebhdfsReader {
+    async fn read_at(&self, offset: u64, limit: usize) -> crate::Result<oio::Buffer> {
+        let range = BytesRange::new(offset, Some(limit as u64));
+
+        let resp = self.core.webhdfs_read_file(&self.path, range).await?;
+
+        let status = resp.status();
+
+        match status {
+            StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()),
+            // WebHDFS returns 403 when the requested range is beyond the end.
+            StatusCode::FORBIDDEN => {
+                let (parts, mut body) = resp.into_parts();
+                let bs = body.copy_to_bytes(body.remaining());
+                let s = String::from_utf8_lossy(&bs);
+                if s.contains("out of the range") {
+                    Ok(oio::Buffer::new())
+                } else {
+                    Err(parse_error_msg(parts, &s)?)
+ } + } + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/webhdfs/writer.rs b/core/src/services/webhdfs/writer.rs index 02757a57afb9..df553ac10fbb 100644 --- a/core/src/services/webhdfs/writer.rs +++ b/core/src/services/webhdfs/writer.rs @@ -50,10 +50,7 @@ impl oio::BlockWrite for WebhdfsWriter { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED | StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -79,10 +76,7 @@ impl oio::BlockWrite for WebhdfsWriter { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED | StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -129,10 +123,7 @@ impl oio::BlockWrite for WebhdfsWriter { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } @@ -141,9 +132,7 @@ impl oio::BlockWrite for WebhdfsWriter { for block_id in block_ids { let resp = self.backend.webhdfs_delete(&block_id.to_string()).await?; match resp.status() { - StatusCode::OK => { - resp.into_body().consume().await?; - } + StatusCode::OK => {} _ => return Err(parse_error(resp).await?), } } @@ -179,8 +168,6 @@ impl oio::AppendWrite for WebhdfsWriter { match status { StatusCode::CREATED | StatusCode::OK => { - resp.into_body().consume().await?; - location = self.backend.webhdfs_init_append_request(&self.path).await?; } _ => return Err(parse_error(resp).await?), @@ -198,10 +185,7 @@ impl oio::AppendWrite for WebhdfsWriter { let status = resp.status(); match status { - StatusCode::OK => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::OK => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/yandex_disk/backend.rs b/core/src/services/yandex_disk/backend.rs index d581e8fd4d90..90630284396c 100644 --- a/core/src/services/yandex_disk/backend.rs +++ b/core/src/services/yandex_disk/backend.rs @@ -21,7 +21,7 @@ use std::fmt::Formatter; use std::sync::Arc; use async_trait::async_trait; -use http::Request; +use bytes::Buf; use http::StatusCode; use log::debug; use serde::Deserialize; @@ -32,6 +32,7 @@ use super::lister::YandexDiskLister; use super::writer::YandexDiskWriter; use super::writer::YandexDiskWriters; use crate::raw::*; +use crate::services::yandex_disk::reader::YandexDiskReader; use crate::*; /// Config for backblaze YandexDisk services support. 
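The `parse_error` functions across these services all change the same way: with a fully buffered `oio::Buffer` body, draining it is a synchronous `Buf` operation instead of an awaited stream read. A standalone sketch of that change over any `Buf` body (the tuple return and function name are illustrative only, not the patch's API):

use bytes::Buf;
use http::Response;

fn error_text<B: Buf>(resp: Response<B>) -> (http::StatusCode, String) {
    let (parts, mut body) = resp.into_parts();
    // `copy_to_bytes(remaining())` drains the whole buffered body at once;
    // no `.bytes().await?` is needed anymore.
    let bs = body.copy_to_bytes(body.remaining());
    (parts.status, String::from_utf8_lossy(&bs).into_owned())
}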
@@ -179,7 +180,7 @@ pub struct YandexDiskBackend { #[async_trait] impl Accessor for YandexDiskBackend { - type Reader = IncomingAsyncBody; + type Reader = YandexDiskReader; type Writer = YandexDiskWriters; type Lister = oio::PageLister; type BlockingReader = (); @@ -227,11 +228,7 @@ impl Accessor for YandexDiskBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::CREATED => { - resp.into_body().consume().await?; - - Ok(RpRename::default()) - } + StatusCode::OK | StatusCode::CREATED => Ok(RpRename::default()), _ => Err(parse_error(resp).await?), } } @@ -244,36 +241,16 @@ impl Accessor for YandexDiskBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::CREATED => { - resp.into_body().consume().await?; - - Ok(RpCopy::default()) - } + StatusCode::OK | StatusCode::CREATED => Ok(RpCopy::default()), _ => Err(parse_error(resp).await?), } } - async fn read(&self, path: &str, _args: OpRead) -> Result<(RpRead, Self::Reader)> { - let download_url = self.core.get_download_url(path).await?; - - let req = Request::get(download_url) - .body(AsyncBody::Empty) - .map_err(new_request_build_error)?; - let resp = self.core.send(req).await?; - - let status = resp.status(); - - match status { - StatusCode::OK => { - let size = parse_content_length(resp.headers())?; - let range = parse_content_range(resp.headers())?; - Ok(( - RpRead::new().with_size(size).with_range(range), - resp.into_body(), - )) - } - _ => Err(parse_error(resp).await?), - } + async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + Ok(( + RpRead::default(), + YandexDiskReader::new(self.core.clone(), path, args), + )) } async fn stat(&self, path: &str, _args: OpStat) -> Result { @@ -283,10 +260,10 @@ impl Accessor for YandexDiskBackend { match status { StatusCode::OK => { - let bs = resp.into_body().bytes().await?; + let bs = resp.into_body(); let mf: MetainformationResponse = - serde_json::from_slice(&bs).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?; parse_info(mf).map(RpStat::new) } diff --git a/core/src/services/yandex_disk/core.rs b/core/src/services/yandex_disk/core.rs index f206a52c0eb9..7c76f4d47f2a 100644 --- a/core/src/services/yandex_disk/core.rs +++ b/core/src/services/yandex_disk/core.rs @@ -18,6 +18,7 @@ use std::fmt::Debug; use std::fmt::Formatter; +use bytes::Buf; use http::header; use http::request; use http::Request; @@ -49,7 +50,7 @@ impl Debug for YandexDiskCore { impl YandexDiskCore { #[inline] - pub async fn send(&self, req: Request) -> Result> { + pub async fn send(&self, req: Request) -> Result> { self.client.send(req).await } @@ -87,10 +88,10 @@ impl YandexDiskCore { match status { StatusCode::OK => { - let bytes = resp.into_body().bytes().await?; + let bytes = resp.into_body(); let resp: GetUploadUrlResponse = - serde_json::from_slice(&bytes).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; Ok(resp.href) } @@ -121,10 +122,10 @@ impl YandexDiskCore { match status { StatusCode::OK => { - let bytes = resp.into_body().bytes().await?; + let bytes = resp.into_body(); let resp: GetUploadUrlResponse = - serde_json::from_slice(&bytes).map_err(new_json_deserialize_error)?; + serde_json::from_reader(bytes.reader()).map_err(new_json_deserialize_error)?; Ok(resp.href) } @@ -144,16 +145,14 @@ impl YandexDiskCore { let status = resp.status(); match status { - StatusCode::CREATED | StatusCode::CONFLICT => { - 
resp.into_body().consume().await?; - } + StatusCode::CREATED | StatusCode::CONFLICT => {} _ => return Err(parse_error(resp).await?), } } Ok(()) } - pub async fn create_dir(&self, path: &str) -> Result> { + pub async fn create_dir(&self, path: &str) -> Result> { let url = format!( "https://cloud-api.yandex.net/v1/disk/resources?path=/{}", percent_encode_path(path), @@ -171,7 +170,7 @@ impl YandexDiskCore { self.send(req).await } - pub async fn copy(&self, from: &str, to: &str) -> Result> { + pub async fn copy(&self, from: &str, to: &str) -> Result> { let from = build_rooted_abs_path(&self.root, from); let to = build_rooted_abs_path(&self.root, to); @@ -193,7 +192,7 @@ impl YandexDiskCore { self.send(req).await } - pub async fn move_object(&self, from: &str, to: &str) -> Result> { + pub async fn move_object(&self, from: &str, to: &str) -> Result> { let from = build_rooted_abs_path(&self.root, from); let to = build_rooted_abs_path(&self.root, to); @@ -215,7 +214,7 @@ impl YandexDiskCore { self.send(req).await } - pub async fn delete(&self, path: &str) -> Result> { + pub async fn delete(&self, path: &str) -> Result> { let path = build_rooted_abs_path(&self.root, path); let url = format!( @@ -240,7 +239,7 @@ impl YandexDiskCore { path: &str, limit: Option, offset: Option, - ) -> Result> { + ) -> Result> { let path = build_rooted_abs_path(&self.root, path); let mut url = format!( diff --git a/core/src/services/yandex_disk/error.rs b/core/src/services/yandex_disk/error.rs index 05020e64d799..38e6e3c72f8b 100644 --- a/core/src/services/yandex_disk/error.rs +++ b/core/src/services/yandex_disk/error.rs @@ -35,9 +35,9 @@ struct YandexDiskError { } /// Parse error response into Error. -pub async fn parse_error(resp: Response) -> Result { - let (parts, body) = resp.into_parts(); - let bs = body.bytes().await?; +pub async fn parse_error(resp: Response) -> Result { + let (parts, mut body) = resp.into_parts(); + let bs = body.copy_to_bytes(body.remaining()); let (kind, retryable) = match parts.status.as_u16() { 400 => (ErrorKind::InvalidInput, false), @@ -67,7 +67,6 @@ pub async fn parse_error(resp: Response) -> Result { #[cfg(test)] mod test { - use futures::stream; use http::StatusCode; use super::*; @@ -97,10 +96,7 @@ mod test { for res in err_res { let bs = bytes::Bytes::from(res.0); - let body = IncomingAsyncBody::new( - Box::new(oio::into_stream(stream::iter(vec![Ok(bs.clone())]))), - None, - ); + let body = oio::Buffer::from(bs); let resp = Response::builder().status(res.2).body(body).unwrap(); let err = parse_error(resp).await; diff --git a/core/src/services/yandex_disk/lister.rs b/core/src/services/yandex_disk/lister.rs index 9eaec4535778..4bdd8034cf54 100644 --- a/core/src/services/yandex_disk/lister.rs +++ b/core/src/services/yandex_disk/lister.rs @@ -17,6 +17,8 @@ use std::sync::Arc; +use bytes::Buf; + use super::core::parse_info; use super::core::MetainformationResponse; use super::core::YandexDiskCore; @@ -62,10 +64,10 @@ impl oio::PageList for YandexDiskLister { match resp.status() { http::StatusCode::OK => { - let body = resp.into_body().bytes().await?; + let body = resp.into_body(); let resp: MetainformationResponse = - serde_json::from_slice(&body).map_err(new_json_deserialize_error)?; + serde_json::from_reader(body.reader()).map_err(new_json_deserialize_error)?; if let Some(embedded) = resp.embedded { let n = embedded.items.len(); diff --git a/core/src/services/yandex_disk/mod.rs b/core/src/services/yandex_disk/mod.rs index e2f2aff44888..606f8f816109 100644 --- 
a/core/src/services/yandex_disk/mod.rs +++ b/core/src/services/yandex_disk/mod.rs @@ -22,4 +22,5 @@ pub use backend::YandexDiskConfig; mod core; mod error; mod lister; +mod reader; mod writer; diff --git a/core/src/services/yandex_disk/reader.rs b/core/src/services/yandex_disk/reader.rs new file mode 100644 index 000000000000..ec9f3ffd437e --- /dev/null +++ b/core/src/services/yandex_disk/reader.rs @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use http::header; +use http::Request; +use http::StatusCode; + +use super::core::YandexDiskCore; +use super::error::parse_error; +use crate::raw::*; + +pub struct YandexDiskReader { + core: Arc, + + path: String, + _op: OpRead, +} + +impl YandexDiskReader { + pub fn new(core: Arc, path: &str, op: OpRead) -> Self { + YandexDiskReader { + core, + path: path.to_string(), + _op: op, + } + } +} + +impl oio::Read for YandexDiskReader { + async fn read_at(&self, offset: u64, limit: usize) -> crate::Result { + let range = BytesRange::new(offset, Some(limit as u64)); + + // TODO: move this out of reader. + let download_url = self.core.get_download_url(&self.path).await?; + + let req = Request::get(download_url) + .header(header::RANGE, range.to_header()) + .body(AsyncBody::Empty) + .map_err(new_request_build_error)?; + let resp = self.core.send(req).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok(resp.into_body()), + StatusCode::RANGE_NOT_SATISFIABLE => Ok(oio::Buffer::new()), + _ => Err(parse_error(resp).await?), + } + } +} diff --git a/core/src/services/yandex_disk/writer.rs b/core/src/services/yandex_disk/writer.rs index 7495f59ce799..0420809d3b79 100644 --- a/core/src/services/yandex_disk/writer.rs +++ b/core/src/services/yandex_disk/writer.rs @@ -54,10 +54,7 @@ impl oio::OneShotWrite for YandexDiskWriter { let status = resp.status(); match status { - StatusCode::CREATED => { - resp.into_body().consume().await?; - Ok(()) - } + StatusCode::CREATED => Ok(()), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/types/blocking_reader.rs b/core/src/types/blocking_reader.rs new file mode 100644 index 000000000000..1cac547c7d08 --- /dev/null +++ b/core/src/types/blocking_reader.rs @@ -0,0 +1,306 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::collections::Bound;
+use std::ops::{Range, RangeBounds};
+
+use bytes::Buf;
+use bytes::BufMut;
+
+use crate::raw::oio::BlockingRead;
+use crate::raw::*;
+use crate::*;
+
+/// BlockingReader is designed to read data from a given path in a blocking
+/// manner.
+pub struct BlockingReader {
+    pub(crate) inner: oio::BlockingReader,
+}
+
+impl BlockingReader {
+    /// Create a new blocking reader.
+    ///
+    /// Create will use internal information to decide the most suitable
+    /// implementation for users.
+    ///
+    /// We don't want to expose those details to users so keep this function
+    /// in crate only.
+    pub(crate) fn create(acc: FusedAccessor, path: &str, op: OpRead) -> crate::Result<Self> {
+        let (_, r) = acc.blocking_read(path, op)?;
+
+        Ok(BlockingReader { inner: r })
+    }
+
+    /// Read from underlying storage and write data into the specified buffer, starting at
+    /// the given offset and up to the limit.
+    ///
+    /// A return value of `n` signifies that `n` bytes of data have been read into `buf`.
+    /// If `n < limit`, it indicates that the reader has reached EOF (End of File).
+    #[inline]
+    pub fn read(&self, buf: &mut impl BufMut, offset: u64, limit: usize) -> Result<usize> {
+        let bs = self.inner.read_at(offset, limit)?;
+        let n = bs.remaining();
+        buf.put(bs);
+        Ok(n)
+    }
+
+    /// Read the given range of bytes from the reader.
+    pub fn read_range(&self, buf: &mut impl BufMut, range: impl RangeBounds<u64>) -> Result<usize> {
+        let start = match range.start_bound().cloned() {
+            Bound::Included(start) => start,
+            Bound::Excluded(start) => start + 1,
+            Bound::Unbounded => 0,
+        };
+
+        let end = match range.end_bound().cloned() {
+            Bound::Included(end) => Some(end + 1),
+            Bound::Excluded(end) => Some(end),
+            Bound::Unbounded => None,
+        };
+
+        // If range is empty, return Ok(0) directly.
+        if let Some(end) = end {
+            if end <= start {
+                return Ok(0);
+            }
+        }
+
+        let mut offset = start;
+        let mut size = end.map(|end| end - start);
+
+        let mut read = 0;
+        loop {
+            let bs = self
+                .inner
+                // TODO: use service preferred io size instead.
+                .read_at(offset, size.unwrap_or(4 * 1024 * 1024) as usize)?;
+            let n = bs.remaining();
+            read += n;
+            buf.put(bs);
+            if n == 0 {
+                return Ok(read);
+            }
+
+            offset += n as u64;
+
+            size = size.map(|v| v - n as u64);
+            if size == Some(0) {
+                return Ok(read);
+            }
+        }
+    }
+
+    /// Read all data from the reader.
+    ///
+    /// This API is exactly the same as `BlockingReader::read_range(buf, ..)`.
+    #[inline]
+    pub fn read_to_end(&self, buf: &mut impl BufMut) -> Result<usize> {
+        self.read_range(buf, ..)
+    }
+
+    /// Convert reader into [`StdIoReader`] which implements [`std::io::Read`],
+    /// [`std::io::Seek`] and [`std::io::BufRead`].
+    #[inline]
+    pub fn into_std_io_read(self, range: Range<u64>) -> StdIoReader {
+        // TODO: the capacity should be decided by services.
+        StdIoReader::new(self.inner, range)
+    }
+
+    /// Convert reader into [`StdBytesIterator`] which implements [`Iterator`]
+    /// over `std::io::Result<Bytes>`.
+ #[inline] + pub fn into_std_bytes_iterator(self, range: Range) -> StdBytesIterator { + StdBytesIterator::new(self.inner, range) + } +} + +pub mod into_std_read { + use crate::raw::{format_std_io_error, oio}; + use bytes::Buf; + use std::io; + use std::io::Read; + use std::io::Seek; + use std::io::{BufRead, SeekFrom}; + use std::ops::Range; + + /// StdReader is the adapter of [`Read`], [`Seek`] and [`BufRead`] for [`BlockingReader`][crate::BlockingReader]. + /// + /// Users can use this adapter in cases where they need to use [`Read`] or [`BufRead`] trait. + /// + /// StdReader also implements [`Send`] and [`Sync`]. + pub struct StdIoReader { + inner: oio::BlockingReader, + offset: u64, + size: u64, + cap: usize, + + cur: u64, + buf: oio::Buffer, + } + + impl StdIoReader { + /// NOTE: don't allow users to create StdReader directly. + #[inline] + pub(super) fn new(r: oio::BlockingReader, range: Range) -> Self { + StdIoReader { + inner: r, + offset: range.start, + size: range.end - range.start, + // TODO: should use services preferred io size. + cap: 4 * 1024 * 1024, + + cur: 0, + buf: oio::Buffer::new(), + } + } + + /// Set the capacity of this reader to control the IO size. + pub fn with_capacity(mut self, cap: usize) -> Self { + self.cap = cap; + self + } + } + + impl BufRead for StdIoReader { + fn fill_buf(&mut self) -> io::Result<&[u8]> { + if self.buf.has_remaining() { + return Ok(self.buf.chunk()); + } + + // Make sure cur didn't exceed size. + if self.cur >= self.size { + return Ok(&[]); + } + + let next_offset = self.offset + self.cur; + let next_size = (self.size - self.cur).min(self.cap as u64) as usize; + self.buf = self + .inner + .read_at(next_offset, next_size) + .map_err(format_std_io_error)?; + Ok(self.buf.chunk()) + } + + fn consume(&mut self, amt: usize) { + self.buf.advance(amt); + self.cur += amt as u64; + } + } + + impl Read for StdIoReader { + #[inline] + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let bs = self.fill_buf()?; + let n = bs.len().min(buf.len()); + buf[..n].copy_from_slice(&bs[..n]); + self.consume(n); + Ok(n) + } + } + + impl Seek for StdIoReader { + #[inline] + fn seek(&mut self, pos: SeekFrom) -> io::Result { + let new_pos = match pos { + SeekFrom::Start(pos) => pos as i64, + SeekFrom::End(pos) => self.size as i64 + pos, + SeekFrom::Current(pos) => self.cur as i64 + pos, + }; + + if new_pos < 0 { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "invalid seek to a negative position", + )); + } + + let new_pos = new_pos as u64; + + if (self.cur..self.cur + self.buf.remaining() as u64).contains(&new_pos) { + let cnt = new_pos - self.cur; + self.buf.advance(cnt as _); + } else { + self.buf = oio::Buffer::new() + } + + self.cur = new_pos; + Ok(self.cur) + } + } +} + +pub mod into_std_iterator { + use crate::raw::*; + use bytes::{Buf, Bytes}; + use std::io; + + /// StdIterator is the adapter of [`Iterator`] for [`BlockingReader`][crate::BlockingReader]. + /// + /// Users can use this adapter in cases where they need to use [`Iterator`] trait. + /// + /// StdIterator also implements [`Send`] and [`Sync`]. + pub struct StdBytesIterator { + inner: oio::BlockingReader, + offset: u64, + size: u64, + cap: usize, + + cur: u64, + } + + impl StdBytesIterator { + /// NOTE: don't allow users to create StdIterator directly. + #[inline] + pub(crate) fn new(r: oio::BlockingReader, range: std::ops::Range) -> Self { + StdBytesIterator { + inner: r, + offset: range.start, + size: range.end - range.start, + // TODO: should use services preferred io size. 
+ cap: 4 * 1024 * 1024, + cur: 0, + } + } + + /// Set the capacity of this reader to control the IO size. + pub fn with_capacity(mut self, cap: usize) -> Self { + self.cap = cap; + self + } + } + + impl Iterator for StdBytesIterator { + type Item = io::Result; + + fn next(&mut self) -> Option { + if self.cur >= self.size { + return None; + } + + let next_offset = self.offset + self.cur; + let next_size = (self.size - self.cur).min(self.cap as u64) as usize; + match self.inner.read_at(next_offset, next_size) { + Ok(buf) if !buf.has_remaining() => None, + Ok(mut buf) => { + self.cur += buf.remaining() as u64; + Some(Ok(buf.copy_to_bytes(buf.remaining()))) + } + Err(err) => Some(Err(format_std_io_error(err))), + } + } + } +} diff --git a/core/src/types/capability.rs b/core/src/types/capability.rs index ca0936b09521..45d53682a350 100644 --- a/core/src/types/capability.rs +++ b/core/src/types/capability.rs @@ -70,12 +70,6 @@ pub struct Capability { /// If operator supports read. pub read: bool, - /// If operator supports seek on returning reader. - pub read_can_seek: bool, - /// If operator supports next on returning reader. - pub read_can_next: bool, - /// If operator supports read with range. - pub read_with_range: bool, /// If operator supports read with if match. pub read_with_if_match: bool, /// If operator supports read with if none match. diff --git a/core/src/types/list.rs b/core/src/types/list.rs index 141a13bd70fd..d02f576a81ca 100644 --- a/core/src/types/list.rs +++ b/core/src/types/list.rs @@ -283,6 +283,7 @@ impl Iterator for BlockingLister { } #[cfg(test)] +#[cfg(feature = "services-azblob")] mod tests { use futures::future; use futures::StreamExt; diff --git a/core/src/types/mod.rs b/core/src/types/mod.rs index 0f75fa50c842..daf1a25b9d51 100644 --- a/core/src/types/mod.rs +++ b/core/src/types/mod.rs @@ -26,9 +26,15 @@ pub use metadata::Metadata; pub use metadata::Metakey; mod reader; -pub use reader::BlockingReader; +pub use reader::into_futures_async_read::FuturesIoAsyncReader; +pub use reader::into_futures_stream::FuturesBytesStream; pub use reader::Reader; +mod blocking_reader; +pub use blocking_reader::into_std_iterator::StdBytesIterator; +pub use blocking_reader::into_std_read::StdIoReader; +pub use blocking_reader::BlockingReader; + mod writer; pub use writer::BlockingWriter; pub use writer::Writer; @@ -57,4 +63,5 @@ mod scheme; pub use scheme::Scheme; mod capability; + pub use capability::Capability; diff --git a/core/src/types/operator/blocking_operator.rs b/core/src/types/operator/blocking_operator.rs index 06576986d3ee..bfb592443402 100644 --- a/core/src/types/operator/blocking_operator.rs +++ b/core/src/types/operator/blocking_operator.rs @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. -use bytes::{Buf, Bytes}; +use bytes::Buf; +use bytes::Bytes; use super::operator_functions::*; use crate::raw::*; @@ -384,8 +385,8 @@ impl BlockingOperator { FunctionRead(OperatorFunction::new( self.inner().clone(), path, - OpRead::default(), - |inner, path, args| { + (OpRead::default(), BytesRange::default()), + |inner, path, (args, range)| { if !validate_path(&path, EntryMode::FILE) { return Err( Error::new(ErrorKind::IsADirectory, "read path is a directory") @@ -395,23 +396,11 @@ impl BlockingOperator { ); } - let range = args.range(); - let (size_hint, range) = if let Some(size) = range.size() { - (size, range) - } else { - let size = inner - .blocking_stat(&path, OpStat::default())? 
- .into_metadata() - .content_length(); - let range = range.complete(size); - (range.size().unwrap(), range) - }; - - let (_, r) = inner.blocking_read(&path, args.with_range(range))?; - let mut r = BlockingReader::new(r); - let mut buf = Vec::with_capacity(size_hint as usize); - r.read_to_end(&mut buf)?; + let size_hint = range.size(); + let r = BlockingReader::create(inner, &path, args)?; + let mut buf = Vec::with_capacity(size_hint.unwrap_or_default() as _); + r.read_range(&mut buf, range.to_range())?; Ok(buf) }, )) @@ -444,7 +433,7 @@ impl BlockingOperator { /// use opendal::EntryMode; /// use opendal::Metakey; /// # fn test(op: BlockingOperator) -> Result<()> { - /// let r = op.reader_with("path/to/file").range(0..10).call()?; + /// let r = op.reader_with("path/to/file").version("version_id").call()?; /// # Ok(()) /// # } /// ``` diff --git a/core/src/types/operator/operator.rs b/core/src/types/operator/operator.rs index aa12f8e74082..3f3e8fecc0a5 100644 --- a/core/src/types/operator/operator.rs +++ b/core/src/types/operator/operator.rs @@ -500,8 +500,8 @@ impl Operator { OperatorFuture::new( self.inner().clone(), path, - OpRead::default(), - |inner, path, args| async move { + (OpRead::default(), BytesRange::default()), + |inner, path, (args, range)| async move { if !validate_path(&path, EntryMode::FILE) { return Err( Error::new(ErrorKind::IsADirectory, "read path is a directory") @@ -511,24 +511,11 @@ impl Operator { ); } - let range = args.range(); - let (size_hint, range) = if let Some(size) = range.size() { - (size, range) - } else { - let size = inner - .stat(&path, OpStat::default()) - .await? - .into_metadata() - .content_length(); - let range = range.complete(size); - (range.size().unwrap(), range) - }; - - let (_, r) = inner.read(&path, args.with_range(range)).await?; - let mut r = Reader::new(r); - let mut buf = Vec::with_capacity(size_hint as usize); - r.read_to_end(&mut buf).await?; + let size_hint = range.size(); + let r = Reader::create(inner, &path, args).await?; + let mut buf = Vec::with_capacity(size_hint.unwrap_or_default() as _); + r.read_range(&mut buf, range.to_range()).await?; Ok(buf) }, ) @@ -570,86 +557,6 @@ impl Operator { /// /// # Options /// - /// ## `range` - /// - /// Set `range` for this `read` request. - /// - /// If we have a file with size `n`. - /// - /// - `..` means read bytes in range `[0, n)` of file. - /// - `0..1024` means read bytes in range `[0, 1024)` of file - /// - `1024..` means read bytes in range `[1024, n)` of file - /// - `..1024` means read bytes in range `(n - 1024, n)` of file - /// - /// ```no_run - /// # use opendal::Result; - /// # use opendal::Operator; - /// # use futures::TryStreamExt; - /// # async fn test(op: Operator) -> Result<()> { - /// let bs = op.reader_with("path/to/file").range(0..1024).await?; - /// # Ok(()) - /// # } - /// ``` - /// - /// ## `buffer` - /// - /// Set `buffer` for the reader. - /// - /// OpenDAL by default to read file without buffer. This is not efficient for cases like `seek` - /// after read or reading file with small chunks. To improve performance, we can set a buffer. - /// - /// The following example will create a reader with 4 MiB buffer internally. All seek operations - /// happened in buffered data will be zero cost. 
- /// - /// ```no_run - /// # use opendal::Result; - /// # use opendal::Operator; - /// # use futures::TryStreamExt; - /// # async fn test(op: Operator) -> Result<()> { - /// let bs = op - /// .reader_with("path/to/file") - /// .buffer(4 * 1024 * 1024) - /// .await?; - /// # Ok(()) - /// # } - /// ``` - /// - /// ## `if_match` - /// - /// Set `if_match` for this `read` request. - /// - /// This feature can be used to check if the file's `ETag` matches the given `ETag`. - /// - /// If file exists and it's etag doesn't match, an error with kind [`ErrorKind::ConditionNotMatch`] - /// will be returned. - /// - /// ```no_run - /// # use opendal::Result; - /// use opendal::Operator; - /// # async fn test(op: Operator, etag: &str) -> Result<()> { - /// let mut metadata = op.reader_with("path/to/file").if_match(etag).await?; - /// # Ok(()) - /// # } - /// ``` - /// - /// ## `if_none_match` - /// - /// Set `if_none_match` for this `read` request. - /// - /// This feature can be used to check if the file's `ETag` doesn't match the given `ETag`. - /// - /// If file exists and it's etag match, an error with kind [`ErrorKind::ConditionNotMatch`] - /// will be returned. - /// - /// ```no_run - /// # use opendal::Result; - /// use opendal::Operator; - /// # async fn test(op: Operator, etag: &str) -> Result<()> { - /// let mut metadata = op.reader_with("path/to/file").if_none_match(etag).await?; - /// # Ok(()) - /// # } - /// ``` - /// /// # Examples /// /// ```no_run @@ -657,11 +564,11 @@ impl Operator { /// # use opendal::Operator; /// # use opendal::Scheme; /// # async fn test(op: Operator) -> Result<()> { - /// let r = op.reader_with("path/to/file").range(0..10).await?; + /// let r = op.reader_with("path/to/file").version("version_id").await?; /// # Ok(()) /// # } /// ``` - pub fn reader_with(&self, path: &str) -> FutureRead>> { + pub fn reader_with(&self, path: &str) -> FutureReader>> { let path = normalize_path(path); OperatorFuture::new( diff --git a/core/src/types/operator/operator_functions.rs b/core/src/types/operator/operator_functions.rs index 07644e2089e5..1e40f78d41b0 100644 --- a/core/src/types/operator/operator_functions.rs +++ b/core/src/types/operator/operator_functions.rs @@ -320,12 +320,12 @@ impl FunctionLister { /// Function that generated by [`BlockingOperator::read_with`]. /// /// Users can add more options by public functions provided by this struct. -pub struct FunctionRead(pub(crate) OperatorFunction>); +pub struct FunctionRead(pub(crate) OperatorFunction<(OpRead, BytesRange), Vec>); impl FunctionRead { /// Set the range for this operation. pub fn range(mut self, range: impl RangeBounds) -> Self { - self.0 = self.0.map_args(|args| args.with_range(range.into())); + self.0 = self.0.map_args(|(args, _)| (args, range.into())); self } @@ -342,12 +342,6 @@ impl FunctionRead { pub struct FunctionReader(pub(crate) OperatorFunction); impl FunctionReader { - /// Set the range for this operation. - pub fn range(mut self, range: impl RangeBounds) -> Self { - self.0 = self.0.map_args(|args| args.with_range(range.into())); - self - } - /// Sets the content-disposition header that should be send back by the remote read operation. pub fn override_content_disposition(mut self, content_disposition: &str) -> Self { self.0 = self @@ -395,12 +389,6 @@ impl FunctionReader { pub fn call(self) -> Result { self.0.call() } - - /// Set the buffer capability to enable `BufferReader`. 
- pub fn buffer(mut self, cap: usize) -> Self { - self.0 = self.0.map_args(|args| args.with_buffer(cap)); - self - } } /// Function that generated by [`BlockingOperator::stat_with`]. diff --git a/core/src/types/operator/operator_futures.rs b/core/src/types/operator/operator_futures.rs index 2822db84ccac..c4ddd86e74b9 100644 --- a/core/src/types/operator/operator_futures.rs +++ b/core/src/types/operator/operator_futures.rs @@ -149,11 +149,6 @@ impl FuturePresignStat { pub type FuturePresignRead<F> = OperatorFuture<(OpRead, Duration), F>; impl<F: Future<Output = Result<PresignedRequest>>> FuturePresignRead<F> { - /// Create a new OpRead with range. - pub fn range(self, v: BytesRange) -> Self { - self.map(|(args, dur)| (args.with_range(v), dur)) - } - /// Sets the content-disposition header that should be send back by the remote read operation. pub fn override_content_disposition(self, v: &str) -> Self { self.map(|(args, dur)| (args.with_override_content_disposition(v), dur)) @@ -205,44 +200,40 @@ impl FuturePresignWrite { /// Future that generated by [`Operator::read_with`] or [`Operator::reader_with`]. /// /// Users can add more options by public functions provided by this struct. -pub type FutureRead<F> = OperatorFuture<OpRead, F>; +pub type FutureRead<F> = OperatorFuture<(OpRead, BytesRange), F>; impl<F: Future<Output = Result<Vec<u8>>>> FutureRead<F> { /// Set the range header for this operation. pub fn range(self, range: impl RangeBounds<u64>) -> Self { - self.map(|args| args.with_range(range.into())) - } - - /// Set the buffer capability to enable buffer for reader. - pub fn buffer(self, v: usize) -> Self { - self.map(|args| args.with_buffer(v)) + self.map(|(args, _)| (args, range.into())) } /// Set the If-Match for this operation. pub fn if_match(self, v: &str) -> Self { - self.map(|args| args.with_if_match(v)) + self.map(|(args, range)| (args.with_if_match(v), range)) } /// Set the If-None-Match for this operation. pub fn if_none_match(self, v: &str) -> Self { - self.map(|args| args.with_if_none_match(v)) - } - - /// Sets the content-disposition header that should be send back by the remote read operation. - pub fn override_content_disposition(self, v: &str) -> Self { - self.map(|args| args.with_override_content_disposition(v)) + self.map(|(args, range)| (args.with_if_none_match(v), range)) } - /// Sets the cache-control header that should be send back by the remote read operation. - pub fn override_cache_control(self, v: &str) -> Self { - self.map(|args| args.with_override_cache_control(v)) + /// Set the version for this operation. + pub fn version(self, v: &str) -> Self { + self.map(|(args, range)| (args.with_version(v), range)) } +} - /// Sets the content-type header that should be send back by the remote read operation. - pub fn override_content_type(self, v: &str) -> Self { - self.map(|args| args.with_override_content_type(v)) - } +/// Future that generated by [`Operator::read_with`] or [`Operator::reader_with`]. +/// +/// Users can add more options by public functions provided by this struct. +/// +/// # Notes +/// +/// `(OpRead, ())` is a trick to make sure `FutureReader` is different from `FutureRead` +pub type FutureReader<F> = OperatorFuture<(OpRead, ()), F>; +impl<F: Future<Output = Result<Reader>>> FutureReader<F> { /// Set the version for this operation. pub fn version(self, v: &str) -> Self { self.map(|args| args.with_version(v)) diff --git a/core/src/types/reader.rs b/core/src/types/reader.rs index 8db4e9f9f4ad..e402f915ea33 100644 --- a/core/src/types/reader.rs +++ b/core/src/types/reader.rs @@ -15,18 +15,13 @@ // specific language governing permissions and limitations // under the License.
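A hedged sketch of what the `FutureRead`/`FutureReader` split above means for callers (the helper name and path are illustrative; this is not part of the patch): `read_with` keeps its `range` option and resolves it at call time, while `reader_with` now yields a range-free `Reader` whose ranges are supplied per read call:

```rust
use opendal::{Operator, Result};

async fn demo(op: Operator) -> Result<()> {
    // FutureRead: the BytesRange is applied when the future resolves.
    let bs = op.read_with("path/to/file").range(0..16).await?;

    // FutureReader: no range at creation; pass one to each read call.
    let r = op.reader_with("path/to/file").await?;
    let mut buf = Vec::new();
    r.read_range(&mut buf, 0..16).await?;
    assert_eq!(bs, buf);
    Ok(())
}
```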
-use std::io; -use std::io::SeekFrom; -use std::pin::Pin; -use std::task::ready; -use std::task::Context; -use std::task::Poll; - -use bytes::{BufMut, Bytes, BytesMut}; -use futures::Stream; -use tokio::io::ReadBuf; - -use crate::raw::oio::BlockingRead; +use std::ops::Bound; +use std::ops::Range; +use std::ops::RangeBounds; + +use bytes::Buf; +use bytes::BufMut; + use crate::raw::*; use crate::*; @@ -40,42 +35,12 @@ use crate::*; /// /// ## Direct /// -/// [`Reader`] provides public API including [`Reader::read`], [`Reader::seek`] and -/// [`Reader::read_to_end`]. You can use those APIs directly without extra copy. -/// -/// ## Bytes Stream -/// -/// [`Reader`] can be used as `Stream<Item = io::Result<Bytes>>`. -/// -/// It also implements [`Send`], [`Sync`] and [`Unpin`]. -/// -/// ## Futures AsyncRead -/// -/// [`Reader`] can be used as [`futures::AsyncRead`] and [`futures::AsyncSeek`]. -/// -/// It also implements [`Send`], [`Sync`] and [`Unpin`]. -/// -/// [`Reader`] provides [`Reader::into_futures_read`] to remove extra APIs upon self. -/// -/// ## Tokio AsyncRead -/// -/// [`Reader`] can be used as [`tokio::io::AsyncRead`] and [`tokio::io::AsyncSeek`]. -/// -/// It also implements [`Send`], [`Sync`] and [`Unpin`]. -/// -/// [`Reader`] provides [`Reader::into_tokio_read`] to remove extra APIs upon self. +/// [`Reader`] provides public API including [`Reader::read`], [`Reader::read_range`], and [`Reader::read_to_end`]. You can use those APIs directly without extra copy. pub struct Reader { - state: State, + inner: oio::Reader, } impl Reader { - /// Create a new reader from an `oio::Reader`. - pub(crate) fn new(r: oio::Reader) -> Self { - Reader { - state: State::Idle(Some(r)), - } - } - /// Create a new reader. /// /// Create will use internal information to decide the most suitable /// implementation for users. @@ -86,490 +51,348 @@ impl Reader { pub(crate) async fn create(acc: FusedAccessor, path: &str, op: OpRead) -> Result<Self> { let (_, r) = acc.read(path, op).await?; - Ok(Reader { - state: State::Idle(Some(r)), - }) - } - - /// Convert [`Reader`] into an [`futures::AsyncRead`] and [`futures::AsyncSeek`] - /// - /// `Reader` itself implements [`futures::AsyncRead`], this function is used to - /// make sure that `Reader` is used as an `AsyncRead` only. - /// - /// The returning type also implements `Send`, `Sync` and `Unpin`, so users can use it - /// as `Box` and calling `poll_read_unpin` on it. - #[inline] - #[cfg(not(target_arch = "wasm32"))] - pub fn into_futures_read( - self, - ) -> impl futures::AsyncRead + futures::AsyncSeek + Send + Sync + Unpin { - self + Ok(Reader { inner: r }) } - /// Convert [`Reader`] into an [`tokio::io::AsyncRead`] and [`tokio::io::AsyncSeek`] - /// - /// `Reader` itself implements [`tokio::io::AsyncRead`], this function is used to - /// make sure that `Reader` is used as an [`tokio::io::AsyncRead`] only. + /// Read from underlying storage and write data into the specified buffer, starting at + /// the given offset and up to the limit. /// - /// The returning type also implements `Send`, `Sync` and `Unpin`, so users can use it - /// as `Box` and calling `poll_read_unpin` on it. + /// A return value of `n` signifies that `n` bytes of data have been read into `buf`. + /// If `n < limit`, it indicates that the reader has reached EOF (End of File).
#[inline] - #[cfg(not(target_arch = "wasm32"))] - pub fn into_tokio_read( - self, - ) -> impl tokio::io::AsyncRead + tokio::io::AsyncSeek + Send + Sync + Unpin { - self + pub async fn read(&self, buf: &mut impl BufMut, offset: u64, limit: usize) -> Result<usize> { + let bs = self.inner.read_at_dyn(offset, limit).await?; + let n = bs.remaining(); + buf.put(bs); + Ok(n) } - /// Seek to the position of `pos` of reader. - #[inline] - pub async fn seek(&mut self, pos: SeekFrom) -> Result<u64> { - let State::Idle(Some(r)) = &mut self.state else { - return Err(Error::new(ErrorKind::Unexpected, "reader must be valid")); + /// Read given range bytes of data from reader. + pub async fn read_range( + &self, + buf: &mut impl BufMut, + range: impl RangeBounds<u64>, + ) -> Result<usize> { + let start = match range.start_bound().cloned() { + Bound::Included(start) => start, + Bound::Excluded(start) => start + 1, + Bound::Unbounded => 0, }; - r.seek_dyn(pos).await - } - /// Read at most `size` bytes of data from reader. - #[inline] - pub async fn read(&mut self, limit: usize) -> Result<Bytes> { - let State::Idle(Some(r)) = &mut self.state else { - return Err(Error::new(ErrorKind::Unexpected, "reader must be valid")); + let end = match range.end_bound().cloned() { + Bound::Included(end) => Some(end + 1), + Bound::Excluded(end) => Some(end), + Bound::Unbounded => None, }; - r.read_dyn(limit).await - } - /// Read exact `size` bytes of data from reader. - pub async fn read_exact(&mut self, size: usize) -> Result<Bytes> { - let State::Idle(Some(r)) = &mut self.state else { - return Err(Error::new(ErrorKind::Unexpected, "reader must be valid")); - }; - - // Lucky path. - let bs1 = r.read_dyn(size).await?; - debug_assert!( - bs1.len() <= size, - "read should not return more bytes than expected" - ); - if bs1.len() == size { - return Ok(bs1); - } - if bs1.is_empty() { - return Err( - Error::new(ErrorKind::ContentIncomplete, "reader got too little data") - .with_context("expect", size.to_string()), - ); + // If range is empty, return Ok(0) directly. + if let Some(end) = end { + if end <= start { + return Ok(0); + } } - let mut bs = BytesMut::with_capacity(size); - bs.put_slice(&bs1); - - let mut remaining = size - bs.len(); + let mut offset = start; + let mut size = end.map(|end| end - start); + let mut read = 0; loop { - let tmp = r.read_dyn(remaining).await?; - if tmp.is_empty() { - return Err( - Error::new(ErrorKind::ContentIncomplete, "reader got too little data") - .with_context("expect", size.to_string()) - .with_context("actual", bs.len().to_string()), - ); + // TODO: use service preferred io size instead. + let limit = size.unwrap_or(4 * 1024 * 1024) as usize; + let bs = self.inner.read_at_dyn(offset, limit).await?; + let n = bs.remaining(); + read += n; + buf.put(bs); + if n < limit { + return Ok(read); } - bs.put_slice(&tmp); - debug_assert!( - tmp.len() <= remaining, - "read should not return more bytes than expected" - ); - - remaining -= tmp.len(); - if remaining == 0 { - break; + + offset += n as u64; + size = size.map(|v| v - n as u64); + if size == Some(0) { + return Ok(read); } } - - Ok(bs.freeze()) } - /// Reads all bytes until EOF in this source, placing them into buf. - pub async fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize> { - let start_len = buf.len(); - - loop { - if buf.len() == buf.capacity() { - buf.reserve(32); // buf is full, need more space - } - - let spare = buf.spare_capacity_mut(); - let mut read_buf: ReadBuf = ReadBuf::uninit(spare); + /// Read all data from reader.
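To make the bound handling above concrete, a small sketch (illustrative only, not part of the patch, and assuming the file is large enough) of how different `RangeBounds<u64>` values normalize to `(start, end)` offsets:

```rust
use opendal::{Operator, Result};

async fn ranges(op: Operator) -> Result<()> {
    let r = op.reader("path/to/file").await?;
    let mut buf = Vec::new();

    r.read_range(&mut buf, 8..16).await?; // start = 8, end = Some(16)
    r.read_range(&mut buf, 8..=15).await?; // Included(15) -> end = Some(16)
    r.read_range(&mut buf, 8..).await?; // Unbounded end -> read until EOF

    // An empty range short-circuits before any storage request is made.
    let n = r.read_range(&mut buf, 8..8).await?;
    assert_eq!(n, 0);
    Ok(())
}
```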
+ /// + /// This API is exactly the same as `Reader::read_range(buf, ..)`. + #[inline] + pub async fn read_to_end(&self, buf: &mut impl BufMut) -> Result<usize> { + self.read_range(buf, ..).await + } - // SAFETY: These bytes were initialized but not filled in the previous loop - unsafe { - read_buf.assume_init(read_buf.capacity()); - } + /// Convert reader into [`FuturesIoAsyncReader`] which implements [`futures::AsyncRead`], + /// [`futures::AsyncSeek`] and [`futures::AsyncBufRead`]. + #[inline] + pub fn into_futures_io_async_read(self, range: Range<u64>) -> FuturesIoAsyncReader { + FuturesIoAsyncReader::new(self.inner, range) + } - match self.read(read_buf.initialize_unfilled().len()).await { - Ok(bs) if bs.is_empty() => { - return Ok(buf.len() - start_len); - } - Ok(bs) => { - read_buf.initialize_unfilled()[..bs.len()].copy_from_slice(&bs); - // SAFETY: Read API makes sure that returning `n` is correct. - unsafe { - buf.set_len(buf.len() + bs.len()); - } - } - Err(e) => return Err(e), - } - } + /// Convert reader into [`FuturesBytesStream`] which implements [`futures::Stream`]. + #[inline] + pub fn into_futures_bytes_stream(self, range: Range<u64>) -> FuturesBytesStream { + FuturesBytesStream::new(self.inner, range) } } -enum State { - Idle(Option<oio::Reader>), - Reading(BoxedStaticFuture<(oio::Reader, Result<Bytes>)>), - Seeking(BoxedStaticFuture<(oio::Reader, Result<u64>)>), -} +pub mod into_futures_async_read { + use std::io; + use std::io::SeekFrom; + use std::ops::Range; + use std::pin::Pin; + use std::task::ready; + use std::task::Context; + use std::task::Poll; + + use bytes::Buf; + use futures::AsyncBufRead; + use futures::AsyncRead; + use futures::AsyncSeek; + + use crate::raw::*; + use crate::*; + + /// FuturesIoAsyncReader is the adapter of [`AsyncRead`], [`AsyncBufRead`] and [`AsyncSeek`] + /// for [`Reader`]. + /// + /// Users can use this adapter in cases where they need to use [`AsyncRead`] related trait. + /// + /// FuturesIoAsyncReader also implements [`Unpin`], [`Send`] and [`Sync`]. + pub struct FuturesIoAsyncReader { + state: State, + offset: u64, + size: u64, + cap: usize, + + cur: u64, + buf: oio::Buffer, + } /// # Safety /// /// Reader will only be used with `&mut self`.
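A usage sketch for the adapter conversion above (not part of the patch; the path and sizes are illustrative, and the assertion assumes the file holds at least 4096 bytes):

```rust
use futures::io::{AsyncReadExt, AsyncSeekExt};
use std::io::SeekFrom;
use opendal::Operator;

async fn adapt(op: Operator) -> anyhow::Result<()> {
    let r = op.reader("path/to/file").await?;
    // The adapter owns the range; seeks are interpreted relative to it.
    let mut fr = r.into_futures_io_async_read(0..4096);
    fr.seek(SeekFrom::Start(1024)).await?;
    let mut buf = Vec::new();
    fr.read_to_end(&mut buf).await?; // bytes [1024, 4096) of the file
    assert_eq!(buf.len(), 3072);
    Ok(())
}
```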
-unsafe impl Sync for State {} - -impl futures::AsyncRead for Reader { - fn poll_read( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &mut [u8], - ) -> Poll<io::Result<usize>> { - use oio::Read; - - match &mut self.state { - State::Idle(r) => { - let mut r = r.take().expect("reader must be valid"); - let size = buf.len(); - let fut = async move { - let res = r.read(size).await; - (r, res) - }; - self.state = State::Reading(Box::pin(fut)); - self.poll_read(cx, buf) - } - State::Reading(fut) => { - let (r, res) = ready!(fut.as_mut().poll(cx)); - self.state = State::Idle(Some(r)); - let bs = res.map_err(format_std_io_error)?; - let n = bs.len(); - buf[..n].copy_from_slice(&bs); - Poll::Ready(Ok(n)) - } - State::Seeking(_) => Poll::Ready(Err(io::Error::new( - io::ErrorKind::Interrupted, - "another io operation is in progress", - ))), - } + enum State { + Idle(Option<oio::Reader>), + Fill(BoxedStaticFuture<(oio::Reader, Result<oio::Buffer>)>), } -} -impl futures::AsyncSeek for Reader { - fn poll_seek( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - pos: io::SeekFrom, - ) -> Poll<io::Result<u64>> { - use oio::Read; - - match &mut self.state { - State::Idle(r) => { - let mut r = r.take().expect("reader must be valid"); - let fut = async move { - let res = r.seek(pos).await; - (r, res) - }; - self.state = State::Seeking(Box::pin(fut)); - self.poll_seek(cx, pos) - } - State::Seeking(fut) => { - let (r, res) = ready!(fut.as_mut().poll(cx)); - self.state = State::Idle(Some(r)); - Poll::Ready(res.map_err(format_std_io_error)) + /// # Safety + /// + /// FuturesReader only exposes `&mut self` to the outside world, so it's safe to be `Sync`. + unsafe impl Sync for State {} + + impl FuturesIoAsyncReader { + /// NOTE: don't allow users to create FuturesAsyncReader directly. + #[inline] + pub(super) fn new(r: oio::Reader, range: Range<u64>) -> Self { + FuturesIoAsyncReader { + state: State::Idle(Some(r)), + offset: range.start, + size: range.end - range.start, + // TODO: should use services preferred io size. + cap: 4 * 1024 * 1024, + + cur: 0, + buf: oio::Buffer::new(), } - State::Reading(_) => Poll::Ready(Err(io::Error::new( - io::ErrorKind::Interrupted, - "another io operation is in progress", - ))), } - } -} -impl tokio::io::AsyncRead for Reader { - fn poll_read( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &mut tokio::io::ReadBuf<'_>, - ) -> Poll<io::Result<()>> { - use oio::Read; + /// Set the capacity of this reader to control the IO size. + pub fn with_capacity(mut self, cap: usize) -> Self { + self.cap = cap; + self + } + } - loop { - match &mut self.state { - State::Idle(r) => { - // Safety: We make sure that we will set filled correctly.
- unsafe { buf.assume_init(buf.remaining()) } - let size = buf.initialize_unfilled().len(); - - let mut r = r.take().expect("reader must be valid"); - let fut = async move { - let res = r.read(size).await; - (r, res) - }; - self.state = State::Reading(Box::pin(fut)); + impl AsyncBufRead for FuturesIoAsyncReader { + fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<&[u8]>> { + let this = self.get_mut(); + loop { + if this.buf.has_remaining() { + return Poll::Ready(Ok(this.buf.chunk())); } - State::Reading(fut) => { - let (r, res) = ready!(fut.as_mut().poll(cx)); - self.state = State::Idle(Some(r)); - let bs = res.map_err(format_std_io_error)?; - let n = bs.len(); - buf.initialize_unfilled()[..n].copy_from_slice(&bs); - buf.advance(n); - return Poll::Ready(Ok(())); - } - State::Seeking(_) => { - return Poll::Ready(Err(io::Error::new( - io::ErrorKind::Interrupted, - "another io operation is in progress", - ))) + + match &mut this.state { + State::Idle(r) => { + // Make sure cur didn't exceed size. + if this.cur >= this.size { + return Poll::Ready(Ok(&[])); + } + + let r = r.take().expect("reader must be present"); + let next_offset = this.offset + this.cur; + let next_size = (this.size - this.cur).min(this.cap as u64) as usize; + let fut = async move { + let res = r.read_at_dyn(next_offset, next_size).await; + (r, res) + }; + this.state = State::Fill(Box::pin(fut)); + } + State::Fill(fut) => { + let (r, res) = ready!(fut.as_mut().poll(cx)); + this.state = State::Idle(Some(r)); + this.buf = res?; + } } } } - } -} -impl tokio::io::AsyncSeek for Reader { - fn start_seek(mut self: Pin<&mut Self>, pos: io::SeekFrom) -> io::Result<()> { - use oio::Read; - - match &mut self.state { - State::Idle(r) => { - let mut r = r.take().expect("reader must be valid"); - let fut = async move { - let res = r.seek(pos).await; - (r, res) - }; - self.state = State::Seeking(Box::pin(fut)); - Ok(()) - } - State::Seeking(_) | State::Reading(_) => Err(io::Error::new( - io::ErrorKind::Interrupted, - "another io operation is in progress", - )), + fn consume(mut self: Pin<&mut Self>, amt: usize) { + self.buf.advance(amt); + self.cur += amt as u64; } } - fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<u64>> { - match &mut self.state { - State::Idle(_) => { - // AsyncSeek recommends calling poll_complete before start_seek. - // We don't have to guarantee that the value returned by - // poll_complete called without start_seek is correct, - // so we'll return 0. - Poll::Ready(Ok(0)) - } - State::Seeking(fut) => { - let (r, res) = ready!(fut.as_mut().poll(cx)); - self.state = State::Idle(Some(r)); - Poll::Ready(res.map_err(format_std_io_error)) - } - State::Reading(_) => Poll::Ready(Err(io::Error::new( - io::ErrorKind::Interrupted, - "another io operation is in progress", - ))), + impl AsyncRead for FuturesIoAsyncReader { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut [u8], + ) -> Poll<io::Result<usize>> { + let bs = ready!(self.as_mut().poll_fill_buf(cx))?; + let n = bs.len().min(buf.len()); + buf[..n].copy_from_slice(&bs[..n]); + self.as_mut().consume(n); + Poll::Ready(Ok(n)) } } -} -impl Stream for Reader { - type Item = io::Result<Bytes>; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { - use oio::Read; - - match &mut self.state { - State::Idle(r) => { - let mut r = r.take().expect("reader must be valid"); - let fut = async move { - // TODO: should allow user to tune this value.
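Since `poll_fill_buf` above fetches at most `cap` bytes per `read_at_dyn` call, `with_capacity` directly controls the IO size sent to the backend. A hedged sketch with illustrative names and sizes, not part of the patch:

```rust
use futures::io::AsyncBufReadExt;
use opendal::Operator;

async fn tuned(op: Operator) -> anyhow::Result<()> {
    let r = op.reader("path/to/file").await?;
    let mut fr = r
        .into_futures_io_async_read(0..(1 << 20))
        .with_capacity(256 * 1024); // at most 256 KiB per storage request

    // The first fill issues one read; consuming only moves the in-memory cursor.
    let n = { fr.fill_buf().await?.len().min(128) };
    fr.consume_unpin(n);
    Ok(())
}
```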
- let res = r.read(4 * 1024 * 1024).await; - (r, res) - }; - self.state = State::Reading(Box::pin(fut)); - self.poll_next(cx) + impl AsyncSeek for FuturesIoAsyncReader { + fn poll_seek( + mut self: Pin<&mut Self>, + _: &mut Context<'_>, + pos: SeekFrom, + ) -> Poll<io::Result<u64>> { + let new_pos = match pos { + SeekFrom::Start(pos) => pos as i64, + SeekFrom::End(pos) => self.size as i64 + pos, + SeekFrom::Current(pos) => self.cur as i64 + pos, + }; + + if new_pos < 0 { + return Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidInput, + "invalid seek to a negative position", + ))); } - State::Reading(fut) => { - let (r, res) = ready!(fut.as_mut().poll(cx)); - self.state = State::Idle(Some(r)); - let bs = res.map_err(format_std_io_error)?; - if bs.is_empty() { - Poll::Ready(None) - } else { - Poll::Ready(Some(Ok(bs))) - } + + let new_pos = new_pos as u64; + + if (self.cur..self.cur + self.buf.remaining() as u64).contains(&new_pos) { + let cnt = new_pos - self.cur; + self.buf.advance(cnt as _); + } else { + self.buf = oio::Buffer::new() } - State::Seeking(_) => Poll::Ready(Some(Err(io::Error::new( - io::ErrorKind::Interrupted, - "another io operation is in progress", - )))), + + self.cur = new_pos; + Poll::Ready(Ok(self.cur)) } } } -/// BlockingReader is designed to read data from given path in an blocking -/// manner. -pub struct BlockingReader { - pub(crate) inner: oio::BlockingReader, -} +pub mod into_futures_stream { + use std::io; + use std::ops::Range; + use std::pin::Pin; + use std::task::ready; + use std::task::Context; + use std::task::Poll; -impl BlockingReader { - /// Create a new blocking reader. - /// - /// Create will use internal information to decide the most suitable - /// implementation for users. - /// - /// We don't want to expose those details to users so keep this function - /// in crate only. - pub(crate) fn create(acc: FusedAccessor, path: &str, op: OpRead) -> Result<Self> { - let (_, r) = acc.blocking_read(path, op)?; - - Ok(BlockingReader { inner: r }) - } + use bytes::Buf; + use bytes::Bytes; + use futures::Stream; - /// Create a new reader from an `oio::BlockingReader`. - pub(crate) fn new(r: oio::BlockingReader) -> Self { - BlockingReader { inner: r } - } + use crate::raw::*; + use crate::*; - /// Seek to the position of `pos` of reader. - #[inline] - pub fn seek(&mut self, pos: SeekFrom) -> Result<u64> { - self.inner.seek(pos) + /// FuturesBytesStream is the adapter of [`Stream`] for [`Reader`]. + /// + /// Users can use this adapter in cases where they need to use [`Stream`] trait. + /// + /// FuturesBytesStream also implements [`Unpin`], [`Send`] and [`Sync`]. + pub struct FuturesBytesStream { + state: State, + offset: u64, + size: u64, + cap: usize, + + cur: u64, } - /// Read at most `size` bytes of data from reader. - #[inline] - pub fn read(&mut self, limit: usize) -> Result<Bytes> { - self.inner.read(limit) + enum State { + Idle(Option<oio::Reader>), + Next(BoxedStaticFuture<(oio::Reader, Result<oio::Buffer>)>), } - /// Read exact `size` bytes of data from reader. - pub fn read_exact(&mut self, size: usize) -> Result<Bytes> { - // Lucky path.
- let bs1 = self.inner.read(size)?; - debug_assert!( - bs1.len() <= size, - "read should not return more bytes than expected" - ); - if bs1.len() == size { - return Ok(bs1); - } - if bs1.is_empty() { - return Err( - Error::new(ErrorKind::ContentIncomplete, "reader got too little data") - .with_context("expect", size.to_string()), - ); - } - - let mut bs = BytesMut::with_capacity(size); - bs.put_slice(&bs1); - - let mut remaining = size - bs.len(); - - loop { - let tmp = self.inner.read(remaining)?; - if tmp.is_empty() { - return Err( - Error::new(ErrorKind::ContentIncomplete, "reader got too little data") - .with_context("expect", size.to_string()) - .with_context("actual", bs.len().to_string()), - ); - } - bs.put_slice(&tmp); - debug_assert!( - tmp.len() <= remaining, - "read should not return more bytes than expected" - ); - - remaining -= tmp.len(); - if remaining == 0 { - break; + /// # Safety + /// + /// FuturesBytesStream only exposes `&mut self` to the outside world, so it's safe to be `Sync`. + unsafe impl Sync for State {} + + impl FuturesBytesStream { + /// NOTE: don't allow users to create FuturesStream directly. + #[inline] + pub(crate) fn new(r: oio::Reader, range: Range<u64>) -> Self { + FuturesBytesStream { + state: State::Idle(Some(r)), + offset: range.start, + size: range.end - range.start, + // TODO: should use services preferred io size. + cap: 4 * 1024 * 1024, + + cur: 0, } } - Ok(bs.freeze()) + /// Set the capacity of this reader to control the IO size. + pub fn with_capacity(mut self, cap: usize) -> Self { + self.cap = cap; + self + } } - /// Reads all bytes until EOF in this source, placing them into buf. - pub fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize> { - let start_len = buf.len(); - - loop { - if buf.len() == buf.capacity() { - buf.reserve(32); // buf is full, need more space - } - - let spare = buf.spare_capacity_mut(); - let mut read_buf: ReadBuf = ReadBuf::uninit(spare); - - // SAFETY: These bytes were initialized but not filled in the previous loop - unsafe { - read_buf.assume_init(read_buf.capacity()); - } - match self.read(read_buf.initialized_mut().len()) { - Ok(bs) if bs.is_empty() => return Ok(buf.len() - start_len), - Ok(bs) => { - read_buf.initialized_mut()[..bs.len()].copy_from_slice(&bs); - - // SAFETY: Read API makes sure that returning `n` is correct. - unsafe { - buf.set_len(buf.len() + bs.len()); + impl Stream for FuturesBytesStream { + type Item = io::Result<Bytes>; + + fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { + let this = self.get_mut(); + + loop { + match &mut this.state { + State::Idle(r) => { + // Make sure cur didn't exceed size.
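A consumption sketch for `FuturesBytesStream` (illustrative, not part of the patch): each yielded `Bytes` chunk corresponds to one `read_at_dyn` call of at most `cap` bytes:

```rust
use futures::TryStreamExt;
use opendal::Operator;

async fn stream_all(op: Operator) -> anyhow::Result<()> {
    let r = op.reader("path/to/file").await?;
    let mut stream = r.into_futures_bytes_stream(0..4096);
    while let Some(bs) = stream.try_next().await? {
        println!("got a chunk of {} bytes", bs.len());
    }
    Ok(())
}
```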
+ if this.cur >= this.size { + return Poll::Ready(None); + } + + let r = r.take().expect("reader must be present"); + let next_offset = this.offset + this.cur; + let next_size = (this.size - this.cur).min(this.cap as u64) as usize; + let fut = async move { + let res = r.read_at_dyn(next_offset, next_size).await; + (r, res) + }; + this.state = State::Next(Box::pin(fut)); + } + State::Next(fut) => { + let (r, res) = ready!(fut.as_mut().poll(cx)); + this.state = State::Idle(Some(r)); + return match res { + Ok(buf) if !buf.has_remaining() => Poll::Ready(None), + Ok(mut buf) => { + this.cur += buf.remaining() as u64; + Poll::Ready(Some(Ok(buf.copy_to_bytes(buf.remaining())))) + } + Err(err) => Poll::Ready(Some(Err(format_std_io_error(err)))), + }; } } - Err(e) => return Err(e), } } } } -impl io::Read for BlockingReader { - #[inline] - fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - let bs = self.inner.read(buf.len()).map_err(format_std_io_error)?; - buf[..bs.len()].copy_from_slice(&bs); - Ok(bs.len()) - } -} - -impl io::Seek for BlockingReader { - #[inline] - fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> { - self.inner.seek(pos).map_err(format_std_io_error) - } -} - -impl Iterator for BlockingReader { - type Item = io::Result<Bytes>; - - #[inline] - fn next(&mut self) -> Option<Self::Item> { - match self - .inner - .read(4 * 1024 * 1024) - .map_err(format_std_io_error) - { - Ok(bs) if bs.is_empty() => None, - Ok(bs) => Some(Ok(bs)), - Err(err) => Some(Err(err)), - } - } -} - #[cfg(test)] mod tests { use rand::rngs::ThreadRng; @@ -598,7 +421,7 @@ mod tests { .await .expect("write must succeed"); - let mut reader = op.reader(path).await.unwrap(); + let reader = op.reader(path).await.unwrap(); let mut buf = Vec::new(); reader .read_to_end(&mut buf) @@ -618,17 +441,7 @@ mod tests { .await .expect("write must succeed"); - let mut reader = op.reader(path).await.unwrap(); - let mut buf = Vec::new(); - reader - .read_to_end(&mut buf) - .await - .expect("read to end must succeed"); - assert_eq!(buf, content); - - let n = reader.seek(tokio::io::SeekFrom::Start(0)).await.unwrap(); - assert_eq!(n, 0, "seek position must be 0"); - + let reader = op.reader(path).await.unwrap(); let mut buf = Vec::new(); reader .read_to_end(&mut buf) diff --git a/core/src/types/writer.rs b/core/src/types/writer.rs index 9a235c0a0de6..64c32667c025 100644 --- a/core/src/types/writer.rs +++ b/core/src/types/writer.rs @@ -126,12 +126,11 @@ impl Writer { /// # Examples /// /// ```no_run - /// use opendal::Result; - /// /// use bytes::Bytes; /// use futures::stream; /// use futures::StreamExt; /// use opendal::Operator; + /// use opendal::Result; /// /// async fn sink_example(op: Operator) -> Result<()> { /// let mut w = op.writer_with("path/to/file").await?; @@ -176,13 +175,12 @@ impl Writer { /// # Examples /// /// ```no_run - /// use opendal::Result; - /// /// use bytes::Bytes; /// use futures::io::Cursor; /// use futures::stream; /// use futures::StreamExt; /// use opendal::Operator; + /// use opendal::Result; /// /// async fn copy_example(op: Operator) -> Result<()> { /// let mut w = op.writer_with("path/to/file").await?; diff --git a/core/tests/behavior/async_fuzz.rs b/core/tests/behavior/async_fuzz.rs index 6113f0ee72d0..2dc3647914b5 100644 --- a/core/tests/behavior/async_fuzz.rs +++ b/core/tests/behavior/async_fuzz.rs @@ -26,12 +26,7 @@ use opendal::raw::BytesRange; use crate::*; pub fn tests(op: &Operator, tests: &mut Vec<Trial>) { - tests.extend(async_trials!( - op, - test_fuzz_issue_2717, - test_fuzz_pr_3395_case_1, -
test_fuzz_pr_3395_case_2 - )) + tests.extend(async_trials!(op)) } async fn test_fuzz_read( @@ -59,89 +54,3 @@ async fn test_fuzz_read( checker.check(r, actions).await; Ok(()) } - -/// This fuzz test is to reproduce issue 2717. -/// -/// The simplified cases could be seen as: -/// -/// ``` -/// FuzzInput { -/// actions: [ -/// Seek( -/// End( -/// -2, -/// ), -/// ), -/// Read { -/// size: 0, -/// }, -/// ], -/// data: [ -/// 0, -/// 0, -/// ], -/// range: ( -/// 1, -/// 2, -/// ) -/// ] -/// } -/// ``` -/// -/// Which means: -/// -/// - A file with 2 bytes of content. -/// - Open as an range reader of `1..2`. -/// - Seek to `End(-2)` first -/// -/// The expected result is seek returns InvalidInput error because the seek position -/// is invalid for given range `1..2`. However, the actual behavior is we seek to `0` -/// and results in a panic. -pub async fn test_fuzz_issue_2717(op: Operator) -> Result<()> { - let actions = [ReadAction::Seek(SeekFrom::End(-2))]; - - test_fuzz_read(op, 2, .., &actions).await -} - -/// This fuzz test is to reproduce bug inside PR 3395. -/// -/// The simplified cases could be seen as: -/// -/// ``` -/// FuzzInput { -/// path: "06ae5d93-c0e9-43f2-ae5a-225cfaaa40a0", -/// size: 1, -/// range: BytesRange(Some(0), None), -/// actions: [Seek(Current(1)), Next, Seek(End(-1))], -/// } -/// ``` -pub async fn test_fuzz_pr_3395_case_1(op: Operator) -> Result<()> { - let actions = [ - ReadAction::Seek(SeekFrom::Current(1)), - ReadAction::Read(1024), - ReadAction::Seek(SeekFrom::End(-1)), - ]; - test_fuzz_read(op, 1, 0.., &actions).await -} - -/// This fuzz test is to reproduce bug inside PR 3395. -/// -/// The simplified cases could be seen as: -/// -/// ``` -/// FuzzInput { -/// path: "e6056989-7c7c-4075-b975-5ae380884333", -/// size: 1, -/// range: BytesRange(Some(0), None), -/// actions: [Next, Seek(Current(1)), Next, Seek(End(0))], -/// } -/// ``` -pub async fn test_fuzz_pr_3395_case_2(op: Operator) -> Result<()> { - let actions = [ - ReadAction::Read(1024), - ReadAction::Seek(SeekFrom::Current(1)), - ReadAction::Read(1024), - ReadAction::Seek(SeekFrom::End(0)), - ]; - test_fuzz_read(op, 1, 0.., &actions).await -} diff --git a/core/tests/behavior/async_read.rs b/core/tests/behavior/async_read.rs index adbbeafa5897..0ba7db507c42 100644 --- a/core/tests/behavior/async_read.rs +++ b/core/tests/behavior/async_read.rs @@ -35,12 +35,6 @@ pub fn tests(op: &Operator, tests: &mut Vec<Trial>) { test_read_full, test_read_range, test_read_large_range, - test_reader_range, - test_reader_range_with_buffer, - test_reader_from, - test_reader_from_with_buffer, - test_reader_tail, - test_reader_tail_with_buffer, test_read_not_exist, test_read_with_if_match, test_read_with_if_none_match, @@ -48,8 +42,7 @@ pub fn tests(op: &Operator, tests: &mut Vec<Trial>) { test_read_with_special_chars, test_read_with_override_cache_control, test_read_with_override_content_disposition, - test_read_with_override_content_type, - test_read_with_invalid_seek + test_read_with_override_content_type )) } @@ -59,9 +52,6 @@ pub fn tests(op: &Operator, tests: &mut Vec<Trial>) { test_read_only_read_full, test_read_only_read_full_with_special_chars, test_read_only_read_with_range, - test_read_only_reader_with_range, - test_read_only_reader_from, - test_read_only_reader_tail, test_read_only_read_not_exist, test_read_only_read_with_dir_path, test_read_only_read_with_if_match, @@ -91,10 +81,6 @@ pub async fn test_read_full(op: Operator) -> anyhow::Result<()> { /// Read range content should match.
pub async fn test_read_range(op: Operator) -> anyhow::Result<()> { - if !op.info().full_capability().read_with_range { - return Ok(()); - } - let (path, content, size) = TEST_FIXTURE.new_file(op.clone()); let (offset, length) = gen_offset_length(size); @@ -118,10 +104,6 @@ pub async fn test_read_range(op: Operator) -> anyhow::Result<()> { /// Read large range content should match. pub async fn test_read_large_range(op: Operator) -> anyhow::Result<()> { - if !op.info().full_capability().read_with_range { - return Ok(()); - } - let (path, content, size) = TEST_FIXTURE.new_file(op.clone()); let (offset, _) = gen_offset_length(size); @@ -144,198 +126,6 @@ pub async fn test_read_large_range(op: Operator) -> anyhow::Result<()> { Ok(()) } -/// Read range content should match. -pub async fn test_reader_range(op: Operator) -> anyhow::Result<()> { - if !op.info().full_capability().read_with_range { - return Ok(()); - } - - let (path, content, size) = TEST_FIXTURE.new_file(op.clone()); - let (offset, length) = gen_offset_length(size); - - op.write(&path, content.clone()) - .await - .expect("write must succeed"); - - let mut r = op.reader_with(&path).range(offset..offset + length).await?; - - let mut bs = Vec::new(); - r.read_to_end(&mut bs).await?; - - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!( - "{:x}", - Sha256::digest(&content[offset as usize..(offset + length) as usize]) - ), - "read content" - ); - - Ok(()) -} - -/// Read range content should match. -pub async fn test_reader_range_with_buffer(op: Operator) -> anyhow::Result<()> { - if !op.info().full_capability().read_with_range { - return Ok(()); - } - - let (path, content, size) = TEST_FIXTURE.new_file(op.clone()); - let (offset, length) = gen_offset_length(size); - - op.write(&path, content.clone()) - .await - .expect("write must succeed"); - - let mut r = op - .reader_with(&path) - .range(offset..offset + length) - .buffer(4096) - .await?; - - let mut bs = Vec::new(); - r.read_to_end(&mut bs).await?; - - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!( - "{:x}", - Sha256::digest(&content[offset as usize..(offset + length) as usize]) - ), - "read content" - ); - - Ok(()) -} - -/// Read range from should match. -pub async fn test_reader_from(op: Operator) -> anyhow::Result<()> { - if !op.info().full_capability().read_with_range { - return Ok(()); - } - - let (path, content, size) = TEST_FIXTURE.new_file(op.clone()); - let (offset, _) = gen_offset_length(size); - - op.write(&path, content.clone()) - .await - .expect("write must succeed"); - - let mut r = op.reader_with(&path).range(offset..).await?; - - let mut bs = Vec::new(); - r.read_to_end(&mut bs).await?; - - assert_eq!(bs.len(), size - offset as usize, "read size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!("{:x}", Sha256::digest(&content[offset as usize..])), - "read content" - ); - - Ok(()) -} - -/// Read range from should match. 
-pub async fn test_reader_from_with_buffer(op: Operator) -> anyhow::Result<()> { - if !op.info().full_capability().read_with_range { - return Ok(()); - } - - let (path, content, size) = TEST_FIXTURE.new_file(op.clone()); - let (offset, _) = gen_offset_length(size); - - op.write(&path, content.clone()) - .await - .expect("write must succeed"); - - let mut r = op.reader_with(&path).range(offset..).buffer(4096).await?; - - let mut bs = Vec::new(); - r.read_to_end(&mut bs).await?; - - assert_eq!(bs.len(), size - offset as usize, "read size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!("{:x}", Sha256::digest(&content[offset as usize..])), - "read content" - ); - - Ok(()) -} - -/// Read range tail should match. -pub async fn test_reader_tail(op: Operator) -> anyhow::Result<()> { - if !op.info().full_capability().read_with_range { - return Ok(()); - } - - let (path, content, size) = TEST_FIXTURE.new_file(op.clone()); - let (_, length) = gen_offset_length(size); - - op.write(&path, content.clone()) - .await - .expect("write must succeed"); - - let mut r = match op.reader_with(&path).range(..length).await { - Ok(r) => r, - // Not all services support range with tail range, let's tolerate this. - Err(err) if err.kind() == ErrorKind::Unsupported => { - warn!("service doesn't support range with tail"); - return Ok(()); - } - Err(err) => return Err(err.into()), - }; - - let mut bs = Vec::new(); - r.read_to_end(&mut bs).await?; - - assert_eq!(bs.len(), length as usize, "read size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!("{:x}", Sha256::digest(&content[size - length as usize..])), - "read content" - ); - - Ok(()) -} - -/// Read range tail should match. -pub async fn test_reader_tail_with_buffer(op: Operator) -> anyhow::Result<()> { - if !op.info().full_capability().read_with_range { - return Ok(()); - } - - let (path, content, size) = TEST_FIXTURE.new_file(op.clone()); - let (_, length) = gen_offset_length(size); - - op.write(&path, content.clone()) - .await - .expect("write must succeed"); - - let mut r = match op.reader_with(&path).range(..length).buffer(4096).await { - Ok(r) => r, - // Not all services support range with tail range, let's tolerate this. 
- Err(err) if err.kind() == ErrorKind::Unsupported => { - warn!("service doesn't support range with tail"); - return Ok(()); - } - Err(err) => return Err(err.into()), - }; - - let mut bs = Vec::new(); - r.read_to_end(&mut bs).await?; - - assert_eq!(bs.len(), length as usize, "read size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!("{:x}", Sha256::digest(&content[size - length as usize..])), - "read content" - ); - - Ok(()) -} - /// Read not exist file should return NotFound pub async fn test_read_not_exist(op: Operator) -> anyhow::Result<()> { let path = uuid::Uuid::new_v4().to_string(); @@ -596,28 +386,6 @@ pub async fn test_read_with_override_content_type(op: Operator) -> anyhow::Resul Ok(()) } -/// seeking a negative position should return a InvalidInput error -pub async fn test_read_with_invalid_seek(op: Operator) -> anyhow::Result<()> { - let (path, content, _) = TEST_FIXTURE.new_file(op.clone()); - - op.write(&path, content.clone()) - .await - .expect("write must succeed"); - - let mut r = op.reader(&path).await?; - let res = r.seek(std::io::SeekFrom::Current(-1024)).await; - - assert!(res.is_err()); - - assert_eq!( - res.unwrap_err().kind(), - ErrorKind::InvalidInput, - "seeking a negative position should return a InvalidInput error" - ); - - Ok(()) -} - /// Read full content should match. pub async fn test_read_only_read_full(op: Operator) -> anyhow::Result<()> { let bs = op.read("normal_file.txt").await?; @@ -657,57 +425,6 @@ pub async fn test_read_only_read_with_range(op: Operator) -> anyhow::Result<()> Ok(()) } -/// Read range should match. -pub async fn test_read_only_reader_with_range(op: Operator) -> anyhow::Result<()> { - let mut r = op.reader_with("normal_file.txt").range(1024..2048).await?; - - let mut bs = Vec::new(); - r.read_to_end(&mut bs).await?; - - assert_eq!(bs.len(), 1024, "read size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - "330c6d57fdc1119d6021b37714ca5ad0ede12edd484f66be799a5cff59667034", - "read content" - ); - - Ok(()) -} - -/// Read from should match. -pub async fn test_read_only_reader_from(op: Operator) -> anyhow::Result<()> { - let mut r = op.reader_with("normal_file.txt").range(29458..).await?; - - let mut bs = Vec::new(); - r.read_to_end(&mut bs).await?; - - assert_eq!(bs.len(), 1024, "read size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - "cc9312c869238ea9410b6716e0fc3f48056f2bfb2fe06ccf5f96f2c3bf39e71b", - "read content" - ); - - Ok(()) -} - -/// Read tail should match. 
-pub async fn test_read_only_reader_tail(op: Operator) -> anyhow::Result<()> { - let mut r = op.reader_with("normal_file.txt").range(..1024).await?; - - let mut bs = Vec::new(); - r.read_to_end(&mut bs).await?; - - assert_eq!(bs.len(), 1024, "read size"); - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - "cc9312c869238ea9410b6716e0fc3f48056f2bfb2fe06ccf5f96f2c3bf39e71b", - "read content" - ); - - Ok(()) -} - /// Read not exist file should return NotFound pub async fn test_read_only_read_not_exist(op: Operator) -> anyhow::Result<()> { let path = uuid::Uuid::new_v4().to_string(); diff --git a/core/tests/behavior/async_write.rs b/core/tests/behavior/async_write.rs index 9b3124c456dc..79077b73a4bf 100644 --- a/core/tests/behavior/async_write.rs +++ b/core/tests/behavior/async_write.rs @@ -31,7 +31,7 @@ use crate::*; pub fn tests(op: &Operator, tests: &mut Vec<Trial>) { let cap = op.info().full_capability(); - if cap.write && cap.stat { + if cap.read && cap.write && cap.stat { tests.extend(async_trials!( op, test_write_only, @@ -54,7 +54,7 @@ pub fn tests(op: &Operator, tests: &mut Vec<Trial>) { )) } - if cap.write && cap.write_can_append && cap.stat { + if cap.read && cap.write && cap.write_can_append && cap.stat { tests.extend(async_trials!( op, test_write_with_append, diff --git a/core/tests/behavior/blocking_read.rs b/core/tests/behavior/blocking_read.rs index 4b5de2d7e361..006d3349d9d0 100644 --- a/core/tests/behavior/blocking_read.rs +++ b/core/tests/behavior/blocking_read.rs @@ -71,10 +71,6 @@ pub fn test_blocking_read_full(op: BlockingOperator) -> Result<()> { /// Read range content should match. pub fn test_blocking_read_range(op: BlockingOperator) -> Result<()> { - if !op.info().full_capability().read_with_range { - return Ok(()); - } - let path = uuid::Uuid::new_v4().to_string(); debug!("Generate a random file: {}", &path); let (content, size) = gen_bytes(op.info().full_capability()); @@ -100,10 +96,6 @@ pub fn test_blocking_read_range(op: BlockingOperator) -> Result<()> { /// Read large range content should match. pub fn test_blocking_read_large_range(op: BlockingOperator) -> Result<()> { - if !op.info().full_capability().read_with_range { - return Ok(()); - } - let path = uuid::Uuid::new_v4().to_string(); debug!("Generate a random file: {}", &path); let (content, size) = gen_bytes(op.info().full_capability()); diff --git a/core/tests/behavior/main.rs b/core/tests/behavior/main.rs index 20eb832c82b4..666d36a72227 100644 --- a/core/tests/behavior/main.rs +++ b/core/tests/behavior/main.rs @@ -24,7 +24,7 @@ pub use utils::*; mod async_copy; mod async_create_dir; mod async_delete; -mod async_fuzz; +// mod async_fuzz; mod async_list; mod async_presign; mod async_read; @@ -65,7 +65,6 @@ fn main() -> anyhow::Result<()> { async_copy::tests(&op, &mut tests); async_create_dir::tests(&op, &mut tests); async_delete::tests(&op, &mut tests); - async_fuzz::tests(&op, &mut tests); async_list::tests(&op, &mut tests); async_presign::tests(&op, &mut tests); async_read::tests(&op, &mut tests); diff --git a/integrations/object_store/src/lib.rs b/integrations/object_store/src/lib.rs index 11f4e4f4f33e..dd523cc9192a 100644 --- a/integrations/object_store/src/lib.rs +++ b/integrations/object_store/src/lib.rs @@ -15,16 +15,10 @@ // specific language governing permissions and limitations // under the License.
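The capability gating above can be read as a small predicate; a sketch (hypothetical helper name, not part of the patch) of why `cap.read` joins the condition now that write tests read data back for verification:

```rust
use opendal::Operator;

fn should_run_write_tests(op: &Operator) -> bool {
    let cap = op.info().full_capability();
    // Write tests verify content by reading it back, so `read` is required
    // alongside `write` and `stat`.
    cap.read && cap.write && cap.stat
}
```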
-use std::ops::Range; -use std::pin::Pin; -use std::task::Context; -use std::task::Poll; - use async_trait::async_trait; use bytes::Bytes; use futures::stream::BoxStream; use futures::FutureExt; -use futures::Stream; use futures::StreamExt; use futures::TryStreamExt; use object_store::path::Path; @@ -42,7 +36,7 @@ use opendal::Entry; use opendal::Metadata; use opendal::Metakey; use opendal::Operator; -use opendal::Reader; +use std::ops::Range; use tokio::io::AsyncWrite; #[derive(Debug)] @@ -140,8 +134,15 @@ impl ObjectStore for OpendalStore { .await .map_err(|err| format_object_store_error(err, location.as_ref()))?; + let stream = r + .into_futures_bytes_stream(0..meta.size as u64) + .map_err(|err| object_store::Error::Generic { + store: "IoError", + source: Box::new(err), + }); + Ok(GetResult { - payload: GetResultPayload::Stream(Box::pin(OpendalReader { inner: r })), + payload: GetResultPayload::Stream(Box::pin(stream)), range: (0..meta.size), meta, }) @@ -340,24 +341,6 @@ async fn try_format_object_meta(res: Result<Entry>) -> Result<ObjectMeta> { -struct OpendalReader { - inner: Reader, -} - -impl Stream for OpendalReader { - type Item = object_store::Result<Bytes>; - - fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { - let inner = Pin::new(&mut self.get_mut().inner); - inner - .poll_next(cx) - .map_err(|err| object_store::Error::Generic { - store: "IoError", - source: Box::new(err), - }) - } -} - #[cfg(test)] mod tests { use std::sync::Arc;
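Finally, a hedged sketch of the consumer side of this object_store change (not part of the patch; the path is illustrative): callers keep using the standard `GetResult` API, which now drains OpenDAL's `FuturesBytesStream` underneath instead of the removed `OpendalReader` wrapper:

```rust
use object_store::path::Path;
use object_store::ObjectStore;

async fn fetch(store: &dyn ObjectStore) -> object_store::Result<()> {
    let result = store.get(&Path::from("path/to/file")).await?;
    let bytes = result.bytes().await?; // collects the underlying stream
    println!("fetched {} bytes", bytes.len());
    Ok(())
}
```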