From da155eeedfb59f74b37e1c4eebfa441dcf25ddc7 Mon Sep 17 00:00:00 2001 From: meteorgan Date: Thu, 16 Jan 2025 23:09:30 +0800 Subject: [PATCH] RFC-5556: Write Returns Metadata (#5556) --- .../docs/rfcs/5556_write_returns_metadata.md | 121 ++++++++++++++++++ core/src/docs/rfcs/mod.rs | 4 + 2 files changed, 125 insertions(+) create mode 100644 core/src/docs/rfcs/5556_write_returns_metadata.md diff --git a/core/src/docs/rfcs/5556_write_returns_metadata.md b/core/src/docs/rfcs/5556_write_returns_metadata.md new file mode 100644 index 000000000000..a381580e516a --- /dev/null +++ b/core/src/docs/rfcs/5556_write_returns_metadata.md @@ -0,0 +1,121 @@ +- Proposal Name: `write_returns_metadata` +- Start Date: 2025-01-16 +- RFC PR: [apache/opendal#5556](https://github.com/apache/opendal/pull/5556) +- Tracking Issue: [apache/opendal#5557](https://github.com/apache/opendal/issues/5557) + +# Summary + +Enhance write operations by returning metadata after successful writes. + +# Motivation + +Currently, write operations (`write`, `write_with`, `writer`, `writer_with`) only return `Result<()>` or `Result<Writer>`. +Users who need metadata after writing (like `ETag` or `version_id`) must make an additional `stat()` call. This is inefficient +and can lead to race conditions if the file is modified between the write and stat operations. + +Many storage services (like S3, GCS, Azure Blob) return metadata in their write responses. We should expose this information +to users directly after write operations. 
+ +# Guide-level explanation + +The write operations will be enhanced to return metadata: + +```rust +// Before +op.write("path/to/file", data).await?; +let meta = op.stat("path/to/file").await?; +if let Some(etag) = meta.etag() { + println!("File ETag: {}", etag); +} + +// After +let meta = op.write("path/to/file", data).await?; +if let Some(etag) = meta.etag() { + println!("File ETag: {}", etag); +} +``` + +For writer operations: + +```rust +// Before +let mut writer = op.writer("path/to/file").await?; +writer.write(data).await?; +writer.close().await?; +let meta = op.stat("path/to/file").await?; +if let Some(etag) = meta.etag() { + println!("File ETag: {}", etag); +} + +// After +let mut writer = op.writer("path/to/file").await?; +writer.write(data).await?; +let meta = writer.close().await?; +if let Some(etag) = meta.etag() { + println!("File ETag: {}", etag); +} +``` + +The behavior remains unchanged if users don't need the metadata - they can simply ignore the return value. + +# Reference-level explanation + +## Changes to `Operator` API + +The following functions will be modified to return `Result<Metadata>` instead of `Result<()>`: + +- `write()` +- `write_with()` + +The `writer()` and `writer_with()` return types remain unchanged as they return `Result<Writer>`. + +## Changes to struct `Writer` + +The `Writer` struct will be modified to return `Result<Metadata>` instead of `Result<()>` for the `close()` function. + +## Changes to trait `oio::Write` and trait `oio::MultipartWrite` + +The `Write` trait will be modified to return `Result<Metadata>` instead of `Result<()>` for the `close()` function. + +The `MultipartWrite` trait will be modified to return `Result<Metadata>` instead of `Result<()>` for the `complete_part()` +and `write_once()` functions. + +## Implementation Details + +For services that return metadata in their write responses: +- The metadata will be captured from the service response +- All available fields (etag, version_id, etc.) 
will be populated + +For services that don't return metadata in write responses: +- For `fs`: we can use `stat` to retrieve the metadata before returning. Since the metadata is cached by the kernel, +this won't cause a performance issue. +- For other services: a default metadata object will be returned. + + +# Drawbacks + +- Minor breaking change for users who explicitly type the return value of write operations +- Additional complexity in the Writer implementation + +# Rationale and alternatives + +- Provides a clean, consistent API +- Maintains backward compatibility for users who ignore the return value +- Improves performance by avoiding additional stat calls when possible + +# Prior art + +Similar patterns exist in other storage SDKs: + +- `object_store` crate returns metadata in `PutResult` after calling `put_opts` +- AWS SDK returns metadata in `PutObjectOutput` +- Azure SDK returns `UploadFileResponse` after uploads + +# Unresolved questions + +- None + + +# Future possibilities + +- None \ No newline at end of file diff --git a/core/src/docs/rfcs/mod.rs b/core/src/docs/rfcs/mod.rs index 7dcb2bf8a4b0..4d4980e81782 100644 --- a/core/src/docs/rfcs/mod.rs +++ b/core/src/docs/rfcs/mod.rs @@ -256,3 +256,7 @@ pub mod rfc_5485_conditional_reader {} /// List With Deleted #[doc = include_str!("5495_list_with_deleted.md")] pub mod rfc_5495_list_with_deleted {} + +/// Write Returns Metadata +#[doc = include_str!("5556_write_returns_metadata.md")] +pub mod rfc_5556_write_returns_metadata {}