Skip to content

Commit

Permalink
feat(core/raw/buf): Reduce one allocation by Arc::from_iter (#4440)
Browse files Browse the repository at this point in the history
* feat(core/raw/buf): Reduce one allocation by `Arc::from_iter`

Signed-off-by: Xuanwo <[email protected]>

* Format code

Signed-off-by: Xuanwo <[email protected]>

---------

Signed-off-by: Xuanwo <[email protected]>
  • Loading branch information
Xuanwo authored Apr 9, 2024
1 parent 333029c commit b13d7a5
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 81 deletions.
88 changes: 12 additions & 76 deletions core/src/raw/oio/buf/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ impl From<Bytes> for Buffer {
}
}

/// Transform `VecDeque<Bytes>` to `Arc<[Bytes]>`.
impl From<VecDeque<Bytes>> for Buffer {
fn from(bs: VecDeque<Bytes>) -> Self {
let size = bs.iter().map(|b| b.len()).sum();
Expand All @@ -110,7 +109,6 @@ impl From<VecDeque<Bytes>> for Buffer {
}
}

/// Transform `Vec<Bytes>` to `Arc<[Bytes]>`.
impl From<Vec<Bytes>> for Buffer {
fn from(bs: Vec<Bytes>) -> Self {
let size = bs.iter().map(|b| b.len()).sum();
Expand All @@ -123,6 +121,18 @@ impl From<Vec<Bytes>> for Buffer {
}
}

impl From<Arc<[Bytes]>> for Buffer {
fn from(bs: Arc<[Bytes]>) -> Self {
let size = bs.iter().map(|b| b.len()).sum();
Self(Inner::NonContiguous {
parts: bs,
size,
idx: 0,
offset: 0,
})
}
}

impl Buf for Buffer {
#[inline]
fn remaining(&self) -> usize {
Expand Down Expand Up @@ -238,80 +248,6 @@ impl Iterator for Buffer {
}
}

/// BufferQueue is a queue of [`Buffer`].
///
/// It's works like a `Vec<Buffer>` but with more efficient `advance` operation.
#[derive(Default)]
pub struct BufferQueue(VecDeque<Buffer>);

impl BufferQueue {
/// Create a new buffer queue.
#[inline]
pub fn new() -> Self {
Self::default()
}

/// Push new [`Buffer`] into the queue.
#[inline]
pub fn push(&mut self, buf: Buffer) {
self.0.push_back(buf);
}

/// Total bytes size inside the buffer queue.
#[inline]
pub fn len(&self) -> usize {
self.0.iter().map(|b| b.len()).sum()
}

/// Is the buffer queue empty.
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}

/// Build a Buffer from the queue.
#[inline]
pub fn to_buffer(&self) -> Buffer {
if self.0.is_empty() {
Buffer::new()
} else if self.0.len() == 1 {
self.0.clone().pop_front().unwrap()
} else {
let mut bytes = Vec::with_capacity(self.0.iter().map(|b| b.size_hint().0).sum());
for buf in self.0.clone() {
for bs in buf {
bytes.push(bs);
}
}
Buffer::from(bytes)
}
}

/// Advance the buffer queue by `cnt` bytes.
#[inline]
pub fn advance(&mut self, cnt: usize) {
assert!(cnt <= self.len(), "cannot advance past {cnt} bytes");

let mut new_cnt = cnt;
while new_cnt > 0 {
let buf = self.0.front_mut().expect("buffer must be valid");
if new_cnt < buf.remaining() {
buf.advance(new_cnt);
break;
} else {
new_cnt -= buf.remaining();
self.0.pop_front();
}
}
}

/// Clear the buffer queue.
#[inline]
pub fn clear(&mut self) {
self.0.clear()
}
}

#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
Expand Down
4 changes: 3 additions & 1 deletion core/src/raw/oio/buf/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

mod buffer;
pub use buffer::Buffer;
pub use buffer::BufferQueue;

mod flex_buf;
pub use flex_buf::FlexBuf;

mod queue_buf;
pub use queue_buf::QueueBuf;
109 changes: 109 additions & 0 deletions core/src/raw/oio/buf/queue_buf.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::raw::oio::Buffer;
use bytes::{Buf, Bytes};
use std::collections::VecDeque;
use std::sync::Arc;

/// QueueBuf is a queue of [`Buffer`].
///
/// It's designed to allow storing multiple buffers without copying underlying bytes and consume them
/// in order.
///
/// QueueBuf mainly provides the following operations:
///
/// - `push`: Push a new buffer in the queue.
/// - `collect`: Collect all buffer in the queue as a new [`Buffer`]
/// - `advance`: Advance the queue by `cnt` bytes.
#[derive(Default)]
pub struct QueueBuf(VecDeque<Buffer>);

impl QueueBuf {
/// Create a new buffer queue.
#[inline]
pub fn new() -> Self {
Self::default()
}

/// Push new [`Buffer`] into the queue.
#[inline]
pub fn push(&mut self, buf: Buffer) {
self.0.push_back(buf);
}

/// Total bytes size inside the buffer queue.
#[inline]
pub fn len(&self) -> usize {
self.0.iter().map(|b| b.len()).sum()
}

/// Is the buffer queue empty.
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}

/// Build a new [`Buffer`] from the queue.
///
/// If the queue is empty, it will return an empty buffer. Otherwise, it will iterate over all
/// buffers and collect them into a new buffer.
///
/// # Notes
///
/// There are allocation overheads when collecting multiple buffers into a new buffer. But
/// most of them should be acceptable since we can expect the item length of buffers are slower
/// than 4k.
#[inline]
pub fn collect(&self) -> Buffer {
if self.0.is_empty() {
Buffer::new()
} else if self.0.len() == 1 {
self.0.clone().pop_front().unwrap()
} else {
let iter = self.0.clone().into_iter().flatten();
// This operation only needs one allocation from iterator to `Arc<[Bytes]>` instead
// of iterator -> `Vec<Bytes>` -> `Arc<[Bytes]>`.
let bs: Arc<[Bytes]> = Arc::from_iter(iter);
Buffer::from(bs)
}
}

/// Advance the buffer queue by `cnt` bytes.
#[inline]
pub fn advance(&mut self, cnt: usize) {
assert!(cnt <= self.len(), "cannot advance past {cnt} bytes");

let mut new_cnt = cnt;
while new_cnt > 0 {
let buf = self.0.front_mut().expect("buffer must be valid");
if new_cnt < buf.remaining() {
buf.advance(new_cnt);
break;
} else {
new_cnt -= buf.remaining();
self.0.pop_front();
}
}
}

/// Clear the buffer queue.
#[inline]
pub fn clear(&mut self) {
self.0.clear()
}
}
8 changes: 4 additions & 4 deletions core/src/raw/oio/write/exact_buf_write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ pub struct ExactBufWriter<W: oio::Write> {

/// The size for buffer, we will flush the underlying storage at the size of this buffer.
buffer_size: usize,
buffer: oio::BufferQueue,
buffer: oio::QueueBuf,
}

impl<W: oio::Write> ExactBufWriter<W> {
Expand All @@ -39,15 +39,15 @@ impl<W: oio::Write> ExactBufWriter<W> {
Self {
inner,
buffer_size,
buffer: oio::BufferQueue::new(),
buffer: oio::QueueBuf::new(),
}
}
}

impl<W: oio::Write> oio::Write for ExactBufWriter<W> {
async fn write(&mut self, mut bs: oio::Buffer) -> Result<usize> {
if self.buffer.len() >= self.buffer_size {
let written = self.inner.write(self.buffer.to_buffer()).await?;
let written = self.inner.write(self.buffer.collect()).await?;
self.buffer.advance(written);
}

Expand All @@ -64,7 +64,7 @@ impl<W: oio::Write> oio::Write for ExactBufWriter<W> {
break;
}

let written = self.inner.write(self.buffer.to_buffer()).await?;
let written = self.inner.write(self.buffer.collect()).await?;
self.buffer.advance(written);
}

Expand Down

0 comments on commit b13d7a5

Please sign in to comment.