From 20e5a638732b6a583baaee1a41e0cc1c9b06e8b6 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 16 Oct 2024 14:46:41 +0200 Subject: [PATCH 1/2] Guarantee address in split_off/split_to for empty slices Signed-off-by: Alice Ryhl --- src/bytes.rs | 29 +++++++++++++--- src/bytes_mut.rs | 4 ++- tests/test_bytes.rs | 80 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 6 deletions(-) diff --git a/src/bytes.rs b/src/bytes.rs index ec95e802d..df21f970e 100644 --- a/src/bytes.rs +++ b/src/bytes.rs @@ -184,6 +184,22 @@ impl Bytes { } } + /// Creates a new `Bytes` with length zero and the given pointer as the address. + fn new_empty_with_ptr(ptr: *const u8) -> Self { + debug_assert!(!ptr.is_null()); + + // Detach this pointer's provenance from whichever allocation it came from, and reattach it + // to the provenance of the fake ZST [u8;0] at the same address. + let ptr = ptr as usize as *const u8; + + Bytes { + ptr, + len: 0, + data: AtomicPtr::new(ptr::null_mut()), + vtable: &STATIC_VTABLE, + } + } + /// Returns the number of bytes contained in this `Bytes`. /// /// # Examples @@ -366,7 +382,9 @@ impl Bytes { /// Splits the bytes into two at the given index. /// /// Afterwards `self` contains elements `[0, at)`, and the returned `Bytes` - /// contains elements `[at, len)`. + /// contains elements `[at, len)`. It's guaranteed that the memory does not + /// move, that is, the address of `self` does not change, and the address of + /// the returned slice is `at` bytes after that. /// /// This is an `O(1)` operation that just increases the reference count and /// sets a few indices. @@ -389,11 +407,11 @@ impl Bytes { #[must_use = "consider Bytes::truncate if you don't need the other half"] pub fn split_off(&mut self, at: usize) -> Self { if at == self.len() { - return Bytes::new(); + return Bytes::new_empty_with_ptr(self.ptr.wrapping_add(at)); } if at == 0 { - return mem::replace(self, Bytes::new()); + return mem::replace(self, Bytes::new_empty_with_ptr(self.ptr)); } assert!( @@ -438,11 +456,12 @@ impl Bytes { #[must_use = "consider Bytes::advance if you don't need the other half"] pub fn split_to(&mut self, at: usize) -> Self { if at == self.len() { - return mem::replace(self, Bytes::new()); + let end_ptr = self.ptr.wrapping_add(at); + return mem::replace(self, Bytes::new_empty_with_ptr(end_ptr)); } if at == 0 { - return Bytes::new(); + return Bytes::new_empty_with_ptr(self.ptr); } assert!( diff --git a/src/bytes_mut.rs b/src/bytes_mut.rs index ec74c4e97..a0d77bc24 100644 --- a/src/bytes_mut.rs +++ b/src/bytes_mut.rs @@ -291,7 +291,9 @@ impl BytesMut { /// Splits the bytes into two at the given index. /// /// Afterwards `self` contains elements `[0, at)`, and the returned - /// `BytesMut` contains elements `[at, capacity)`. + /// `BytesMut` contains elements `[at, capacity)`. It's guaranteed that the + /// memory does not move, that is, the address of `self` does not change, + /// and the address of the returned slice is `at` bytes after that. /// /// This is an `O(1)` operation that just increases the reference count /// and sets a few indices. diff --git a/tests/test_bytes.rs b/tests/test_bytes.rs index 59c967b66..71f0e6681 100644 --- a/tests/test_bytes.rs +++ b/tests/test_bytes.rs @@ -1399,3 +1399,83 @@ fn try_reclaim_arc() { buf.advance(2); assert_eq!(true, buf.try_reclaim(6)); } + +#[test] +fn split_off_empty_addr() { + let mut buf = Bytes::from(vec![0; 1024]); + + let ptr_start = buf.as_ptr(); + let ptr_end = ptr_start.wrapping_add(1024); + + let empty_end = buf.split_off(1024); + assert_eq!(empty_end.len(), 0); + assert_eq!(empty_end.as_ptr(), ptr_end); + + let _ = buf.split_off(0); + assert_eq!(buf.len(), 0); + assert_eq!(buf.as_ptr(), ptr_start); + + // Is miri happy about the provenance? + let _ = &empty_end[..]; + let _ = &buf[..]; +} + +#[test] +fn split_to_empty_addr() { + let mut buf = Bytes::from(vec![0; 1024]); + + let ptr_start = buf.as_ptr(); + let ptr_end = ptr_start.wrapping_add(1024); + + let empty_start = buf.split_to(0); + assert_eq!(empty_start.len(), 0); + assert_eq!(empty_start.as_ptr(), ptr_start); + + let _ = buf.split_to(1024); + assert_eq!(buf.len(), 0); + assert_eq!(buf.as_ptr(), ptr_end); + + // Is miri happy about the provenance? + let _ = &empty_start[..]; + let _ = &buf[..]; +} + +#[test] +fn split_off_empty_addr_mut() { + let mut buf = BytesMut::from([0; 1024].as_slice()); + + let ptr_start = buf.as_ptr(); + let ptr_end = ptr_start.wrapping_add(1024); + + let empty_end = buf.split_off(1024); + assert_eq!(empty_end.len(), 0); + assert_eq!(empty_end.as_ptr(), ptr_end); + + let _ = buf.split_off(0); + assert_eq!(buf.len(), 0); + assert_eq!(buf.as_ptr(), ptr_start); + + // Is miri happy about the provenance? + let _ = &empty_end[..]; + let _ = &buf[..]; +} + +#[test] +fn split_to_empty_addr_mut() { + let mut buf = BytesMut::from([0; 1024].as_slice()); + + let ptr_start = buf.as_ptr(); + let ptr_end = ptr_start.wrapping_add(1024); + + let empty_start = buf.split_to(0); + assert_eq!(empty_start.len(), 0); + assert_eq!(empty_start.as_ptr(), ptr_start); + + let _ = buf.split_to(1024); + assert_eq!(buf.len(), 0); + assert_eq!(buf.as_ptr(), ptr_end); + + // Is miri happy about the provenance? + let _ = &empty_start[..]; + let _ = &buf[..]; +} From 8dff14173269d5780cca6a899d1bad9a13aa7514 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 16 Oct 2024 14:57:59 +0200 Subject: [PATCH 2/2] fix miri --- src/bytes.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/bytes.rs b/src/bytes.rs index df21f970e..8d7abd040 100644 --- a/src/bytes.rs +++ b/src/bytes.rs @@ -190,7 +190,7 @@ impl Bytes { // Detach this pointer's provenance from whichever allocation it came from, and reattach it // to the provenance of the fake ZST [u8;0] at the same address. - let ptr = ptr as usize as *const u8; + let ptr = without_provenance(ptr as usize); Bytes { ptr, @@ -1445,6 +1445,10 @@ where new_addr as *mut u8 } +fn without_provenance(ptr: usize) -> *const u8 { + core::ptr::null::().wrapping_add(ptr) +} + // compile-fails /// ```compile_fail