From 52569f3af69bfb751015f4878346c0b03d08a594 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Thu, 22 Aug 2024 15:39:37 -0400 Subject: [PATCH] Miri uses tree borrows, fixes several Miri errors --- .github/workflows/miri.yml | 22 +++++++--- src/lazy/encoder/binary/v1_0/writer.rs | 59 ++++++++++++++++---------- src/lazy/encoder/binary/v1_1/writer.rs | 11 +++-- src/lazy/expanded/mod.rs | 25 ++++++----- src/unsafe_helpers.rs | 9 ++++ 5 files changed, 79 insertions(+), 47 deletions(-) diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml index 723b2191..af9ea8eb 100644 --- a/.github/workflows/miri.yml +++ b/.github/workflows/miri.yml @@ -1,6 +1,6 @@ name: Miri test -on: [push, pull_request] +on: [ push, pull_request ] jobs: build: @@ -11,9 +11,9 @@ jobs: if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != 'amazon-ion/ion-rust' strategy: matrix: - os: [ubuntu-latest, windows-latest, macos-latest] + os: [ ubuntu-latest, windows-latest, macos-latest ] # build and test for experimental features with miri - features: ['experimental'] + features: [ 'experimental' ] permissions: checks: write @@ -37,9 +37,17 @@ jobs: - name: Test with Miri (ubuntu and macos) # miri test for Linux and macOS os, since Windows os requires a different syntax to enable miri flags if: matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest' - # We need to pass the miri flag to disable host isolation in order to access host for clock information on timestamps - # more information can be found: https://github.com/rust-lang/miri#miri--z-flags-and-environment-variables - run: MIRIFLAGS="-Zmiri-disable-isolation" cargo miri test serde --features "${{ matrix.features }}" + # Miri flags + # + # -Zmiri-disable-isolation + # We need to pass the miri flag to disable host isolation in order to access host for clock information on timestamps + # more information can be found: https://github.com/rust-lang/miri#miri--z-flags-and-environment-variables + # + # -Zmiri-tree-borrows + # Miri's default aliasing model (stacked borrows) is known to be unnecessarily restrictive. Their alternative + # aliasing model (tree borrows) accepts valid programs that stacked borrows would reject. + # For more information, see: https://www.ralfj.de/blog/2023/06/02/tree-borrows.html + run: MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-tree-borrows" cargo miri test serde --features "${{ matrix.features }}" - name: Test with Miri (windows) if: matrix.os == 'windows-latest' - run: $env:MIRIFLAGS="-Zmiri-disable-isolation" ; cargo miri test serde --features "${{ matrix.features }}" + run: $env:MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-tree-borrows" ; cargo miri test serde --features "${{ matrix.features }}" diff --git a/src/lazy/encoder/binary/v1_0/writer.rs b/src/lazy/encoder/binary/v1_0/writer.rs index fe40d098..f3fa92f9 100644 --- a/src/lazy/encoder/binary/v1_0/writer.rs +++ b/src/lazy/encoder/binary/v1_0/writer.rs @@ -11,7 +11,7 @@ use crate::lazy::encoder::value_writer::SequenceWriter; use crate::lazy::encoder::write_as_ion::WriteAsIon; use crate::lazy::encoder::LazyRawWriter; use crate::lazy::encoding::Encoding; -use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref}; +use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref, ptr_to_ref}; use crate::write_config::{WriteConfig, WriteConfigKind}; use crate::{IonEncoding, IonResult}; @@ -70,17 +70,13 @@ impl LazyRawBinaryWriter_1_0 { allocator, encoding_buffer_ptr, } = self; - - let encoding_buffer = match encoding_buffer_ptr { - // If `encoding_buffer_ptr` is set, get the slice of bytes to which it refers. - Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() }, - // Otherwise, there's nothing in the buffer. Use an empty slice. - None => &[], - }; - // Write our top level encoding buffer's contents to the output sink. - output.write_all(encoding_buffer)?; - // Flush the output sink, which may have its own buffers. - output.flush()?; + if let Some(ptr) = encoding_buffer_ptr { + let encoding_buffer = unsafe { ptr_to_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() }; + // Write our top level encoding buffer's contents to the output sink. + output.write_all(encoding_buffer)?; + // Flush the output sink, which may have its own buffers. + output.flush()?; + } // Now that we've written the encoding buffer's contents to output, clear it. self.encoding_buffer_ptr = None; // Clear the allocator. A new encoding buffer will be allocated on the next write. @@ -88,22 +84,39 @@ impl LazyRawBinaryWriter_1_0 { Ok(()) } - pub(crate) fn value_writer(&mut self) -> BinaryValueWriter_1_0<'_, '_> { - let top_level = match self.encoding_buffer_ptr { + fn get_or_allocate_encoding_buffer<'value, 'top>( + encoding_buffer_ptr: &'value mut Option<*mut ()>, + allocator: &'top BumpAllocator, + ) -> &'value mut BumpVec<'top, u8> { + match encoding_buffer_ptr { // If the `encoding_buffer_ptr` is set, we already allocated an encoding buffer on // a previous call to `value_writer()`. Dereference the pointer and continue encoding // to that buffer. - Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(ptr) }, - // Otherwise, allocate a new encoding buffer and set the pointer to refer to it. + Some(ptr) => { + let new_ptr = *ptr; + unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(new_ptr) } + } None => { - let buffer = self - .allocator - .alloc_with(|| BumpVec::new_in(&self.allocator)); - self.encoding_buffer_ptr = Some(mut_ref_to_ptr(buffer)); - buffer + let encoding_buffer = allocator.alloc_with(|| BumpVec::::new_in(allocator)); + let ptr = mut_ref_to_ptr(encoding_buffer); + // SAFETY: We cannot both store `ptr` in `encoding_buffer_ptr` AND turn it into + // a mutable BumVec reference to return because this (briefly) constructs + // two mutable references. Instead, we store it in `encoding_buffer_ptr` + // and then read it from its new location. + *encoding_buffer_ptr = Some(ptr); + unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(encoding_buffer_ptr.unwrap()) } } - }; - let annotated_value_writer = BinaryValueWriter_1_0::new(&self.allocator, top_level); + } + } + + pub(crate) fn value_writer(&mut self) -> BinaryValueWriter_1_0<'_, '_> { + let Self { + ref allocator, + ref mut encoding_buffer_ptr, + .. + } = *self; + let top_level = Self::get_or_allocate_encoding_buffer(encoding_buffer_ptr, allocator); + let annotated_value_writer = BinaryValueWriter_1_0::new(allocator, top_level); annotated_value_writer } } diff --git a/src/lazy/encoder/binary/v1_1/writer.rs b/src/lazy/encoder/binary/v1_1/writer.rs index 776fa9b3..c906baed 100644 --- a/src/lazy/encoder/binary/v1_1/writer.rs +++ b/src/lazy/encoder/binary/v1_1/writer.rs @@ -12,7 +12,7 @@ use crate::lazy::encoder::value_writer_config::ValueWriterConfig; use crate::lazy::encoder::write_as_ion::WriteAsIon; use crate::lazy::encoder::LazyRawWriter; use crate::lazy::encoding::Encoding; -use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref}; +use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref, ptr_to_ref}; use crate::write_config::{WriteConfig, WriteConfigKind}; use crate::{IonEncoding, IonResult}; @@ -74,7 +74,7 @@ impl LazyRawBinaryWriter_1_1 { let encoding_buffer = match encoding_buffer_ptr { // If `encoding_buffer_ptr` is set, get the slice of bytes to which it refers. - Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() }, + Some(ptr) => unsafe { ptr_to_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() }, // Otherwise, there's nothing in the buffer. Use an empty slice. None => &[], }; @@ -97,14 +97,17 @@ impl LazyRawBinaryWriter_1_1 { Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(ptr) }, // Otherwise, allocate a new encoding buffer and set the pointer to refer to it. None => { - let buffer = self.allocator.alloc_with(|| { + let buffer: &mut BumpVec = self.allocator.alloc_with(|| { // Use half of the bump allocator's backing array as an encoding space for this // top level value. The other half of the bump can be used for incidental // bookkeeping. BumpVec::with_capacity_in(DEFAULT_BUMP_SIZE / 2, &self.allocator) }); + // SAFETY: We cannot both store `buffer` in `encoding_buffer_ptr` AND return it + // because this would (briefly) construct two mutable references. Instead, + // we store it in `encoding_buffer_ptr` and then read it from its new location. self.encoding_buffer_ptr = Some(mut_ref_to_ptr(buffer)); - buffer + unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(self.encoding_buffer_ptr.unwrap()) } } }; BinaryValueWriter_1_1::new( diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index 68c45b90..bf4b4ab5 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -220,7 +220,7 @@ pub struct ExpandingReader { // the pointer and coerce the result into a `&'top mut MacroEvaluator`, allowing the value it // yields that can be used until `next()` is called again. // - // Because there is not valid lifetime we can use for the type `*mut MacroEvaluator<'lifetime>`, + // Because there is not a valid lifetime we can use for the type `*mut MacroEvaluator<'lifetime>`, // in the field below, we cast away the pointer's type for the purposes of storage and then cast // it back at dereference time when a 'top lifetime is available. evaluator_ptr: Cell>, @@ -468,10 +468,12 @@ impl ExpandingReader { if pending_lst.has_changes { // SAFETY: Nothing else holds a reference to the `EncodingContext`'s contents, so we can use the // `UnsafeCell` to get a mutable reference to its symbol table. - let symbol_table: &mut SymbolTable = - &mut unsafe { &mut *self.encoding_context.get() }.symbol_table; - let macro_table: &mut MacroTable = - &mut unsafe { &mut *self.encoding_context.get() }.macro_table; + let encoding_context_ref = unsafe { &mut *self.encoding_context.get() }; + let EncodingContext { + macro_table, + symbol_table, + .. + } = encoding_context_ref; Self::apply_pending_context_changes(pending_lst, symbol_table, macro_table); } } @@ -530,8 +532,8 @@ impl ExpandingReader { // See if the raw reader can get another expression from the input stream. It's possible // to find an expression that yields no values (for example: `(:void)`), so we perform this // step in a loop until we get a value or end-of-stream. - let allocator: &BumpAllocator = self.context().allocator(); - let context_ref = EncodingContextRef::new(allocator.alloc_with(|| self.context())); + let context_ref = self.context(); + // Pull another top-level expression from the input stream if one is available. use crate::lazy::raw_stream_item::RawStreamItem::*; let raw_reader = unsafe { &mut *self.raw_reader.get() }; @@ -548,7 +550,6 @@ impl ExpandingReader { // It's another macro invocation, we'll add it to the evaluator so it will be evaluated // on the next call and then we'll return the e-expression itself. EExp(e_exp) => { - let context = self.context(); let resolved_e_exp = match e_exp.resolve(context_ref) { Ok(resolved) => resolved, Err(e) => return Err(e), @@ -565,7 +566,7 @@ impl ExpandingReader { // If there's not an evaluator in the bump, make a new one. None => { let new_evaluator = MacroEvaluator::for_eexp(resolved_e_exp)?; - context.allocator.alloc_with(|| new_evaluator) + context_ref.allocator.alloc_with(|| new_evaluator) } }; @@ -613,9 +614,8 @@ impl ExpandingReader { // See if the raw reader can get another expression from the input stream. It's possible // to find an expression that yields no values (for example: `(:void)`), so we perform this // step in a loop until we get a value or end-of-stream. + let context_ref = self.context(); - let allocator: &BumpAllocator = self.context().allocator(); - let context_ref = EncodingContextRef::new(allocator.alloc_with(|| self.context())); loop { // Pull another top-level expression from the input stream if one is available. use crate::lazy::raw_stream_item::RawStreamItem::*; @@ -631,7 +631,6 @@ impl ExpandingReader { } // It's another macro invocation, we'll start evaluating it. EExp(e_exp) => { - let context = self.context(); let resolved_e_exp = match e_exp.resolve(context_ref) { Ok(resolved) => resolved, Err(e) => return Err(e), @@ -653,7 +652,7 @@ impl ExpandingReader { bump_evaluator_ref } // If there's not an evaluator in the bump, make a new one. - None => context.allocator.alloc_with(|| new_evaluator), + None => context_ref.allocator.alloc_with(|| new_evaluator), }; // Try to get a value by starting to evaluate the e-expression. diff --git a/src/unsafe_helpers.rs b/src/unsafe_helpers.rs index 8382e861..fef8333b 100644 --- a/src/unsafe_helpers.rs +++ b/src/unsafe_helpers.rs @@ -19,6 +19,15 @@ pub(crate) unsafe fn ptr_to_mut_ref<'a, T>(ptr: *mut ()) -> &'a mut T { &mut *typed_ptr } +/// Helper function that turns a raw pointer into an immutable reference of the specified type. +/// +/// The caller is responsible for confirming that `ptr` is a valid reference to some value +/// of type `T`. +pub(crate) unsafe fn ptr_to_ref<'a, T>(ptr: *mut ()) -> &'a T { + let typed_ptr: *const T = ptr.cast(); + &*typed_ptr +} + /// Helper function that turns a mutable reference into a raw pointer. /// /// Because this method does not read the data to which the reference points,