From 52569f3af69bfb751015f4878346c0b03d08a594 Mon Sep 17 00:00:00 2001
From: Zack Slayton <zslayton@amazon.com>
Date: Thu, 22 Aug 2024 15:39:37 -0400
Subject: [PATCH] Miri uses tree borrows, fixes several Miri errors

---
 .github/workflows/miri.yml             | 22 +++++++---
 src/lazy/encoder/binary/v1_0/writer.rs | 59 ++++++++++++++++----------
 src/lazy/encoder/binary/v1_1/writer.rs | 11 +++--
 src/lazy/expanded/mod.rs               | 25 ++++++-----
 src/unsafe_helpers.rs                  |  9 ++++
 5 files changed, 79 insertions(+), 47 deletions(-)
diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml
index 723b2191..af9ea8eb 100644
--- a/.github/workflows/miri.yml
+++ b/.github/workflows/miri.yml
@@ -1,6 +1,6 @@
 name: Miri test
 
-on: [push, pull_request]
+on: [ push, pull_request ]
 
 jobs:
   build:
@@ -11,9 +11,9 @@ jobs:
     if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != 'amazon-ion/ion-rust'
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest]
+        os: [ ubuntu-latest, windows-latest, macos-latest ]
         # build and test for experimental features with miri
-        features: ['experimental']
+        features: [ 'experimental' ]
     permissions:
       checks: write
 
@@ -37,9 +37,17 @@ jobs:
       - name: Test with Miri (ubuntu and macos)
         # miri test for Linux and macOS os, since Windows os requires a different syntax to enable miri flags
         if: matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest'
-        # We need to pass the miri flag to disable host isolation in order to access host for clock information on timestamps
-        # more information can be found: https://github.com/rust-lang/miri#miri--z-flags-and-environment-variables
-        run: MIRIFLAGS="-Zmiri-disable-isolation" cargo miri test serde --features "${{ matrix.features }}"
+        # Miri flags
+        #
+        # -Zmiri-disable-isolation
+        #     We need to pass the miri flag to disable host isolation in order to access host for clock information on timestamps
+        #     more information can be found: https://github.com/rust-lang/miri#miri--z-flags-and-environment-variables
+        #
+        # -Zmiri-tree-borrows
+        #     Miri's default aliasing model (stacked borrows) is known to be unnecessarily restrictive. Their alternative
+        #     aliasing model (tree borrows) accepts valid programs that stacked borrows would reject.
+        #     For more information, see: https://www.ralfj.de/blog/2023/06/02/tree-borrows.html
+        run: MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-tree-borrows" cargo miri test serde --features "${{ matrix.features }}"
       - name: Test with Miri (windows)
         if: matrix.os == 'windows-latest'
-        run: $env:MIRIFLAGS="-Zmiri-disable-isolation" ; cargo miri test serde --features "${{ matrix.features }}"
+        run: $env:MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-tree-borrows" ; cargo miri test serde --features "${{ matrix.features }}"
diff --git a/src/lazy/encoder/binary/v1_0/writer.rs b/src/lazy/encoder/binary/v1_0/writer.rs
index fe40d098..f3fa92f9 100644
--- a/src/lazy/encoder/binary/v1_0/writer.rs
+++ b/src/lazy/encoder/binary/v1_0/writer.rs
@@ -11,7 +11,7 @@ use crate::lazy::encoder::value_writer::SequenceWriter;
 use crate::lazy::encoder::write_as_ion::WriteAsIon;
 use crate::lazy::encoder::LazyRawWriter;
 use crate::lazy::encoding::Encoding;
-use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref};
+use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref, ptr_to_ref};
 use crate::write_config::{WriteConfig, WriteConfigKind};
 use crate::{IonEncoding, IonResult};
 
@@ -70,17 +70,13 @@ impl<W: Write> LazyRawBinaryWriter_1_0<W> {
             allocator,
             encoding_buffer_ptr,
         } = self;
-
-        let encoding_buffer = match encoding_buffer_ptr {
-            // If `encoding_buffer_ptr` is set, get the slice of bytes to which it refers.
-            Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() },
-            // Otherwise, there's nothing in the buffer. Use an empty slice.
-            None => &[],
-        };
-        // Write our top level encoding buffer's contents to the output sink.
-        output.write_all(encoding_buffer)?;
-        // Flush the output sink, which may have its own buffers.
-        output.flush()?;
+        if let Some(ptr) = encoding_buffer_ptr {
+            let encoding_buffer = unsafe { ptr_to_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() };
+            // Write our top level encoding buffer's contents to the output sink.
+            output.write_all(encoding_buffer)?;
+            // Flush the output sink, which may have its own buffers.
+            output.flush()?;
+        }
         // Now that we've written the encoding buffer's contents to output, clear it.
         self.encoding_buffer_ptr = None;
         // Clear the allocator. A new encoding buffer will be allocated on the next write.
@@ -88,22 +84,39 @@ impl<W: Write> LazyRawBinaryWriter_1_0<W> {
         Ok(())
     }
 
-    pub(crate) fn value_writer(&mut self) -> BinaryValueWriter_1_0<'_, '_> {
-        let top_level = match self.encoding_buffer_ptr {
+    fn get_or_allocate_encoding_buffer<'value, 'top>(
+        encoding_buffer_ptr: &'value mut Option<*mut ()>,
+        allocator: &'top BumpAllocator,
+    ) -> &'value mut BumpVec<'top, u8> {
+        match encoding_buffer_ptr {
             // If the `encoding_buffer_ptr` is set, we already allocated an encoding buffer on
             // a previous call to `value_writer()`. Dereference the pointer and continue encoding
             // to that buffer.
-            Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(ptr) },
-            // Otherwise, allocate a new encoding buffer and set the pointer to refer to it.
+            Some(ptr) => {
+                let new_ptr = *ptr;
+                unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(new_ptr) }
+            }
             None => {
-                let buffer = self
-                    .allocator
-                    .alloc_with(|| BumpVec::new_in(&self.allocator));
-                self.encoding_buffer_ptr = Some(mut_ref_to_ptr(buffer));
-                buffer
+                let encoding_buffer = allocator.alloc_with(|| BumpVec::<u8>::new_in(allocator));
+                let ptr = mut_ref_to_ptr(encoding_buffer);
+                // SAFETY: We cannot both store `ptr` in `encoding_buffer_ptr` AND turn it into
+                //         a mutable BumVec reference to return because this (briefly) constructs
+                //         two mutable references. Instead, we store it in `encoding_buffer_ptr`
+                //         and then read it from its new location.
+                *encoding_buffer_ptr = Some(ptr);
+                unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(encoding_buffer_ptr.unwrap()) }
             }
-        };
-        let annotated_value_writer = BinaryValueWriter_1_0::new(&self.allocator, top_level);
+        }
+    }
+
+    pub(crate) fn value_writer(&mut self) -> BinaryValueWriter_1_0<'_, '_> {
+        let Self {
+            ref allocator,
+            ref mut encoding_buffer_ptr,
+            ..
+        } = *self;
+        let top_level = Self::get_or_allocate_encoding_buffer(encoding_buffer_ptr, allocator);
+        let annotated_value_writer = BinaryValueWriter_1_0::new(allocator, top_level);
         annotated_value_writer
     }
 }
diff --git a/src/lazy/encoder/binary/v1_1/writer.rs b/src/lazy/encoder/binary/v1_1/writer.rs
index 776fa9b3..c906baed 100644
--- a/src/lazy/encoder/binary/v1_1/writer.rs
+++ b/src/lazy/encoder/binary/v1_1/writer.rs
@@ -12,7 +12,7 @@ use crate::lazy::encoder::value_writer_config::ValueWriterConfig;
 use crate::lazy::encoder::write_as_ion::WriteAsIon;
 use crate::lazy::encoder::LazyRawWriter;
 use crate::lazy::encoding::Encoding;
-use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref};
+use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref, ptr_to_ref};
 use crate::write_config::{WriteConfig, WriteConfigKind};
 use crate::{IonEncoding, IonResult};
 
@@ -74,7 +74,7 @@ impl<W: Write> LazyRawBinaryWriter_1_1<W> {
 
         let encoding_buffer = match encoding_buffer_ptr {
             // If `encoding_buffer_ptr` is set, get the slice of bytes to which it refers.
-            Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() },
+            Some(ptr) => unsafe { ptr_to_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() },
             // Otherwise, there's nothing in the buffer. Use an empty slice.
             None => &[],
         };
@@ -97,14 +97,17 @@ impl<W: Write> LazyRawBinaryWriter_1_1<W> {
             Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(ptr) },
             // Otherwise, allocate a new encoding buffer and set the pointer to refer to it.
             None => {
-                let buffer = self.allocator.alloc_with(|| {
+                let buffer: &mut BumpVec<u8> = self.allocator.alloc_with(|| {
                     // Use half of the bump allocator's backing array as an encoding space for this
                     // top level value. The other half of the bump can be used for incidental
                     // bookkeeping.
                     BumpVec::with_capacity_in(DEFAULT_BUMP_SIZE / 2, &self.allocator)
                 });
+                // SAFETY: We cannot both store `buffer` in `encoding_buffer_ptr` AND return it
+                //         because this would (briefly) construct two mutable references. Instead,
+                //         we store it in `encoding_buffer_ptr` and then read it from its new location.
                 self.encoding_buffer_ptr = Some(mut_ref_to_ptr(buffer));
-                buffer
+                unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(self.encoding_buffer_ptr.unwrap()) }
             }
         };
         BinaryValueWriter_1_1::new(
diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs
index 68c45b90..bf4b4ab5 100644
--- a/src/lazy/expanded/mod.rs
+++ b/src/lazy/expanded/mod.rs
@@ -220,7 +220,7 @@ pub struct ExpandingReader<Encoding: Decoder, Input: IonInput> {
     // the pointer and coerce the result into a `&'top mut MacroEvaluator`, allowing the value it
     // yields that can be used until `next()` is called again.
     //
-    // Because there is not valid lifetime we can use for the type `*mut MacroEvaluator<'lifetime>`,
+    // Because there is not a valid lifetime we can use for the type `*mut MacroEvaluator<'lifetime>`,
     // in the field below, we cast away the pointer's type for the purposes of storage and then cast
     // it back at dereference time when a 'top lifetime is available.
     evaluator_ptr: Cell<Option<*mut ()>>,
@@ -468,10 +468,12 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
         if pending_lst.has_changes {
             // SAFETY: Nothing else holds a reference to the `EncodingContext`'s contents, so we can use the
             //         `UnsafeCell` to get a mutable reference to its symbol table.
-            let symbol_table: &mut SymbolTable =
-                &mut unsafe { &mut *self.encoding_context.get() }.symbol_table;
-            let macro_table: &mut MacroTable =
-                &mut unsafe { &mut *self.encoding_context.get() }.macro_table;
+            let encoding_context_ref = unsafe { &mut *self.encoding_context.get() };
+            let EncodingContext {
+                macro_table,
+                symbol_table,
+                ..
+            } = encoding_context_ref;
             Self::apply_pending_context_changes(pending_lst, symbol_table, macro_table);
         }
     }
@@ -530,8 +532,8 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
         // See if the raw reader can get another expression from the input stream. It's possible
         // to find an expression that yields no values (for example: `(:void)`), so we perform this
         // step in a loop until we get a value or end-of-stream.
-        let allocator: &BumpAllocator = self.context().allocator();
-        let context_ref = EncodingContextRef::new(allocator.alloc_with(|| self.context()));
+        let context_ref = self.context();
+
         // Pull another top-level expression from the input stream if one is available.
         use crate::lazy::raw_stream_item::RawStreamItem::*;
         let raw_reader = unsafe { &mut *self.raw_reader.get() };
@@ -548,7 +550,6 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
             // It's another macro invocation, we'll add it to the evaluator so it will be evaluated
             // on the next call and then we'll return the e-expression itself.
             EExp(e_exp) => {
-                let context = self.context();
                 let resolved_e_exp = match e_exp.resolve(context_ref) {
                     Ok(resolved) => resolved,
                     Err(e) => return Err(e),
@@ -565,7 +566,7 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
                     // If there's not an evaluator in the bump, make a new one.
                     None => {
                         let new_evaluator = MacroEvaluator::for_eexp(resolved_e_exp)?;
-                        context.allocator.alloc_with(|| new_evaluator)
+                        context_ref.allocator.alloc_with(|| new_evaluator)
                     }
                 };
 
@@ -613,9 +614,8 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
         // See if the raw reader can get another expression from the input stream. It's possible
         // to find an expression that yields no values (for example: `(:void)`), so we perform this
         // step in a loop until we get a value or end-of-stream.
+        let context_ref = self.context();
 
-        let allocator: &BumpAllocator = self.context().allocator();
-        let context_ref = EncodingContextRef::new(allocator.alloc_with(|| self.context()));
         loop {
             // Pull another top-level expression from the input stream if one is available.
             use crate::lazy::raw_stream_item::RawStreamItem::*;
@@ -631,7 +631,6 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
                 }
                 // It's another macro invocation, we'll start evaluating it.
                 EExp(e_exp) => {
-                    let context = self.context();
                     let resolved_e_exp = match e_exp.resolve(context_ref) {
                         Ok(resolved) => resolved,
                         Err(e) => return Err(e),
@@ -653,7 +652,7 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
                             bump_evaluator_ref
                         }
                         // If there's not an evaluator in the bump, make a new one.
-                        None => context.allocator.alloc_with(|| new_evaluator),
+                        None => context_ref.allocator.alloc_with(|| new_evaluator),
                     };
 
                     // Try to get a value by starting to evaluate the e-expression.
diff --git a/src/unsafe_helpers.rs b/src/unsafe_helpers.rs
index 8382e861..fef8333b 100644
--- a/src/unsafe_helpers.rs
+++ b/src/unsafe_helpers.rs
@@ -19,6 +19,15 @@ pub(crate) unsafe fn ptr_to_mut_ref<'a, T>(ptr: *mut ()) -> &'a mut T {
     &mut *typed_ptr
 }
 
+/// Helper function that turns a raw pointer into an immutable reference of the specified type.
+///
+/// The caller is responsible for confirming that `ptr` is a valid reference to some value
+/// of type `T`.
+pub(crate) unsafe fn ptr_to_ref<'a, T>(ptr: *mut ()) -> &'a T {
+    let typed_ptr: *const T = ptr.cast();
+    &*typed_ptr
+}
+
 /// Helper function that turns a mutable reference into a raw pointer.
 ///
 /// Because this method does not read the data to which the reference points,