Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Configures Miri to use tree borrows, fixes remaining errors #821

Merged
merged 1 commit into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions .github/workflows/miri.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: Miri test

on: [push, pull_request]
on: [ push, pull_request ]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ Some autoformatting happened in this file.


jobs:
build:
Expand All @@ -11,9 +11,9 @@ jobs:
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != 'amazon-ion/ion-rust'
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
os: [ ubuntu-latest, windows-latest, macos-latest ]
# build and test for experimental features with miri
features: ['experimental']
features: [ 'experimental' ]
permissions:
checks: write

Expand All @@ -37,9 +37,17 @@ jobs:
- name: Test with Miri (ubuntu and macos)
# miri test for Linux and macOS os, since Windows os requires a different syntax to enable miri flags
if: matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest'
# We need to pass the miri flag to disable host isolation in order to access host for clock information on timestamps
# more information can be found: https://github.com/rust-lang/miri#miri--z-flags-and-environment-variables
run: MIRIFLAGS="-Zmiri-disable-isolation" cargo miri test serde --features "${{ matrix.features }}"
# Miri flags
#
# -Zmiri-disable-isolation
# We need to pass the miri flag to disable host isolation in order to access host for clock information on timestamps
# more information can be found: https://github.com/rust-lang/miri#miri--z-flags-and-environment-variables
#
# -Zmiri-tree-borrows
# Miri's default aliasing model (stacked borrows) is known to be unnecessarily restrictive. Their alternative
# aliasing model (tree borrows) accepts valid programs that stacked borrows would reject.
# For more information, see: https://www.ralfj.de/blog/2023/06/02/tree-borrows.html
run: MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-tree-borrows" cargo miri test serde --features "${{ matrix.features }}"
- name: Test with Miri (windows)
if: matrix.os == 'windows-latest'
run: $env:MIRIFLAGS="-Zmiri-disable-isolation" ; cargo miri test serde --features "${{ matrix.features }}"
run: $env:MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-tree-borrows" ; cargo miri test serde --features "${{ matrix.features }}"
59 changes: 36 additions & 23 deletions src/lazy/encoder/binary/v1_0/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::lazy::encoder::value_writer::SequenceWriter;
use crate::lazy::encoder::write_as_ion::WriteAsIon;
use crate::lazy::encoder::LazyRawWriter;
use crate::lazy::encoding::Encoding;
use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref};
use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref, ptr_to_ref};
use crate::write_config::{WriteConfig, WriteConfigKind};
use crate::{IonEncoding, IonResult};

Expand Down Expand Up @@ -70,40 +70,53 @@ impl<W: Write> LazyRawBinaryWriter_1_0<W> {
allocator,
encoding_buffer_ptr,
} = self;

let encoding_buffer = match encoding_buffer_ptr {
// If `encoding_buffer_ptr` is set, get the slice of bytes to which it refers.
Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() },
// Otherwise, there's nothing in the buffer. Use an empty slice.
None => &[],
};
// Write our top level encoding buffer's contents to the output sink.
output.write_all(encoding_buffer)?;
// Flush the output sink, which may have its own buffers.
output.flush()?;
if let Some(ptr) = encoding_buffer_ptr {
let encoding_buffer = unsafe { ptr_to_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() };
// Write our top level encoding buffer's contents to the output sink.
output.write_all(encoding_buffer)?;
// Flush the output sink, which may have its own buffers.
output.flush()?;
}
Comment on lines +73 to +79
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ I ported this more succinct version from the 1.1 writer; notice that it more narrowly scopes the encoding_buffer reference.

// Now that we've written the encoding buffer's contents to output, clear it.
self.encoding_buffer_ptr = None;
// Clear the allocator. A new encoding buffer will be allocated on the next write.
allocator.reset();
Ok(())
}

pub(crate) fn value_writer(&mut self) -> BinaryValueWriter_1_0<'_, '_> {
let top_level = match self.encoding_buffer_ptr {
fn get_or_allocate_encoding_buffer<'value, 'top>(
encoding_buffer_ptr: &'value mut Option<*mut ()>,
allocator: &'top BumpAllocator,
) -> &'value mut BumpVec<'top, u8> {
match encoding_buffer_ptr {
// If the `encoding_buffer_ptr` is set, we already allocated an encoding buffer on
// a previous call to `value_writer()`. Dereference the pointer and continue encoding
// to that buffer.
Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(ptr) },
// Otherwise, allocate a new encoding buffer and set the pointer to refer to it.
Some(ptr) => {
let new_ptr = *ptr;
unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(new_ptr) }
}
None => {
let buffer = self
.allocator
.alloc_with(|| BumpVec::new_in(&self.allocator));
self.encoding_buffer_ptr = Some(mut_ref_to_ptr(buffer));
buffer
let encoding_buffer = allocator.alloc_with(|| BumpVec::<u8>::new_in(allocator));
let ptr = mut_ref_to_ptr(encoding_buffer);
// SAFETY: We cannot both store `ptr` in `encoding_buffer_ptr` AND turn it into
// a mutable BumVec reference to return because this (briefly) constructs
// two mutable references. Instead, we store it in `encoding_buffer_ptr`
// and then read it from its new location.
*encoding_buffer_ptr = Some(ptr);
unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(encoding_buffer_ptr.unwrap()) }
}
};
let annotated_value_writer = BinaryValueWriter_1_0::new(&self.allocator, top_level);
}
}

pub(crate) fn value_writer(&mut self) -> BinaryValueWriter_1_0<'_, '_> {
let Self {
ref allocator,
ref mut encoding_buffer_ptr,
..
} = *self;
let top_level = Self::get_or_allocate_encoding_buffer(encoding_buffer_ptr, allocator);
let annotated_value_writer = BinaryValueWriter_1_0::new(allocator, top_level);
annotated_value_writer
}
}
Expand Down
11 changes: 7 additions & 4 deletions src/lazy/encoder/binary/v1_1/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::lazy::encoder::value_writer_config::ValueWriterConfig;
use crate::lazy::encoder::write_as_ion::WriteAsIon;
use crate::lazy::encoder::LazyRawWriter;
use crate::lazy::encoding::Encoding;
use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref};
use crate::unsafe_helpers::{mut_ref_to_ptr, ptr_to_mut_ref, ptr_to_ref};
use crate::write_config::{WriteConfig, WriteConfigKind};
use crate::{IonEncoding, IonResult};

Expand Down Expand Up @@ -74,7 +74,7 @@ impl<W: Write> LazyRawBinaryWriter_1_1<W> {

let encoding_buffer = match encoding_buffer_ptr {
// If `encoding_buffer_ptr` is set, get the slice of bytes to which it refers.
Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() },
Some(ptr) => unsafe { ptr_to_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() },
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ In flush() we only need to read from the encoding buffer, so we don't need a mut ref.

// Otherwise, there's nothing in the buffer. Use an empty slice.
None => &[],
};
Expand All @@ -97,14 +97,17 @@ impl<W: Write> LazyRawBinaryWriter_1_1<W> {
Some(ptr) => unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(ptr) },
// Otherwise, allocate a new encoding buffer and set the pointer to refer to it.
None => {
let buffer = self.allocator.alloc_with(|| {
let buffer: &mut BumpVec<u8> = self.allocator.alloc_with(|| {
// Use half of the bump allocator's backing array as an encoding space for this
// top level value. The other half of the bump can be used for incidental
// bookkeeping.
BumpVec::with_capacity_in(DEFAULT_BUMP_SIZE / 2, &self.allocator)
});
// SAFETY: We cannot both store `buffer` in `encoding_buffer_ptr` AND return it
// because this would (briefly) construct two mutable references. Instead,
// we store it in `encoding_buffer_ptr` and then read it from its new location.
self.encoding_buffer_ptr = Some(mut_ref_to_ptr(buffer));
buffer
unsafe { ptr_to_mut_ref::<'_, BumpVec<'_, u8>>(self.encoding_buffer_ptr.unwrap()) }
}
};
BinaryValueWriter_1_1::new(
Expand Down
25 changes: 12 additions & 13 deletions src/lazy/expanded/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ pub struct ExpandingReader<Encoding: Decoder, Input: IonInput> {
// the pointer and coerce the result into a `&'top mut MacroEvaluator`, allowing the value it
// yields that can be used until `next()` is called again.
//
// Because there is not valid lifetime we can use for the type `*mut MacroEvaluator<'lifetime>`,
// Because there is not a valid lifetime we can use for the type `*mut MacroEvaluator<'lifetime>`,
// in the field below, we cast away the pointer's type for the purposes of storage and then cast
// it back at dereference time when a 'top lifetime is available.
evaluator_ptr: Cell<Option<*mut ()>>,
Expand Down Expand Up @@ -468,10 +468,12 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
if pending_lst.has_changes {
// SAFETY: Nothing else holds a reference to the `EncodingContext`'s contents, so we can use the
// `UnsafeCell` to get a mutable reference to its symbol table.
let symbol_table: &mut SymbolTable =
&mut unsafe { &mut *self.encoding_context.get() }.symbol_table;
let macro_table: &mut MacroTable =
&mut unsafe { &mut *self.encoding_context.get() }.macro_table;
let encoding_context_ref = unsafe { &mut *self.encoding_context.get() };
let EncodingContext {
macro_table,
symbol_table,
..
} = encoding_context_ref;
Comment on lines -471 to +476
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ Here we get a single mutable reference and then use safe code to destructure it into two mutable references.

Self::apply_pending_context_changes(pending_lst, symbol_table, macro_table);
}
}
Expand Down Expand Up @@ -530,8 +532,8 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
// See if the raw reader can get another expression from the input stream. It's possible
// to find an expression that yields no values (for example: `(:void)`), so we perform this
// step in a loop until we get a value or end-of-stream.
let allocator: &BumpAllocator = self.context().allocator();
let context_ref = EncodingContextRef::new(allocator.alloc_with(|| self.context()));
let context_ref = self.context();

Comment on lines -533 to +536
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ This was copying the EncodingContextRef to the bump allocator so it would survive the lifetime 'top...but it was already going to survive that because the EncodingContext lives in the ExpandingReader.

// Pull another top-level expression from the input stream if one is available.
use crate::lazy::raw_stream_item::RawStreamItem::*;
let raw_reader = unsafe { &mut *self.raw_reader.get() };
Expand All @@ -548,7 +550,6 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
// It's another macro invocation, we'll add it to the evaluator so it will be evaluated
// on the next call and then we'll return the e-expression itself.
EExp(e_exp) => {
let context = self.context();
let resolved_e_exp = match e_exp.resolve(context_ref) {
Ok(resolved) => resolved,
Err(e) => return Err(e),
Expand All @@ -565,7 +566,7 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
// If there's not an evaluator in the bump, make a new one.
None => {
let new_evaluator = MacroEvaluator::for_eexp(resolved_e_exp)?;
context.allocator.alloc_with(|| new_evaluator)
context_ref.allocator.alloc_with(|| new_evaluator)
}
};

Expand Down Expand Up @@ -613,9 +614,8 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
// See if the raw reader can get another expression from the input stream. It's possible
// to find an expression that yields no values (for example: `(:void)`), so we perform this
// step in a loop until we get a value or end-of-stream.
let context_ref = self.context();

let allocator: &BumpAllocator = self.context().allocator();
let context_ref = EncodingContextRef::new(allocator.alloc_with(|| self.context()));
loop {
// Pull another top-level expression from the input stream if one is available.
use crate::lazy::raw_stream_item::RawStreamItem::*;
Expand All @@ -631,7 +631,6 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
}
// It's another macro invocation, we'll start evaluating it.
EExp(e_exp) => {
let context = self.context();
let resolved_e_exp = match e_exp.resolve(context_ref) {
Ok(resolved) => resolved,
Err(e) => return Err(e),
Expand All @@ -653,7 +652,7 @@ impl<Encoding: Decoder, Input: IonInput> ExpandingReader<Encoding, Input> {
bump_evaluator_ref
}
// If there's not an evaluator in the bump, make a new one.
None => context.allocator.alloc_with(|| new_evaluator),
None => context_ref.allocator.alloc_with(|| new_evaluator),
};

// Try to get a value by starting to evaluate the e-expression.
Expand Down
9 changes: 9 additions & 0 deletions src/unsafe_helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ pub(crate) unsafe fn ptr_to_mut_ref<'a, T>(ptr: *mut ()) -> &'a mut T {
&mut *typed_ptr
}

/// Helper function that turns a raw pointer into an immutable reference of the specified type.
///
/// The caller is responsible for confirming that `ptr` is a valid reference to some value
/// of type `T`.
pub(crate) unsafe fn ptr_to_ref<'a, T>(ptr: *mut ()) -> &'a T {
let typed_ptr: *const T = ptr.cast();
&*typed_ptr
}
Comment on lines +22 to +29
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🗺️ Previously we only had a ptr_to_mut_ref which meant we were getting a mutable reference even where a read-only reference would suffice.


/// Helper function that turns a mutable reference into a raw pointer.
///
/// Because this method does not read the data to which the reference points,
Expand Down
Loading