From a89ef359dd1b58d1e0d8fe4788518cb5e1f41f2c Mon Sep 17 00:00:00 2001 From: tremwil Date: Thu, 12 Dec 2024 11:40:58 -0500 Subject: [PATCH 1/4] make decoder not alias memory being read --- src/rust/iced-x86/src/decoder.rs | 85 ++++++++++++++++++++++++++++++-- 1 file changed, 82 insertions(+), 3 deletions(-) diff --git a/src/rust/iced-x86/src/decoder.rs b/src/rust/iced-x86/src/decoder.rs index 6d9f2a107..ffb001782 100644 --- a/src/rust/iced-x86/src/decoder.rs +++ b/src/rust/iced-x86/src/decoder.rs @@ -101,6 +101,7 @@ use crate::instruction_internal; use crate::tuple_type_tbl::get_disp8n; use crate::*; use core::iter::FusedIterator; +use core::marker::PhantomData; use core::{cmp, fmt, mem, ptr}; #[rustfmt::skip] @@ -578,8 +579,10 @@ where // Offset of displacement in the instruction. Only used by get_constant_offsets() to return the offset of the displ displ_index: u8, - // Input data provided by the user. When there's no more bytes left to read we'll return a NoMoreBytes error - data: &'a [u8], + // Input data provided by the user as a raw pointer to avoid potential UB from aliasing. + // When there's no more bytes left to read we'll return a NoMoreBytes error + data: *const [u8], + phantom: PhantomData<&'a [u8]> } macro_rules! write_base_reg { @@ -866,10 +869,85 @@ impl<'a> Decoder<'a> { /// assert_eq!(instr.code(), Code::Add_rm32_r32); /// assert!(instr.has_lock_prefix()); /// ``` + #[inline] + pub fn try_with_ip(bitness: u32, data: &'a [u8], ip: u64, options: u32) -> Result, IcedError> { + unsafe { Decoder::try_with_slice_ptr(bitness, data as *const _, ip, options) } + } + + /// Creates a decoder from a raw slice pointer. + /// + /// # Errors + /// + /// Fails if `bitness` is not one of 16, 32, 64. + /// + /// # Arguments + /// + /// * `bitness`: 16, 32 or 64 + /// * `data`: Data to decode + /// * `ip`: `RIP` value + /// * `options`: Decoder options, `0` or eg. `DecoderOptions::NO_INVALID_CHECK | DecoderOptions::AMD` + /// + /// # Safety + /// `data` must be a valid pointer to a slice of bytes with lifetime at least that of the decoder. + /// + /// # Examples + /// + /// ``` + /// use iced_x86::*; + /// + /// // xchg ah,[rdx+rsi+16h] + /// // xacquire lock add dword ptr [rax],5Ah + /// // vmovdqu64 zmm18{k3}{z},zmm11 + /// let bytes = b"\x86\x64\x32\x16\xF0\xF2\x83\x00\x5A\x62\xC1\xFE\xCB\x6F\xD3" as *const [u8]; + /// let mut decoder = unsafe { + /// Decoder::try_with_slice_ptr(64, bytes, 0x1234_5678, DecoderOptions::NONE).unwrap() + /// }; + /// + /// let instr1 = decoder.decode(); + /// assert_eq!(instr1.code(), Code::Xchg_rm8_r8); + /// assert_eq!(instr1.mnemonic(), Mnemonic::Xchg); + /// assert_eq!(instr1.len(), 4); + /// + /// let instr2 = decoder.decode(); + /// assert_eq!(instr2.code(), Code::Add_rm32_imm8); + /// assert_eq!(instr2.mnemonic(), Mnemonic::Add); + /// assert_eq!(instr2.len(), 5); + /// + /// let instr3 = decoder.decode(); + /// assert_eq!(instr3.code(), Code::EVEX_Vmovdqu64_zmm_k1z_zmmm512); + /// assert_eq!(instr3.mnemonic(), Mnemonic::Vmovdqu64); + /// assert_eq!(instr3.len(), 6); + /// ``` + /// + /// It's sometimes useful to decode some invalid instructions, eg. `lock add esi,ecx`. + /// Pass in [`DecoderOptions::NO_INVALID_CHECK`] to the constructor and the decoder + /// will decode some invalid encodings. + /// + /// [`DecoderOptions::NO_INVALID_CHECK`]: struct.DecoderOptions.html#associatedconstant.NO_INVALID_CHECK + /// + /// ``` + /// use iced_x86::*; + /// + /// // lock add esi,ecx ; lock not allowed + /// let bytes = b"\xF0\x01\xCE" as *const [u8]; + /// let mut decoder = unsafe { + /// Decoder::try_with_slice_ptr(64, bytes, 0x1234_5678, DecoderOptions::NONE).unwrap() + /// }; + /// let instr = decoder.decode(); + /// assert_eq!(instr.code(), Code::INVALID); + /// + /// // We want to decode some instructions with invalid encodings + /// let mut decoder = unsafe { + /// Decoder::try_with_slice_ptr(64, bytes, 0x1234_5678, DecoderOptions::NO_INVALID_CHECK).unwrap() + /// }; + /// let instr = decoder.decode(); + /// assert_eq!(instr.code(), Code::Add_rm32_r32); + /// assert!(instr.has_lock_prefix()); + /// ``` #[allow(clippy::missing_inline_in_public_items)] #[allow(clippy::let_unit_value)] #[allow(trivial_casts)] - pub fn try_with_ip(bitness: u32, data: &'a [u8], ip: u64, options: u32) -> Result, IcedError> { + pub unsafe fn try_with_slice_ptr(bitness: u32, data: *const [u8], ip: u64, options: u32) -> Result, IcedError> { let is64b_mode; let default_code_size; let default_operand_size; @@ -1027,6 +1105,7 @@ impl<'a> Decoder<'a> { default_code_size, displ_index: 0, data, + phantom: PhantomData }) } From 6c95719eed668193aef4f1e9ae2cd3b9f59be034 Mon Sep 17 00:00:00 2001 From: tremwil Date: Thu, 12 Dec 2024 11:47:23 -0500 Subject: [PATCH 2/4] implement sync for decoder --- src/rust/iced-x86/src/decoder.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/rust/iced-x86/src/decoder.rs b/src/rust/iced-x86/src/decoder.rs index ffb001782..a532cd708 100644 --- a/src/rust/iced-x86/src/decoder.rs +++ b/src/rust/iced-x86/src/decoder.rs @@ -585,6 +585,14 @@ where phantom: PhantomData<&'a [u8]> } +// Safety: decoder is safe to send between threads +unsafe impl<'a> Send for Decoder<'a> {} + +// Safety: data read by decoder is borrowed from an immutable u8 slice, unless +// using the unsafe `try_with_slice_ptr` constructor. In this case, the caller +// is responsible for making sure the slice pointer is never written to +unsafe impl<'a> Sync for Decoder<'a> {} + macro_rules! write_base_reg { ($instruction:ident, $expr:expr) => { debug_assert!($expr < IcedConstants::REGISTER_ENUM_COUNT as u32); From 02dc7be50c048d483e0fef4e4887b90433f24a80 Mon Sep 17 00:00:00 2001 From: tremwil Date: Thu, 12 Dec 2024 12:11:44 -0500 Subject: [PATCH 3/4] bump msrv and remove unstable fns --- src/rust/iced-x86/Cargo.toml | 2 +- src/rust/iced-x86/src/decoder.rs | 33 ++++++++++++++++++-------------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/rust/iced-x86/Cargo.toml b/src/rust/iced-x86/Cargo.toml index b30ca6d27..9c1cd8989 100644 --- a/src/rust/iced-x86/Cargo.toml +++ b/src/rust/iced-x86/Cargo.toml @@ -12,7 +12,7 @@ keywords = ["disassembler", "assembler", "x86", "amd64", "x86_64"] categories = ["no-std", "development-tools::debugging", "encoding", "hardware-support", "wasm"] # Don't include the tests when uploading to crates.io exclude = ["/src/**/test/", "/src/**/tests/", "/src/**/test_utils/"] -rust-version = "1.63.0" +rust-version = "1.79.0" # These features are documented in README.md / lib.rs [features] diff --git a/src/rust/iced-x86/src/decoder.rs b/src/rust/iced-x86/src/decoder.rs index a532cd708..716110755 100644 --- a/src/rust/iced-x86/src/decoder.rs +++ b/src/rust/iced-x86/src/decoder.rs @@ -505,19 +505,19 @@ where // Next bytes to read if there's enough bytes left to read. // This can be 1 byte past the last byte of `data`. - // Invariant: data.as_ptr() <= data_ptr <= max_data_ptr <= data.as_ptr() + data.len() == data_ptr_end + // Invariant: data as *const u8 <= data_ptr <= max_data_ptr <= data as *const u8 + data.len() == data_ptr_end // Invariant: {data_ptr,max_data_ptr,data_ptr_end}.add(max(MAX_READ_SIZE, MAX_INSTRUCTION_LENGTH)) doesn't overflow data_ptr: usize, - // This is `data.as_ptr() + data.len()` (1 byte past the last valid byte). + // This is `data as *const u8 + data.len()` (1 byte past the last valid byte). // This is guaranteed to be >= data_ptr (see the ctor), in other words, it can't overflow to 0 - // Invariant: data.as_ptr() <= data_ptr <= max_data_ptr <= data.as_ptr() + data.len() == data_ptr_end + // Invariant: data as *const u8 <= data_ptr <= max_data_ptr <= data as *const u8 + data.len() == data_ptr_end // Invariant: {data_ptr,max_data_ptr,data_ptr_end}.add(max(MAX_READ_SIZE, MAX_INSTRUCTION_LENGTH)) doesn't overflow data_ptr_end: usize, // Set to cmp::min(self.data_ptr + IcedConstants::MAX_INSTRUCTION_LENGTH, self.data_ptr_end) // and is guaranteed to not overflow // Initialized in decode() to at most 15 bytes after data_ptr so read_uXX() fails quickly after at most 15 read bytes // (1MB prefixes won't cause it to read 1MB prefixes, it will stop after at most 15). - // Invariant: data.as_ptr() <= data_ptr <= max_data_ptr <= data.as_ptr() + data.len() == data_ptr_end + // Invariant: data as *const u8 <= data_ptr <= max_data_ptr <= data as *const u8 + data.len() == data_ptr_end // Invariant: {data_ptr,max_data_ptr,data_ptr_end}.add(max(MAX_READ_SIZE, MAX_INSTRUCTION_LENGTH)) doesn't overflow max_data_ptr: usize, // Initialized to start of data (data_ptr) when decode() is called. Used to calculate current IP/offset (when decoding) if needed. @@ -879,7 +879,7 @@ impl<'a> Decoder<'a> { /// ``` #[inline] pub fn try_with_ip(bitness: u32, data: &'a [u8], ip: u64, options: u32) -> Result, IcedError> { - unsafe { Decoder::try_with_slice_ptr(bitness, data as *const _, ip, options) } + unsafe { Decoder::try_with_slice_ptr(bitness, data, ip, options) } } /// Creates a decoder from a raw slice pointer. @@ -896,7 +896,12 @@ impl<'a> Decoder<'a> { /// * `options`: Decoder options, `0` or eg. `DecoderOptions::NO_INVALID_CHECK | DecoderOptions::AMD` /// /// # Safety - /// `data` must be a valid pointer to a slice of bytes with lifetime at least that of the decoder. + /// For other safe methods to be safe, `data` must be a valid pointer to a slice of bytes with lifetime + /// at least that of the decoder which is not mutably aliased. + /// + /// It is not *immediately* UB for `data` to fail to uphold these invariants. In that case you must + /// ensure that the subslice at offset equal to the current decoder position with length equal to the + /// to-be-decoded instruction does uphold them. /// /// # Examples /// @@ -989,13 +994,13 @@ impl<'a> Decoder<'a> { } _ => return Err(IcedError::new("Invalid bitness")), } - let data_ptr_end = data.as_ptr() as usize + data.len(); - if data_ptr_end < data.as_ptr() as usize || { + let data_ptr_end = data as *const u8 as usize + data.len(); + if data_ptr_end < data as *const u8 as usize || { // Verify that max_data_ptr can never overflow and that data_ptr.add(N) can't overflow. // Both of them can equal data_ptr_end (1 byte past the last valid byte). // When reading a u8/u16/u32..., we calculate data_ptr.add({1,2,4,...MAX_READ_SIZE}) so it must not overflow. // In decode(), we calculate data_ptr.add(MAX_INSTRUCTION_LENGTH) so it must not overflow. - data_ptr_end.wrapping_add(cmp::max(IcedConstants::MAX_INSTRUCTION_LENGTH, Decoder::MAX_READ_SIZE)) < data.as_ptr() as usize + data_ptr_end.wrapping_add(cmp::max(IcedConstants::MAX_INSTRUCTION_LENGTH, Decoder::MAX_READ_SIZE)) < data as *const u8 as usize } { return Err(IcedError::new("Invalid slice")); } @@ -1084,10 +1089,10 @@ impl<'a> Decoder<'a> { Ok(Decoder { ip, - data_ptr: data.as_ptr() as usize, + data_ptr: data as *const u8 as usize, data_ptr_end, - max_data_ptr: data.as_ptr() as usize, - instr_start_data_ptr: data.as_ptr() as usize, + max_data_ptr: data as *const u8 as usize, + instr_start_data_ptr: data as *const u8 as usize, handlers_map0: get_handlers(&tables.handlers_map0), handlers_vex_map0, handlers_vex: [handlers_vex_0f, handlers_vex_0f38, handlers_vex_0f3a], @@ -1167,7 +1172,7 @@ impl<'a> Decoder<'a> { #[must_use] #[inline] pub fn position(&self) -> usize { - self.data_ptr - self.data.as_ptr() as usize + self.data_ptr - self.data as *const u8 as usize } /// Sets the current data position, which is the index into the data passed to the constructor. @@ -1218,7 +1223,7 @@ impl<'a> Decoder<'a> { } else { // - We verified the new offset above. // - Referencing 1 byte past the last valid byte is safe as long as we don't dereference it. - self.data_ptr = self.data.as_ptr() as usize + new_pos; + self.data_ptr = self.data as *const u8 as usize + new_pos; Ok(()) } } From 64c4c2ab1361727d16c36d40c91a3f76df4d4f97 Mon Sep 17 00:00:00 2001 From: tremwil Date: Thu, 12 Dec 2024 12:19:39 -0500 Subject: [PATCH 4/4] apply clippy suggestions, improve docs --- src/rust/iced-x86/src/decoder.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/rust/iced-x86/src/decoder.rs b/src/rust/iced-x86/src/decoder.rs index 716110755..1389b3caf 100644 --- a/src/rust/iced-x86/src/decoder.rs +++ b/src/rust/iced-x86/src/decoder.rs @@ -586,12 +586,12 @@ where } // Safety: decoder is safe to send between threads -unsafe impl<'a> Send for Decoder<'a> {} +unsafe impl Send for Decoder<'_> {} // Safety: data read by decoder is borrowed from an immutable u8 slice, unless // using the unsafe `try_with_slice_ptr` constructor. In this case, the caller // is responsible for making sure the slice pointer is never written to -unsafe impl<'a> Sync for Decoder<'a> {} +unsafe impl Sync for Decoder<'_> {} macro_rules! write_base_reg { ($instruction:ident, $expr:expr) => { @@ -900,8 +900,8 @@ impl<'a> Decoder<'a> { /// at least that of the decoder which is not mutably aliased. /// /// It is not *immediately* UB for `data` to fail to uphold these invariants. In that case you must - /// ensure that the subslice at offset equal to the current decoder position with length equal to the - /// to-be-decoded instruction does uphold them. + /// ensure that whenever [`decode`] or [`decode_out`] is called, the subslice at the current decoder + /// position with length equal to the to-be-decoded instruction does uphold them. /// /// # Examples /// @@ -913,7 +913,7 @@ impl<'a> Decoder<'a> { /// // vmovdqu64 zmm18{k3}{z},zmm11 /// let bytes = b"\x86\x64\x32\x16\xF0\xF2\x83\x00\x5A\x62\xC1\xFE\xCB\x6F\xD3" as *const [u8]; /// let mut decoder = unsafe { - /// Decoder::try_with_slice_ptr(64, bytes, 0x1234_5678, DecoderOptions::NONE).unwrap() + /// Decoder::try_with_slice_ptr(64, bytes, 0x1234_5678, DecoderOptions::NONE).unwrap() /// }; /// /// let instr1 = decoder.decode(); @@ -937,6 +937,8 @@ impl<'a> Decoder<'a> { /// will decode some invalid encodings. /// /// [`DecoderOptions::NO_INVALID_CHECK`]: struct.DecoderOptions.html#associatedconstant.NO_INVALID_CHECK + /// [`decode`]: #method.decode + /// [`decode_out`]: #method.decode_out /// /// ``` /// use iced_x86::*; @@ -944,14 +946,14 @@ impl<'a> Decoder<'a> { /// // lock add esi,ecx ; lock not allowed /// let bytes = b"\xF0\x01\xCE" as *const [u8]; /// let mut decoder = unsafe { - /// Decoder::try_with_slice_ptr(64, bytes, 0x1234_5678, DecoderOptions::NONE).unwrap() + /// Decoder::try_with_slice_ptr(64, bytes, 0x1234_5678, DecoderOptions::NONE).unwrap() /// }; /// let instr = decoder.decode(); /// assert_eq!(instr.code(), Code::INVALID); /// /// // We want to decode some instructions with invalid encodings /// let mut decoder = unsafe { - /// Decoder::try_with_slice_ptr(64, bytes, 0x1234_5678, DecoderOptions::NO_INVALID_CHECK).unwrap() + /// Decoder::try_with_slice_ptr(64, bytes, 0x1234_5678, DecoderOptions::NO_INVALID_CHECK).unwrap() /// }; /// let instr = decoder.decode(); /// assert_eq!(instr.code(), Code::Add_rm32_r32);