Skip to content

Commit

Permalink
Add a syntax property to the asm JSON response.
Browse files Browse the repository at this point in the history
  • Loading branch information
mstange committed Jan 31, 2023
1 parent 9d2790a commit 239f747
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 29 deletions.
2 changes: 1 addition & 1 deletion fixtures/snapshots/asm_with_continue.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"startAddress":"0x51fd0","size":"0x26","arch":"arm","instructions":[[0,"push {r4, r6, r7, lr}"],[2,"add r7, sp, 0x8"],[4,"mov r4, r0"],[6,"ldr r0, [r0, 0x14]"],[8,"cbz r0, $+0xe"],[10,"ldr r1, [r0]"],[12,"ldr r1, [r1, 0xc]"],[14,"blx r1"],[16,"cmp r0, 0x1"],[18,"bne $+0x8"],[20,"ldr r0, [r4, 0x14]"],[22,"pop {r4, r6, r7, pc}"],[24,"movs r0, 0x0"],[26,"pop {r4, r6, r7, pc}"],[28,"ldr r0, [r4, 0xc]"],[30,"pop.w {r4, r6, r7, lr}"],[34,"and r10, r1, 0x47474747"]]}
{"startAddress":"0x51fd0","size":"0x26","arch":"arm","syntax":["ARM"],"instructions":[[0,"push {r4, r6, r7, lr}"],[2,"add r7, sp, 0x8"],[4,"mov r4, r0"],[6,"ldr r0, [r0, 0x14]"],[8,"cbz r0, $+0xe"],[10,"ldr r1, [r0]"],[12,"ldr r1, [r1, 0xc]"],[14,"blx r1"],[16,"cmp r0, 0x1"],[18,"bne $+0x8"],[20,"ldr r0, [r4, 0x14]"],[22,"pop {r4, r6, r7, pc}"],[24,"movs r0, 0x0"],[26,"pop {r4, r6, r7, pc}"],[28,"ldr r0, [r4, 0xc]"],[30,"pop.w {r4, r6, r7, lr}"],[34,"and r10, r1, 0x47474747"]]}
81 changes: 53 additions & 28 deletions samply-api/src/asm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ use yaxpeax_arch::{Arch, DecodeError, U8Reader};

use crate::asm::response_json::DecodedInstruction;

use self::response_json::Response;

mod request_json;
mod response_json;

Expand Down Expand Up @@ -144,27 +146,27 @@ fn compute_response<'data: 'file, 'file>(
// Translate start_address from a "relative address" into an
// SVMA ("stated virtual memory address").
let image_base = relative_address_base(object);
let start_address = image_base + u64::from(relative_start_address);
let start_svma = image_base + u64::from(relative_start_address);

// Find the section and segment which contains our start_address.
// Find the section and segment which contains our start_svma.
use object::ObjectSection;
let (section, section_end_addr) = object
let (section, section_end_svma) = object
.sections()
.find_map(|section| {
let section_start_addr = section.address();
let section_end_addr = section_start_addr.checked_add(section.size())?;
if !(section_start_addr..section_end_addr).contains(&start_address) {
let section_start_svma = section.address();
let section_end_svma = section_start_svma.checked_add(section.size())?;
if !(section_start_svma..section_end_svma).contains(&start_svma) {
return None;
}

Some((section, section_end_addr))
Some((section, section_end_svma))
})
.ok_or(AsmError::AddressNotFound)?;

let segment = object.segments().find(|segment| {
let segment_start_addr = segment.address();
if let Some(segment_end_addr) = segment_start_addr.checked_add(segment.size()) {
(segment_start_addr..segment_end_addr).contains(&start_address)
let segment_start_svma = segment.address();
if let Some(segment_end_svma) = segment_start_svma.checked_add(segment.size()) {
(segment_start_svma..segment_end_svma).contains(&start_svma)
} else {
false
}
Expand All @@ -173,19 +175,19 @@ fn compute_response<'data: 'file, 'file>(
// Pad out the number of bytes we read a little, to allow for reading one
// more instruction.
// We've been asked to decode the instructions whose instruction addresses
// are in the range start_address .. (start_address + size). If the end of
// are in the range start_svma .. (start_svma + size). If the end of
// this range points into the middle of an instruction, we still want to
// decode the entire instruction, so we need all of its bytes.
// We have another check later to make sure we don't return instructions whose
// address is beyond the requested range.
const MAX_INSTR_LEN: u64 = 15; // TODO: Get the correct max length for this arch
let max_read_len = section_end_addr - start_address;
let max_read_len = section_end_svma - start_svma;
let read_len = (u64::from(disassembly_len) + MAX_INSTR_LEN).min(max_read_len);

// Now read the instruction bytes from the file.
let bytes = if let Some(segment) = segment {
segment
.data_range(start_address, read_len)?
.data_range(start_svma, read_len)?
.ok_or(AsmError::ByteRangeNotInSection)?
} else {
// We don't have a segment, try reading via the section.
Expand All @@ -196,42 +198,54 @@ fn compute_response<'data: 'file, 'file>(
// Specifically, incorrect section file offset information was observed in
// the arm64e dyld cache on macOS 13.0.1, FB11929250.
section
.data_range(start_address, read_len)?
.data_range(start_svma, read_len)?
.ok_or(AsmError::ByteRangeNotInSection)?
};

let reader = yaxpeax_arch::U8Reader::new(bytes);
let (instructions, len, arch) = decode_arch(reader, architecture, disassembly_len)?;
Ok(response_json::Response {
start_address: relative_start_address,
size: len,
arch,
instructions,
})
decode_arch(
reader,
architecture,
image_base,
start_svma,
disassembly_len,
)
}

/// Selects the yaxpeax architecture type that matches `arch` and runs the
/// generic `decode` function with it.
///
/// Handles `I386`, `X86_64`, `Aarch64` and `Arm`; any other architecture is
/// rejected with `AsmError::UnrecognizedArch`.
fn decode_arch(
reader: U8Reader,
arch: Architecture,
image_base: u64,
start_svma: u64,
decode_len: u32,
) -> Result<(Vec<DecodedInstruction>, u32, String), AsmError> {
) -> Result<Response, AsmError> {
Ok(match arch {
Architecture::I386 => decode::<yaxpeax_x86::protected_mode::Arch>(reader, decode_len),
Architecture::X86_64 => decode::<yaxpeax_x86::amd64::Arch>(reader, decode_len),
Architecture::Aarch64 => decode::<yaxpeax_arm::armv8::a64::ARMv8>(reader, decode_len),
Architecture::Arm => decode::<yaxpeax_arm::armv7::ARMv7>(reader, decode_len),
Architecture::I386 => {
decode::<yaxpeax_x86::protected_mode::Arch>(reader, image_base, start_svma, decode_len)
}
Architecture::X86_64 => {
decode::<yaxpeax_x86::amd64::Arch>(reader, image_base, start_svma, decode_len)
}
Architecture::Aarch64 => {
decode::<yaxpeax_arm::armv8::a64::ARMv8>(reader, image_base, start_svma, decode_len)
}
Architecture::Arm => {
decode::<yaxpeax_arm::armv7::ARMv7>(reader, image_base, start_svma, decode_len)
}
// Every other `Architecture` value is unsupported here.
_ => return Err(AsmError::UnrecognizedArch(arch)),
})
}

/// Per-architecture hooks required by the generic `decode` function.
///
/// Implemented for each yaxpeax architecture type that `decode_arch`
/// dispatches to.
trait InstructionDecoding: Arch {
/// Architecture name copied into the response's `arch` field,
/// e.g. "x86_64", "i686", "aarch64" or "arm".
const ARCH_NAME: &'static str;
/// Disassembly syntax names copied into the response's `syntax` field,
/// e.g. `["Intel"]` or `["ARM"]`.
const SYNTAX: &'static [&'static str];
/// Creates the decoder value for this architecture.
fn make_decoder() -> Self::Decoder;
/// Converts one instruction of this architecture into a `String`.
fn stringify_inst(inst: Self::Instruction) -> String;
}

impl InstructionDecoding for yaxpeax_x86::amd64::Arch {
const ARCH_NAME: &'static str = "x86_64";
const SYNTAX: &'static [&'static str] = &["Intel"];

fn make_decoder() -> Self::Decoder {
yaxpeax_x86::amd64::InstDecoder::default()
Expand All @@ -244,6 +258,7 @@ impl InstructionDecoding for yaxpeax_x86::amd64::Arch {

impl InstructionDecoding for yaxpeax_x86::protected_mode::Arch {
const ARCH_NAME: &'static str = "i686";
const SYNTAX: &'static [&'static str] = &["Intel"];

fn make_decoder() -> Self::Decoder {
yaxpeax_x86::protected_mode::InstDecoder::default()
Expand All @@ -256,6 +271,7 @@ impl InstructionDecoding for yaxpeax_x86::protected_mode::Arch {

impl InstructionDecoding for yaxpeax_arm::armv8::a64::ARMv8 {
const ARCH_NAME: &'static str = "aarch64";
const SYNTAX: &'static [&'static str] = &["ARM"];

fn make_decoder() -> Self::Decoder {
yaxpeax_arm::armv8::a64::InstDecoder::default()
Expand All @@ -268,6 +284,7 @@ impl InstructionDecoding for yaxpeax_arm::armv8::a64::ARMv8 {

impl InstructionDecoding for yaxpeax_arm::armv7::ARMv7 {
const ARCH_NAME: &'static str = "arm";
const SYNTAX: &'static [&'static str] = &["ARM"];

fn make_decoder() -> Self::Decoder {
// TODO: Detect whether the instructions in the requested address range
Expand All @@ -291,8 +308,10 @@ impl InstructionDecoding for yaxpeax_arm::armv7::ARMv7 {

fn decode<'a, A: InstructionDecoding>(
mut reader: U8Reader<'a>,
image_base: u64,
start_svma: u64,
decode_len: u32,
) -> (Vec<DecodedInstruction>, u32, String)
) -> Response
where
u64: From<A::Address>,
U8Reader<'a>: yaxpeax_arch::Reader<A::Address, A::Word>,
Expand Down Expand Up @@ -331,5 +350,11 @@ where
&mut reader,
)) as u32;

(instructions, final_offset, A::ARCH_NAME.to_string())
Response {
start_address: (start_svma - image_base) as u32,
size: final_offset,
arch: A::ARCH_NAME.to_string(),
syntax: A::SYNTAX.iter().map(ToString::to_string).collect(),
instructions,
}
}
11 changes: 11 additions & 0 deletions samply-api/src/asm/response_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ pub struct Response {
/// The CPU architecture targeted by this binary, e.g. "i686", "x86_64", "arm", "aarch64"
pub arch: String,

/// A single-element Vec with the disassembly syntax used in the `instructions`,
/// e.g. `["Intel"]` for x86.
///
/// This is a Vec because I'd like to use `["Intel", "C Style"]` in the future,
/// with each instruction being `[<offset>, <intel-disassembly>, <c-style-disassembly>]`.
pub syntax: Vec<String>,

/// The disassembled instructions.
pub instructions: Vec<DecodedInstruction>,
}
Expand All @@ -41,6 +48,7 @@ mod test {
start_address: 0x1234,
size: 0x3,
arch: "x86_64".to_string(),
syntax: vec!["Intel".to_string()],
instructions: vec![
DecodedInstruction {
offset: 0,
Expand All @@ -57,6 +65,9 @@ mod test {
"startAddress": "0x1234",
"size": "0x3",
"arch": "x86_64",
"syntax": [
"Intel"
],
"instructions": [
[
0,
Expand Down

0 comments on commit 239f747

Please sign in to comment.