Skip to content

Commit

Permalink
Add C-style disassembly for x86_64 in asm JSON.
Browse files Browse the repository at this point in the history
  • Loading branch information
mstange committed Jan 31, 2023
1 parent bf8bd84 commit 2a6ff57
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 24 deletions.
4 changes: 4 additions & 0 deletions API.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ Example response JSON:
{
"startAddress": "0x5844",
"size": "0x1c",
"arch": "aarch64",
"syntax": ["ARM"],
"instructions": [
[0, "hint #0x1b"],
[4, "stp x29, x30, [sp, #-0x10]!"],
Expand All @@ -213,6 +215,8 @@ Example response JSON:

This finds the requested binary, reads the machine code bytes for the requested range, and disassembles them based on the binary's target architecture. The per-instruction offset is relative to the given `startAddress`.

The data for each instruction consists of `[offset, ...oneStringPerSyntax]`.

## Special paths

The `/symbolicate/v5` API returns file paths in the `file` property of its response JSON. Such a file path can either be a regular path string (e.g. `/Users/mstange/code/mozilla/widget/cocoa/nsAppShell.mm`), or it can also be a "special path", e.g. `hg:hg.mozilla.org/mozilla-central:mozglue/baseprofiler/core/ProfilerBacktrace.cpp:1706d4d54ec68fae1280305b70a02cb24c16ff68`.
Expand Down
2 changes: 1 addition & 1 deletion fixtures/snapshots/asm_x86_64.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"startAddress":"0x17a20","size":"0x3d","arch":"x86_64","syntax":["Intel"],"instructions":[[0,"jl $-0x64"],[2,"add eax, dword [rax]"],[4,"mov edx, 0x38"],[9,"mov rcx, r15"],[12,"call 0x39c6f"],[17,"xor eax, eax"],[19,"jmp $+0x9a"],[24,"mov rbx, qword [r15]"],[27,"cmp rbx, rax"],[30,"jz $+0x35"],[32,"mov rdx, qword [r15 + 0x8]"],[36,"mov qword [rdx], rbx"],[39,"mov rdx, qword [rbx + 0x8]"],[43,"mov qword [rdx], rax"],[46,"mov rdx, qword [rax + 0x8]"],[50,"mov qword [rdx], r15"],[53,"mov rdx, qword [rax + 0x8]"],[57,"mov rbp, qword [rbx + 0x8]"]]}
{"startAddress":"0x17a20","size":"0x3d","arch":"x86_64","syntax":["Intel","C style"],"instructions":[[0,"jl $-0x64","if /* signed */ less(rflags) then jmp $-0x64"],[2,"add eax, dword [rax]","eax += [rax]"],[4,"mov edx, 0x38","edx = 0x38"],[9,"mov rcx, r15","rcx = r15"],[12,"call 0x39c6f","0x39c6f = call(0x39c6f)"],[17,"xor eax, eax","eax ^= eax"],[19,"jmp $+0x9a","jmp $+0x9a"],[24,"mov rbx, qword [r15]","rbx = [r15]"],[27,"cmp rbx, rax","rflags = flags(rbx - rax)"],[30,"jz $+0x35","if zero(rflags) then jmp $+0x35"],[32,"mov rdx, qword [r15 + 0x8]","rdx = [r15 + 0x8]"],[36,"mov qword [rdx], rbx","[rdx] = rbx"],[39,"mov rdx, qword [rbx + 0x8]","rdx = [rbx + 0x8]"],[43,"mov qword [rdx], rax","[rdx] = rax"],[46,"mov rdx, qword [rax + 0x8]","rdx = [rax + 0x8]"],[50,"mov qword [rdx], r15","[rdx] = r15"],[53,"mov rdx, qword [rax + 0x8]","rdx = [rax + 0x8]"],[57,"mov rbp, qword [rbx + 0x8]","rbp = [rbx + 0x8]"]]}
48 changes: 32 additions & 16 deletions samply-api/src/asm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,19 +240,27 @@ trait InstructionDecoding: Arch {
const ARCH_NAME: &'static str;
const SYNTAX: &'static [&'static str];
fn make_decoder() -> Self::Decoder;
fn stringify_inst(inst: Self::Instruction) -> String;
fn stringify_inst(offset: u32, inst: Self::Instruction) -> DecodedInstruction;
}

impl InstructionDecoding for yaxpeax_x86::amd64::Arch {
const ARCH_NAME: &'static str = "x86_64";
const SYNTAX: &'static [&'static str] = &["Intel"];
const SYNTAX: &'static [&'static str] = &["Intel", "C style"];

fn make_decoder() -> Self::Decoder {
yaxpeax_x86::amd64::InstDecoder::default()
}

fn stringify_inst(inst: Self::Instruction) -> String {
inst.to_string()
fn stringify_inst(offset: u32, inst: Self::Instruction) -> DecodedInstruction {
DecodedInstruction {
offset,
decoded_string_per_syntax: vec![
inst.display_with(yaxpeax_x86::amd64::DisplayStyle::Intel)
.to_string(),
inst.display_with(yaxpeax_x86::amd64::DisplayStyle::C)
.to_string(),
],
}
}
}

Expand All @@ -264,8 +272,11 @@ impl InstructionDecoding for yaxpeax_x86::protected_mode::Arch {
yaxpeax_x86::protected_mode::InstDecoder::default()
}

fn stringify_inst(inst: Self::Instruction) -> String {
inst.to_string()
fn stringify_inst(offset: u32, inst: Self::Instruction) -> DecodedInstruction {
DecodedInstruction {
offset,
decoded_string_per_syntax: vec![inst.to_string()],
}
}
}

Expand All @@ -277,8 +288,11 @@ impl InstructionDecoding for yaxpeax_arm::armv8::a64::ARMv8 {
yaxpeax_arm::armv8::a64::InstDecoder::default()
}

fn stringify_inst(inst: Self::Instruction) -> String {
inst.to_string()
fn stringify_inst(offset: u32, inst: Self::Instruction) -> DecodedInstruction {
DecodedInstruction {
offset,
decoded_string_per_syntax: vec![inst.to_string()],
}
}
}

Expand All @@ -301,8 +315,11 @@ impl InstructionDecoding for yaxpeax_arm::armv7::ARMv7 {
yaxpeax_arm::armv7::InstDecoder::default_thumb()
}

fn stringify_inst(inst: Self::Instruction) -> String {
inst.to_string()
fn stringify_inst(offset: u32, inst: Self::Instruction) -> DecodedInstruction {
DecodedInstruction {
offset,
decoded_string_per_syntax: vec![inst.to_string()],
}
}
}

Expand All @@ -328,18 +345,17 @@ where
}
match decoder.decode(&mut reader) {
Ok(inst) => {
let decoded_string = A::stringify_inst(inst);
instructions.push(DecodedInstruction {
offset,
decoded_string,
});
instructions.push(A::stringify_inst(offset, inst));
}
Err(e) => {
if !e.data_exhausted() {
// If decoding encountered an error, append a fake "!!! ERROR" instruction
instructions.push(DecodedInstruction {
offset,
decoded_string: format!("!!! ERROR: {}", e),
decoded_string_per_syntax: A::SYNTAX
.iter()
.map(|_| format!("!!! ERROR: {}", e))
.collect(),
});
}
break;
Expand Down
38 changes: 31 additions & 7 deletions samply-api/src/asm/response_json.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use serde::Serialize;
use serde_tuple::*;
use serde::{ser::SerializeSeq, Serialize};

#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
Expand Down Expand Up @@ -28,13 +27,38 @@ pub struct Response {
pub instructions: Vec<DecodedInstruction>,
}

#[derive(Serialize_tuple, Debug)]
#[derive(Debug)]
pub struct DecodedInstruction {
/// Byte offset from start_address.
pub offset: u32,

/// The decoded instruction as a string.
pub decoded_string: String,
/// The decoded instruction as a string, one for each syntax (e.g. Intel and then C-Style).
pub decoded_string_per_syntax: Vec<String>,
}

impl Serialize for DecodedInstruction {
    /// Serializes the instruction as one flat sequence: the offset first,
    /// followed by one decoded string per syntax.
    ///
    /// With a single syntax this yields `[8, "decoded instruction"]`. If the
    /// syntax list is `["Intel", "C style"]`, it yields
    /// `[8, "decoded using Intel syntax", "decoded using C style syntax"]`.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying serializer.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // The element count is known up front: one offset plus one string per
        // syntax. Passing it as the length hint lets serializers pre-size their
        // output and keeps this impl usable with formats that need the
        // sequence length before the first element is written.
        let element_count = 1 + self.decoded_string_per_syntax.len();
        let mut seq = serializer.serialize_seq(Some(element_count))?;

        // First element is the offset to the start address.
        seq.serialize_element(&self.offset)?;

        // Flatten all strings into the outer array.
        for decoded_string in &self.decoded_string_per_syntax {
            seq.serialize_element(decoded_string)?;
        }

        // In the future we may append more elements here, for extra per-instruction information.
        // For example `{ "jumpTarget": "0x1390" }`.
        // Or even `{ "jumpTarget": "0x2468", "destSymbol": { "name": "MyFunction()", "address": "0x2468", "size": "0x38" } }`
        // Any such addition must also be counted in `element_count` above.

        seq.end()
    }
}

#[cfg(test)]
Expand All @@ -52,11 +76,11 @@ mod test {
instructions: vec![
DecodedInstruction {
offset: 0,
decoded_string: "push rbp".to_string(),
decoded_string_per_syntax: vec!["push rbp".to_string()],
},
DecodedInstruction {
offset: 1,
decoded_string: "mov rbp, rsp".to_string(),
decoded_string_per_syntax: vec!["mov rbp, rsp".to_string()],
},
],
};
Expand Down

0 comments on commit 2a6ff57

Please sign in to comment.