Load x87 state properly (#193)

The way `@fptw` was handled has been wrong for a long time, but I finally figured it out. This updates all the backends to do the right thing.

Other, more minor changes:
- The code that reads the CPU state has been updated to be compatible with [snapshot](https://github.com/0vercl0k/snapshot). It should also now be able to load precise values for 80-bit registers. 'Old' dumps should still load the way they did before (with the buggy code) so as not to break them.
- Updated bxcpu to be able to load `@fpst` registers with a `frac` / `exponent` (cf. yrp604/bochscpu@cab8051).
- Minor updates to the README.
- Updated the CI to get rid of warnings and removed the CodeQL scanning (it hasn't reported a single hit since it was turned on -_-).
0vercl0k · Feb 11, 2024 · 393bea0 · 393bea0
1 parent ad37f7c
commit 393bea0
Show file tree

Hide file tree

Showing 13 changed files with 189 additions and 122 deletions.
diff --git a/.github/workflows/wtf.yml b/.github/workflows/wtf.yml
@@ -18,20 +18,14 @@ jobs:
     name: Windows latest / ${{ matrix.generator }}
     steps:
     - name: Checkout
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Add msbuild to PATH
-      uses: microsoft/setup-msbuild@v1
+      uses: microsoft/setup-msbuild@v2
 
     - name: Setup vs prompt
       uses: ilammy/msvc-dev-cmd@v1
 
-    - name: Initialize CodeQL
-      if: matrix.generator == 'msvc'
-      uses: github/codeql-action/init@v2
-      with:
-        languages: cpp
-
     - name: Build with Ninja/cl
       if: matrix.generator == 'ninja'
       run: |
@@ -44,10 +38,6 @@ jobs:
         cd src\build
         .\build-release-msvc.bat
 
-    - name: Perform CodeQL Analysis
-      if: matrix.generator == 'msvc'
-      uses: github/codeql-action/analyze@v2
-
     - name: Copy dbghelp/symsrv
       if: matrix.generator == 'ninja'
       run: |
@@ -58,7 +48,7 @@ jobs:
 
     - name: Upload artifacts
       if: matrix.generator == 'ninja'
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
         name: bin-win64.RelWithDebInfo
         path: |
@@ -79,12 +69,7 @@ jobs:
     name: Ubuntu latest / ${{ matrix.compiler }}
     steps:
     - name: Checkout
-      uses: actions/checkout@v3
-
-    - name: Initialize CodeQL
-      uses: github/codeql-action/init@v2
-      with:
-        languages: cpp
+      uses: actions/checkout@v4
 
     - name: Installing dependencies
       run: |
@@ -105,18 +90,15 @@ jobs:
     - name: Build with clang
       if: matrix.compiler == 'clang'
       env:
-        CC: clang-17
-        CXX: clang++-17
+        CC: clang-18
+        CXX: clang++-18
       run: |
         cd src/build
         chmod u+x ./build-release.sh
         ./build-release.sh
 
-    - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v2
-
     - name: Upload artifacts
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
         name: bin-lin64-${{ matrix.compiler }}.Release
         path: |

diff --git a/README.md b/README.md
@@ -1,15 +1,20 @@
-# what the fuzz
-
-![Builds](https://github.com/0vercl0k/wtf/workflows/Builds/badge.svg)
+<div align='center'>
+  <h1><code>what the fuzz</code></h1>
+  <p>
+    <strong>A distributed, code-coverage guided, cross-platform snapshot-based fuzzer designed for attacking user and or kernel-mode targets running on Microsoft Windows.</strong>
+  </p>
+  <p>
+    <img src='https://github.com/0vercl0k/wtf/workflows/Builds/badge.svg'/>
+  </p>
+  <p>
+    <img src='pics/wtf.gif'/>
+  </p>
+</div>
 
 ## Overview
 
 **what the fuzz** or **wtf** is a distributed, code-coverage guided, customizable, cross-platform snapshot-based fuzzer designed for attacking user and or kernel-mode targets running on Microsoft Windows. Execution of the target can be done inside an emulator with [bochscpu](https://github.com/yrp604/bochscpu) (slowest, most precise), inside a Windows VM with the [Windows Hypervisor Platform APIs](https://docs.microsoft.com/en-us/virtualization/api/hypervisor-platform/hypervisor-platform) or inside a Linux VM with the [KVM APIs](https://www.kernel.org/doc/html/latest/virt/kvm/api.html) (fastest).
 
-<p align='center'>
-<img src='pics/wtf.gif'>
-</p>
-
 It uncovered memory corruption vulnerabilities in a wide range of softwares: [IDA Pro](https://github.com/0vercl0k/fuzzing-ida75), a popular [AAA game](https://blog.ret2.io/2021/07/21/wtf-snapshot-fuzzing/), the [Windows kernel](https://microsoft.fandom.com/wiki/Architecture_of_Windows_NT), the [Microsoft RDP client](https://www.hexacon.fr/slides/Hexacon2022-Fuzzing_RDPEGFX_with_wtf.pdf), [NVIDIA GPU Display driver](https://nvidia.custhelp.com/app/answers/detail/a_id/5383), etc.
 
 Compiled binaries are available from either the [CI artifacts](https://github.com/0vercl0k/wtf/actions/workflows/wtf.yml) or from the [Releases](https://github.com/0vercl0k/wtf/releases) section for both Windows & Linux.
@@ -188,50 +193,34 @@ The usual workflow to harness a target is as follows:
     00007ff6`f5bb111e ff15dc1e0100    call    qword ptr [hevd_client!_imp_DeviceIoControl (00007ff6`f5bc3000)] ds:002b:00007ff6`f5bc3000={KERNEL32!DeviceIoControlImplementation (00007ff8`3e2e6360)}
     ```
 
-1. Use [bdump.js](https://github.com/yrp604/bdump) to generate the kernel crash-dump as well as the `regs.json` file that contains the CPU state. I recommend to dump those file in a `state` directory under your `target` directory (`targets/hevd/state` for example):
+1. Use [snapshot](https://github.com/0vercl0k/snapshot) to generate the kernel crash-dump as well as the `regs.json` file that contains the CPU state. I recommend dumping those files in a `state` directory under your `target` directory (`targets/hevd/state` for example):
 
     ```
-    kd> .scriptload c:\\work\\codes\\bdump\\bdump.js
-    [bdump] Usage: !bdump "C:\\path\\to\\dump"
-    [bdump] Usage: !bdump_full "C:\\path\\to\\dump"
-    [bdump] Usage: !bdump_active_kernel "C:\\path\\to\\dump"
-    [bdump] This will create a dump directory and fill it with a memory and register files
-    [bdump] NOTE: you must include the quotes and escape the backslashes!
-    JavaScript script successfully loaded from 'c:\work\codes\bdump\bdump.js'
-
-    kd> !bdump_active_kernel "c:\\work\\codes\\wtf\\targets\\hevd\\state"
-    [bdump] creating dir...
-    [bdump] saving regs...
-    [bdump] register fixups...
-    [bdump] don't know how to get mxcsr_mask or fpop, setting to zero...
-    [bdump]
-    [bdump] don't know how to get avx registers, skipping...
-    [bdump]
-    [bdump] tr.base is not cannonical...
-    [bdump] old tr.base: 0x7375c000
-    [bdump] new tr.base: 0xfffff8047375c000
-    [bdump]
-    [bdump] setting flag 0x2000 on cs.attr...
-    [bdump] old cs.attr: 0x2fb
-    [bdump] new cs.attr: 0x22fb
-    [bdump]
-    [bdump] rip and gs don't match kernel/user, swapping...
-    [bdump] rip: 0x7ff6f5bb111e
-    [bdump] new gs.base: 0xdfd9621000
-    [bdump] new kernel_gs_base: 0xfffff8046b6f3000
-    [bdump]
-    [bdump] non-zero IRQL in usermode, resetting to zero...
-    [bdump] saving mem, get a coffee or have a smoke, this will probably take around 10-15 minutes...
-    [bdump] Creating c:\work\codes\wtf\targets\hevd\state\mem.dmp - Active kernel and user memory bitmap dump
-    [bdump] Collecting pages to write to the dump. This may take a while.
-    [bdump] 0% written.
+    kd> .load c:\work\codes\snapshot\target\release\snapshot.dll
+
+    kd> !snapshot -h
+    [snapshot] Usage: snapshot [OPTIONS] [STATE_PATH]
+
+    Arguments:
+      [STATE_PATH]  The path to save the snapshot to
+
+    Options:
+      -k, --kind <KIND>  The kind of snapshot to take [default: full] [possible values: active-kernel, full]
+      -h, --help         Print help
+
+    kd> !snapshot c:\work\codes\wtf\targets\hevd\state
+    [snapshot] Dumping the CPU state into c:\work\codes\wtf\targets\hevd\state\regs.json..
+    [snapshot] Dumping the memory state into c:\work\codes\wtf\targets\hevd\state\mem.dmp..
+    Creating c:\\work\\codes\\wtf\\targets\\hevd\\state\\mem.dmp - Full memory range dump
+    0% written.
+    5% written. 1 min 50 sec remaining.
+    10% written. 1 min 17 sec remaining.
+    15% written. 1 min 30 sec remaining.
     [...]
-    [bdump] 95% written. 1 sec remaining.
-    [bdump] Wrote 1.5 GB in 23 sec.
-    [bdump] The average transfer rate was 64.7 MB/s.
-    [bdump] Dump successfully written
-    [bdump] done!
-    @$bdump_active_kernel("c:\\work\\codes\\wtf\\targets\\hevd\\state")
+    Wrote 4.0 GB in 1 min 32 sec.
+    The average transfer rate was 44.5 MB/s.
+    Dump successfully written
+    [snapshot] Done!
     ```
 
 1. Create a [fuzzer module](src/wtf/fuzzer_hevd.cc), write the code that [inserts a test-case](src/wtf/fuzzer_hevd.cc#L20) into your target and define [the](src/wtf/fuzzer_hevd.cc#L81) [various](src/wtf/fuzzer_hevd.cc#L104) [conditions](src/wtf/fuzzer_hevd.cc#L115) to [detect crashes](src/wtf/fuzzer_hevd.cc#L115) or [the end of a test-case](src/wtf/fuzzer_hevd.cc#L69).
@@ -249,7 +238,7 @@ You can also target [WoW64](https://docs.microsoft.com/en-us/windows/win32/winpr
 The context is partially valid. Only x86 user-mode context is available.
 Switched to Host mode
 
-32.kd> !bdump "c:\\dump"
+32.kd> !snapshot
 ```
 
 ## How to deliver multi-packets to my target?

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -85,6 +85,7 @@ if (WIN32)
         winhvplatform.lib
         delayimp.lib
         bcrypt.lib
+        ntdll.lib
     )
 
     add_executable(

diff --git a/src/libs/bochscpu-bins/include/bochscpu.hpp b/src/libs/bochscpu-bins/include/bochscpu.hpp
@@ -116,6 +116,11 @@ struct Zmm {
   uint64_t q[8];
 };
 
+struct Float80 { // Element type of State::fpst: one x87 stack register stored as two integer fields.
+  uint64_t fraction; // 64-bit field; presumably the significand of the 80-bit value - TODO confirm against bochscpu.
+  uint16_t exp; // 16-bit field; presumably the sign bit plus 15-bit exponent - TODO confirm against bochscpu.
+};
+
 struct State {
   uint64_t bochscpu_seed;
   uint64_t rax;
@@ -163,7 +168,7 @@ struct State {
   uint16_t fpsw;
   uint16_t fptw;
   uint16_t fpop;
-  uint64_t fpst[8];
+  Float80 fpst[8];
   uint32_t mxcsr;
   uint32_t mxcsr_mask;
   uint64_t tsc;

diff --git a/src/libs/bochscpu-bins/lib/bochscpu_ffi.lib b/src/libs/bochscpu-bins/lib/bochscpu_ffi.lib
diff --git a/src/libs/bochscpu-bins/lib/libbochscpu_ffi.a b/src/libs/bochscpu-bins/lib/libbochscpu_ffi.a
diff --git a/src/wtf/backend.cc b/src/wtf/backend.cc
@@ -191,7 +191,21 @@ uint64_t Backend_t::GetArg(const uint64_t Idx) {
   }
 }
 
-uint64_t Backend_t::GetArg8(const uint64_t Idx) { return GetArg(Idx); }
+uint64_t Backend_t::GetArg8(const uint64_t Idx) { // Returns argument number Idx as a 64-bit value.
+  switch (Idx) { // Indices 0-3 come from registers; presumably the Windows x64 calling convention - confirm.
+  case 0:
+    return Rcx();
+  case 1:
+    return Rdx();
+  case 2:
+    return R8();
+  case 3:
+    return R9();
+  default: { // Any other index: read via VirtRead8 at the address GetArgAddress reports for Idx.
+    return VirtRead8(GetArgAddress(Idx));
+  }
+  }
+}
 
 uint32_t Backend_t::GetArg4(const uint64_t Idx) {
   return uint32_t(GetArg8(Idx));

diff --git a/src/wtf/bochscpu_backend.cc b/src/wtf/bochscpu_backend.cc
@@ -1062,7 +1062,7 @@ void BochscpuBackend_t::LoadState(const CpuState_t &State) {
   Bochs.tsc_aux = State.TscAux;
   Bochs.fpcw = State.Fpcw;
   Bochs.fpsw = State.Fpsw;
-  Bochs.fptw = State.Fptw;
+  Bochs.fptw = State.Fptw.Value;
   Bochs.cr0 = uint32_t(State.Cr0.Flags);
   Bochs.cr2 = State.Cr2;
   Bochs.cr3 = State.Cr3;

diff --git a/src/wtf/globals.h b/src/wtf/globals.h
@@ -1,5 +1,6 @@
 // Axel '0vercl0k' Souchet - March 29 2020
 #pragma once
+#include <bochscpu.hpp>
 #include <cstring>
 #include <filesystem>
 #include <fmt/format.h>
@@ -1017,6 +1018,63 @@ union Rflags_t {
   uint64_t Flags;
 };
 
+struct Fptw_t {
+  //
+  // The FXSAVE instruction saves an abridged version of the x87 FPU tag word
+  // in the FTW field (unlike the FSAVE instruction, which saves the complete
+  // tag word). The tag information is saved in physical register order (R0
+  // through R7), rather than in top-of-stack (TOS) order. With the FXSAVE
+  // instruction, however, only a single bit (1 for valid or 0 for empty) is
+  // saved for each tag.
+  //
+  // For example, assume that the tag word is currently set as follows:
+  //
+  // R7 R6 R5 R4 R3 R2 R1 R0
+  // 11 xx xx xx 11 11 11 11
+  //
+  // Here, 11B indicates empty stack elements and "xx" indicates valid (00B),
+  // zero (01B), or special (10B). For this example, the FXSAVE instruction
+  // saves only the following 8 bits of information:
+  //
+  // R7 R6 R5 R4 R3 R2 R1 R0
+  //  0  1  1  1  0  0  0  0
+  //
+
+  uint16_t Value = 0; // Full tag word: two bits per register, Ri in bits [2*i+1 : 2*i].
+
+  Fptw_t() = default;
+  Fptw_t(const uint16_t Value) : Value(Value) {} // Wraps an already-expanded 16-bit tag word.
+
+  static Fptw_t FromAbridged(const uint8_t Abridged) { // Expands a 1-bit-per-register byte into a full tag word.
+    uint16_t Fptw = 0;
+    for (size_t BitIdx = 0; BitIdx < 8; BitIdx++) {
+      const uint16_t Bits = (Abridged >> BitIdx) & 0b1;
+      if (Bits == 1) { // Bit set: the register gets tag 00; one bit cannot tell 00 / 01 / 10 apart.
+        Fptw |= 0b00 << (BitIdx * 2); // ORs in zero, so this leaves Fptw unchanged.
+      } else { // Bit clear: the register gets tag 11.
+        Fptw |= 0b11 << (BitIdx * 2);
+      }
+    }
+
+    return Fptw_t(Fptw);
+  }
+
+  uint8_t Abridged() const { // Collapses Value to one bit per register: tag 11 -> 0, any other tag -> 1.
+    uint8_t Abridged = 0; // Local accumulator; it shadows the method name inside this body.
+    for (size_t Idx = 0; Idx < 8; Idx++) {
+      const uint16_t Bits = (Value >> (Idx * 2)) & 0b11;
+      if (Bits == 0b11) {
+        Abridged |= 0b0 << Idx; // ORs in zero, so the bit stays clear.
+      } else {
+        Abridged |= 0b1 << Idx;
+      }
+    }
+    return Abridged;
+  }
+
+  bool operator==(const Fptw_t &Other) const { return Value == Other.Value; } // Compares the full 16-bit tag words.
+};
+
 struct CpuState_t {
   uint64_t Seed;
   uint64_t Rax;
@@ -1062,9 +1120,9 @@ struct CpuState_t {
   Zmm_t Zmm[32];
   uint16_t Fpcw;
   uint16_t Fpsw;
-  uint16_t Fptw;
+  Fptw_t Fptw;
   uint16_t Fpop;
-  uint64_t Fpst[8];
+  Float80 Fpst[8];
   uint32_t Mxcsr;
   uint32_t MxcsrMask;
   uint64_t Tsc;
@@ -1136,7 +1194,8 @@ struct CpuState_t {
     Equal = Equal && Fpop == B.Fpop;
 
     for (size_t Idx = 0; Idx < 8; Idx++) {
-      Equal = Equal && Fpst[Idx] == B.Fpst[Idx];
+      Equal =
+          Equal && (memcmp(&Fpst[Idx], &B.Fpst[Idx], sizeof(Fpst[Idx])) == 0);
     }
 
     Equal = Equal && Mxcsr == B.Mxcsr;

diff --git a/src/wtf/kvm_backend.cc b/src/wtf/kvm_backend.cc
@@ -952,22 +952,23 @@ bool KvmBackend_t::LoadFpu(const CpuState_t &CpuState) {
   // Set the FPU registers.
   //
 
-  struct kvm_fpu Fregs;
+  struct kvm_fpu Fregs = {};
   if (ioctl(Vp_, KVM_GET_FPU, &Fregs) < 0) {
     perror("KVM_GET_FPU failed");
     return false;
   }
 
   for (uint64_t Idx = 0; Idx < 8; Idx++) {
-    memcpy(&Fregs.fpr[Idx], &CpuState.Fpst[Idx], 16);
+    Fregs.fpr[Idx][0] = CpuState.Fpst[Idx].fraction;
+    Fregs.fpr[Idx][1] = CpuState.Fpst[Idx].exp;
   }
 
   Fregs.fcw = CpuState.Fpcw;
   Fregs.fsw = CpuState.Fpsw;
-  // Fregs.ftwx = ??
+  Fregs.ftwx = CpuState.Fptw.Abridged();
   Fregs.last_opcode = CpuState.Fpop;
-  // Fregs.last_ip = ??
-  // Fregs.last_dp = ??
+  Fregs.last_ip = 0;
+  Fregs.last_dp = 0;
   Fregs.mxcsr = CpuState.Mxcsr;
   for (uint64_t Idx = 0; Idx < 16; Idx++) {
     memcpy(Fregs.xmm[Idx], &CpuState.Zmm[Idx].Q[0], 16);