From 0e2167df45802e37005f0d925742242721e5eb12 Mon Sep 17 00:00:00 2001 From: Langston Barrett Date: Tue, 8 Oct 2024 18:29:26 -0400 Subject: [PATCH] macaw-dump: A CLI for printing internal data structures A simple library and tiny wrappers for different architectures, intended for debugging purposes. The only current capability is to run code discovery on a set of symbols in a binary (or all of them if none are specified), and pretty-print the resulting Macaw or Crucible CFGs. --- cabal.project.dist | 1 + .../macaw-aarch32-symbolic.cabal | 15 +++ macaw-aarch32-symbolic/tools/Dump.hs | 37 ++++++ macaw-dump/macaw-dump.cabal | 107 +++++++++++++++ macaw-dump/src/Data/Macaw/Dump.hs | 125 ++++++++++++++++++ 5 files changed, 285 insertions(+) create mode 100644 macaw-aarch32-symbolic/tools/Dump.hs create mode 100644 macaw-dump/macaw-dump.cabal create mode 100644 macaw-dump/src/Data/Macaw/Dump.hs diff --git a/cabal.project.dist b/cabal.project.dist index 7dd5055c..2ca35e4e 100644 --- a/cabal.project.dist +++ b/cabal.project.dist @@ -1,6 +1,7 @@ packages: base/ macaw-aarch32/ macaw-aarch32-symbolic/ + macaw-dump/ macaw-semmc/ macaw-ppc/ macaw-ppc-symbolic/ diff --git a/macaw-aarch32-symbolic/macaw-aarch32-symbolic.cabal b/macaw-aarch32-symbolic/macaw-aarch32-symbolic.cabal index f6ef52f0..6e0d7b7a 100644 --- a/macaw-aarch32-symbolic/macaw-aarch32-symbolic.cabal +++ b/macaw-aarch32-symbolic/macaw-aarch32-symbolic.cabal @@ -46,6 +46,21 @@ library default-language: Haskell2010 ghc-options: -Wall -Wcompat +executable macaw-aarch32-dump + hs-source-dirs: tools + main-is: Dump.hs + default-language: Haskell2010 + ghc-options: -Wall -Wcompat + build-depends: + base, + bytestring, + containers, + elf-edit, + macaw-aarch32, + macaw-aarch32-symbolic, + macaw-dump, + macaw-symbolic + test-suite macaw-aarch32-symbolic-tests type: exitcode-stdio-1.0 default-language: Haskell2010 diff --git a/macaw-aarch32-symbolic/tools/Dump.hs b/macaw-aarch32-symbolic/tools/Dump.hs new file mode 100644 index 00000000..a298cf1a --- /dev/null +++ b/macaw-aarch32-symbolic/tools/Dump.hs @@ -0,0 +1,37 @@ +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE TypeApplications #-} + +module Main (main) where + +import Data.ByteString qualified as BS +import Data.ElfEdit qualified as EE +import Data.Macaw.AArch32.Symbolic () +import Data.Macaw.ARM qualified as MA +import Data.Macaw.Dump qualified as Dump +import Data.Macaw.Symbolic qualified as MS +import Data.Proxy (Proxy(..)) +import Data.Set qualified as Set + +go :: FilePath -> IO () +go exePath = do + bytes <- BS.readFile exePath + case EE.decodeElfHeaderInfo bytes of + Left (_, msg) -> error ("Error parsing ELF header from file '" ++ show exePath ++ "': " ++ msg) + Right (EE.SomeElf ehi) -> do + case EE.headerClass (EE.header ehi) of + EE.ELFCLASS32 -> do + discState <- Dump.runDiscovery ehi MA.arm_linux_info Set.empty + archVals <- + case MS.archVals (Proxy @MA.ARM) Nothing of + Just archVals -> pure archVals + Nothing -> error "impossible" + Dump.displayCfgs exePath discState archVals False + EE.ELFCLASS64 -> error "Only 32-bit ARM is supported" + +main :: IO () +main = do + args <- Env.getArgs + case args of + [exePath] -> go exePath + _ -> error "Bad arguments" diff --git a/macaw-dump/macaw-dump.cabal b/macaw-dump/macaw-dump.cabal new file mode 100644 index 00000000..dc9d469a --- /dev/null +++ b/macaw-dump/macaw-dump.cabal @@ -0,0 +1,107 @@ +Cabal-version: 2.2 +Name: macaw-dump +Version: 0.1 +Author: Galois Inc. +Maintainer: langston@galois.com +Build-type: Simple +License: BSD-3-Clause +License-file: LICENSE +Category: Language +Synopsis: A tool to display internal Macaw data structures + +extra-doc-files: README.md + +common shared + -- Specifying -Wall and -Werror can cause the project to fail to build on + -- newer versions of GHC simply due to new warnings being added to -Wall. To + -- prevent this from happening we manually list which warnings should be + -- considered errors. We also list some warnings that are not in -Wall, though + -- try to avoid "opinionated" warnings (though this judgement is clearly + -- subjective). + -- + -- Warnings are grouped by the GHC version that introduced them, and then + -- alphabetically. + -- + -- A list of warnings and the GHC version in which they were introduced is + -- available here: + -- https://ghc.gitlab.haskell.org/ghc/doc/users_guide/using-warnings.html + + -- Since GHC 8.10 or earlier: + ghc-options: + -Wall + -Werror=compat-unqualified-imports + -Werror=deferred-type-errors + -Werror=deprecated-flags + -Werror=deprecations + -Werror=deriving-defaults + -Werror=dodgy-foreign-imports + -Werror=duplicate-exports + -Werror=empty-enumerations + -Werror=identities + -Werror=inaccessible-code + -Werror=incomplete-patterns + -Werror=incomplete-record-updates + -Werror=incomplete-uni-patterns + -Werror=inline-rule-shadowing + -Werror=missed-extra-shared-lib + -Werror=missing-exported-signatures + -Werror=missing-fields + -Werror=missing-home-modules + -Werror=missing-methods + -Werror=overflowed-literals + -Werror=overlapping-patterns + -Werror=partial-fields + -Werror=partial-type-signatures + -Werror=simplifiable-class-constraints + -Werror=star-binder + -Werror=star-is-type + -Werror=tabs + -Werror=typed-holes + -Werror=unrecognised-pragmas + -Werror=unrecognised-warning-flags + -Werror=unsupported-calling-conventions + -Werror=unsupported-llvm-version + -Werror=unticked-promoted-constructors + -Werror=unused-imports + -Werror=warnings-deprecations + -Werror=wrong-do-bind + + if impl(ghc >= 9.2) + ghc-options: + -Werror=ambiguous-fields + -Werror=operator-whitespace + -Werror=operator-whitespace-ext-conflict + -Werror=redundant-bang-patterns + + if impl(ghc >= 9.4) + ghc-options: + -Werror=forall-identifier + -Werror=misplaced-pragmas + -Werror=redundant-strictness-flags + -Werror=type-equality-out-of-scope + -Werror=type-equality-requires-operators + + ghc-prof-options: -O2 -fprof-auto-top + default-language: Haskell2010 + +library + import: shared + + build-depends: + base >= 4.13, + bytestring, + containers, + crucible, + elf-edit, + lens, + macaw-base, + macaw-symbolic, + parameterized-utils, + prettyprinter, + text, + what4 + + hs-source-dirs: src + + exposed-modules: + Data.Macaw.Dump diff --git a/macaw-dump/src/Data/Macaw/Dump.hs b/macaw-dump/src/Data/Macaw/Dump.hs new file mode 100644 index 00000000..2031c549 --- /dev/null +++ b/macaw-dump/src/Data/Macaw/Dump.hs @@ -0,0 +1,125 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ImportQualifiedPost #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeOperators #-} + +module Data.Macaw.Dump + ( DumpException + , runDiscovery + , displayCfgs + ) where + +import Control.Exception qualified as X +import Control.Lens qualified as Lens +import Control.Monad qualified as Monad +import Data.ByteString qualified as BS +import Data.ElfEdit qualified as EE +import Data.Foldable qualified as F +import Data.Set qualified as Set +import Data.Macaw.Architecture.Info qualified as MAI +import Data.Macaw.CFG qualified as MC +import Data.Macaw.Discovery qualified as MD +import Data.Macaw.Memory.ElfLoader qualified as MEL +import Data.Macaw.Memory.LoadCommon qualified as MML +import Data.Macaw.Memory qualified as MM +import Data.Macaw.Symbolic qualified as MS +import Data.Map qualified as Map +import Data.Parameterized.Some (Some(Some)) +import Data.Text.Encoding.Error qualified as Text +import Data.Text.Encoding qualified as Text +import Data.Text qualified as Text +import Lang.Crucible.Analysis.Postdom qualified as CAP +import Lang.Crucible.CFG.Core qualified as CCC +import Lang.Crucible.CFG.Extension qualified as CCE +import Lang.Crucible.FunctionHandle qualified as CFH +import Prettyprinter qualified as PP +import System.IO qualified as IO +import What4.FunctionName qualified as WF +import What4.ProgramLoc qualified as WPL + +data DumpException = ELFResolutionError String + deriving Show + +instance X.Exception DumpException + + +-- | Convert machine addresses into What4 positions. +-- +-- When possible, we map to the structured 'WPL.BinaryPos' type. However, some +-- 'MM.MemSegmentOff' cannot be mapped to an absolute position (e.g., some +-- addresses from shared libraries are in non-trivial segments). In those cases, +-- we map to the unstructured 'WPL.Others' with a sufficiently descriptive string. +-- +-- TODO: import from `Testing` +posFn :: (MM.MemWidth w) => Text.Text -> MM.MemSegmentOff w -> WPL.Position +posFn binaryName segOff = + case MM.segoffAsAbsoluteAddr segOff of + Just mw -> WPL.BinaryPos binaryName (fromIntegral mw) + Nothing -> WPL.OtherPos (binaryName <> Text.pack ": " <> Text.pack (show segOff)) + +-- | Load an ELF file into a macaw 'MM.Memory' (and symbols) +-- +-- Prints warnings to stderr. +loadELF :: + MML.LoadOptions -> + EE.ElfHeaderInfo w -> + IO (MM.Memory w, [MEL.MemSymbol w]) +loadELF loadOpts ehi = do + case MEL.resolveElfContents loadOpts ehi of + Left err -> X.throwIO (ELFResolutionError err) + Right (warnings, mem, _mentry, nameAddrList) -> do + F.forM_ warnings $ \w -> do + IO.hPutStrLn IO.stderr w + return (mem, nameAddrList) + +-- | Run discovery on the provided symbols, or all if none are provided +runDiscovery :: + forall arch w. + ( MC.ArchAddrWidth arch ~ w + , MC.ArchConstraints arch + , MM.MemWidth w + ) => + EE.ElfHeaderInfo w -> + MAI.ArchitectureInfo arch -> + Set.Set BS.ByteString -> + IO (MD.DiscoveryState arch) +runDiscovery headerInfo archInfo symbols = do + (mem, nameAddrList) <- loadELF MML.defaultLoadOptions headerInfo + let addrSymMap = + Map.fromList + [ (MEL.memSymbolStart msym, name) + | msym <- nameAddrList + , let name = MEL.memSymbolName msym + , Set.null symbols || Set.member name symbols + ] + pure (MD.cfgFromAddrs archInfo mem addrSymMap (Map.keys addrSymMap) []) + +displayCfgs :: + ( MC.ArchConstraints arch + , MS.GenArchInfo MS.LLVMMemory arch + , CCE.IsSyntaxExtension (MS.MacawExt arch) + ) => + FilePath -> + MD.DiscoveryState arch -> + MS.GenArchVals mem arch -> + -- | Also print Crucible CFG? + Bool -> + IO () +displayCfgs path discState archVals printCrucible = do + let funInfos = discState Lens.^. MD.funInfo + halloc <- CFH.newHandleAllocator + F.for_ (Map.toList funInfos) $ \(_addr, Some info) -> do + IO.print (PP.pretty info) + Monad.when printCrucible $ do + let pos = posFn (Text.pack path) + let funName = + WF.functionNameFromText $ + Text.decodeUtf8With Text.lenientDecode $ + MD.discoveredFunName info + CCC.SomeCFG ssa <- + MS.mkFunCFG (MS.archFunctions archVals) halloc funName pos info + IO.print (CCC.ppCFG' True (CAP.postdomInfo ssa) ssa)