From b0cadefe8abac29cf5373b7b50bfc190dbce7377 Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Mon, 14 Oct 2024 16:22:09 -0700 Subject: [PATCH 01/16] add "time" tag to display processing time for TraceTree elements --- src/Pate/TraceTree.hs | 65 ++++++++++++++++++++++++----------------- tools/pate-repl.cabal | 3 +- tools/pate-repl/Repl.hs | 34 +++++++++++++-------- tools/pate/Output.hs | 12 ++++++-- 4 files changed, 72 insertions(+), 42 deletions(-) diff --git a/src/Pate/TraceTree.hs b/src/Pate/TraceTree.hs index be38ab55..b097a9df 100644 --- a/src/Pate/TraceTree.hs +++ b/src/Pate/TraceTree.hs @@ -62,6 +62,7 @@ module Pate.TraceTree ( , noTraceTree , viewSomeTraceTree , NodeStatus(..) + , isFinished , isBlockedStatus , BlockedStatus(..) , NodeStatusLevel(..) @@ -122,6 +123,7 @@ module Pate.TraceTree ( , chooseInputFromList , chooseInput_ , runWhenFinishedOrBlocked + , traceTreeStartTime ) where import GHC.TypeLits ( Symbol, KnownSymbol ) @@ -163,6 +165,7 @@ import qualified System.IO as IO import Data.Maybe (catMaybes) import Control.Concurrent (threadDelay) import Control.Monad.State.Strict (StateT (..), MonadState (..)) +import Data.Time data TraceTag = Summary @@ -212,7 +215,10 @@ joinStatusLevels lvlHi lvlLo = case (lvlHi, lvlLo) of -- TODO: We could expose the error type here with some plumbing data NodeStatus = NodeStatus { nodeStatusLevel :: NodeStatusLevel, - isFinished :: Bool, blockStatus :: BlockedStatus } + finishedAt :: Maybe UTCTime, blockStatus :: BlockedStatus } + +isFinished :: NodeStatus -> Bool +isFinished st = isJust (finishedAt st) -- | A blocked node means that it (or a subnode) is waiting for input isBlockedStatus :: NodeStatus -> Bool @@ -230,10 +236,10 @@ instance IsString TraceTag where data IOList' a = IOList' { ioList :: [a], ioListStatus :: NodeStatus } -- The Mvar is used to signal when updates are made, for clients to -- block on updates (rather than busy-waiting) -data IOList a = IOList (IO.IORef (IOList' a)) (MVar ()) +data IOList a = IOList (IO.IORef (IOList' a)) (MVar ()) UTCTime evalIOList' :: IOList a -> IO (IOList' a) -evalIOList' (IOList ref _) = IO.readIORef ref +evalIOList' (IOList ref _ _) = IO.readIORef ref evalIOList :: IOList a -> IO [a] evalIOList l = ioList <$> evalIOList' l @@ -244,7 +250,7 @@ evalIOList l = ioList <$> evalIOList' l -- IOList each time it is executed. -- The result is either the computer 'b' or the final contents of the list withIOList :: forall a b. IOList a -> ([a] -> IO (Maybe b)) -> IO (Either [a] b) -withIOList (IOList ref mv) f = go +withIOList (IOList ref mv _) f = go where go :: IO (Either [a] b) go = do @@ -259,17 +265,18 @@ withIOList (IOList ref mv) f = go mkStaticIOList :: [a] -> IO (IOList a) mkStaticIOList xs = do - ref <- IO.newIORef (IOList' xs (NodeStatus StatusSuccess True mempty)) + now <- getCurrentTime + ref <- IO.newIORef (IOList' xs (NodeStatus StatusSuccess (Just now) mempty)) mv <- newMVar () - return $ IOList ref mv + return $ IOList ref mv now addIOList :: a -> IOList a -> IO () -addIOList a (IOList ref mv) = do +addIOList a (IOList ref mv _) = do IO.atomicModifyIORef' ref (\(IOList' as st) -> (IOList' (a : as) st,())) void $ tryPutMVar mv () modifyIOListStatus :: (NodeStatus -> NodeStatus) -> IOList a -> IO () -modifyIOListStatus f (IOList ref mv) = do +modifyIOListStatus f (IOList ref mv _) = do b <- IO.atomicModifyIORef' ref (\(IOList' as st) -> (IOList' as (f st),isFinished st && (isFinished $ f st))) unless b $ void $ tryPutMVar mv () @@ -282,7 +289,7 @@ propagateStatus stNew stOld = True -> stOld False -> case joinStatusLevels (nodeStatusLevel stNew) (nodeStatusLevel stOld) of Just stLvlMerged -> stNew { nodeStatusLevel = stLvlMerged } - Nothing -> stOld { isFinished = isFinished stNew } + Nothing -> stOld { finishedAt = finishedAt stNew } in stNew' { blockStatus = (blockStatus stOld) <> (blockStatus stNew) } getIOListStatus :: IOList a -> IO NodeStatus @@ -290,13 +297,14 @@ getIOListStatus l = ioListStatus <$> evalIOList' l emptyIOList :: IO (IOList a) emptyIOList = do - r <- IO.liftIO $ IO.newIORef (IOList' [] (NodeStatus StatusSuccess False mempty)) + r <- IO.liftIO $ IO.newIORef (IOList' [] (NodeStatus StatusSuccess Nothing mempty)) mv <- newMVar () - return $ IOList r mv + now <- getCurrentTime + return $ IOList r mv now resetIOList :: IOList a -> IO () -resetIOList (IOList r mv) = do - IO.atomicWriteIORef r (IOList' [] (NodeStatus StatusSuccess False mempty)) +resetIOList (IOList r mv _) = do + IO.atomicWriteIORef r (IOList' [] (NodeStatus StatusSuccess Nothing mempty)) void $ tryPutMVar mv () return () @@ -435,7 +443,7 @@ data TreeBuilder k where } -> TreeBuilder k asBlockedStatus :: NodeStatus -> NodeStatus -asBlockedStatus st = NodeStatus StatusSuccess False (blockStatus st) +asBlockedStatus st = NodeStatus StatusSuccess Nothing (blockStatus st) addNodeDependency :: NodeBuilder k nm -> TreeBuilder k -> TreeBuilder k addNodeDependency nodeBuilder treeBuilder = @@ -499,6 +507,9 @@ data TraceTreeNode (k :: l) nm where -- all of the tracing context that was emitted at this level newtype TraceTree k = TraceTree (IOList (Some (TraceTreeNode k))) +traceTreeStartTime :: TraceTree k -> UTCTime +traceTreeStartTime (TraceTree (IOList _ _ t)) = t + isTraceNode :: TraceTreeNode k nm -> (IsTraceNode k nm => a) -> a isTraceNode TraceTreeNode{} a = a @@ -910,10 +921,12 @@ addStatusBlocker header = do (setBlock, setUnblock) <- case interactionMode builder of Interactive nextChoiceIdent -> do newChoiceIdent <- liftIO $ nextChoiceIdent - let status = NodeStatus StatusSuccess False (BlockedStatus (Set.singleton newChoiceIdent) Set.empty) + let status = NodeStatus StatusSuccess Nothing (BlockedStatus (Set.singleton newChoiceIdent) Set.empty) let setBlock = liftIO $ updateTreeStatus builder status - let statusFinal = NodeStatus StatusSuccess True (BlockedStatus Set.empty (Set.singleton newChoiceIdent)) - let setUnblock = liftIO $ updateTreeStatus builder statusFinal + let statusFinal = \t -> NodeStatus StatusSuccess (Just t) (BlockedStatus Set.empty (Set.singleton newChoiceIdent)) + let setUnblock = liftIO $ do + now <- getCurrentTime + updateTreeStatus builder (statusFinal now) return (setBlock, setUnblock) DefaultChoice -> return (return (), return ()) return $ @@ -1151,8 +1164,8 @@ chooseInput treenm parseInput = do case interactionMode builder of Interactive nextChoiceIdent -> do newChoiceIdent <- liftIO $ nextChoiceIdent - let status = NodeStatus StatusSuccess False (BlockedStatus (Set.singleton newChoiceIdent) Set.empty) - let statusFinal = NodeStatus StatusSuccess False (BlockedStatus Set.empty (Set.singleton newChoiceIdent)) + let status = NodeStatus StatusSuccess Nothing (BlockedStatus (Set.singleton newChoiceIdent) Set.empty) + let statusFinal = NodeStatus StatusSuccess Nothing (BlockedStatus Set.empty (Set.singleton newChoiceIdent)) c <- liftIO $ newEmptyMVar choice_lock <- liftIO $ newMVar False let getValue = withMVar choice_lock $ \case @@ -1390,8 +1403,8 @@ withSubTraces f = do let nodeBuilder = addTreeDependency treeBuilder nodeBuilder' IO.liftIO $ addNode treeBuilder node r <- catchError - (runNodeBuilderT f nodeBuilder >>= \r -> (IO.liftIO $ updateNodeStatus nodeBuilder (NodeStatus StatusSuccess True mempty)) >> return r) - (\e -> (IO.liftIO $ updateNodeStatus nodeBuilder (NodeStatus (StatusError e) True mempty)) >> throwError e) + (runNodeBuilderT f nodeBuilder >>= \r -> (IO.liftIO $ getCurrentTime >>= \t -> updateNodeStatus nodeBuilder (NodeStatus StatusSuccess (Just t) mempty)) >> return r) + (\e -> (IO.liftIO $ getCurrentTime >>= \t -> updateNodeStatus nodeBuilder (NodeStatus (StatusError e) (Just t) mempty)) >> throwError e) return r subTraceLabel' :: @@ -1408,8 +1421,8 @@ subTraceLabel' lbl v f = do IO.liftIO $ addNodeValue nodeBuilder lbl v subtree r <- catchError (liftTreeBuilder treeBuilder (f (\g -> runNodeBuilderT g nodeBuilder)) - >>= \r -> (IO.liftIO $ updateTreeStatus treeBuilder (NodeStatus StatusSuccess True mempty)) >> return r) - (\e -> (IO.liftIO $ updateTreeStatus treeBuilder (NodeStatus (StatusError e) True mempty)) >> throwError e) + >>= \r -> (IO.liftIO $ getCurrentTime >>= \t -> updateTreeStatus treeBuilder (NodeStatus StatusSuccess (Just t) mempty)) >> return r) + (\e -> (IO.liftIO $ getCurrentTime >>= \t -> updateTreeStatus treeBuilder (NodeStatus (StatusError e) (Just t) mempty)) >> throwError e) return r subTraceLabel :: @@ -1429,7 +1442,7 @@ emitTraceWarning :: m () emitTraceWarning e = do treeBuilder <- getTreeBuilder - IO.liftIO $ updateTreeStatus treeBuilder (NodeStatus (StatusWarning e) False mempty) + IO.liftIO $ updateTreeStatus treeBuilder (NodeStatus (StatusWarning e) Nothing mempty) -- | Tag the current sub-computation as having raised an error emitTraceError :: @@ -1439,12 +1452,12 @@ emitTraceError :: m () emitTraceError e = do treeBuilder <- getTreeBuilder - IO.liftIO $ updateTreeStatus treeBuilder (NodeStatus (StatusError e) False mempty) + IO.liftIO $ updateTreeStatus treeBuilder (NodeStatus (StatusError e) Nothing mempty) finalizeTree :: TreeBuilder k -> IO () -finalizeTree treeBuilder = updateTreeStatus treeBuilder (NodeStatus StatusSuccess True mempty) +finalizeTree treeBuilder = getCurrentTime >>= \t -> updateTreeStatus treeBuilder (NodeStatus StatusSuccess (Just t) mempty) traceAlternatives' :: IsTreeBuilder k e m => diff --git a/tools/pate-repl.cabal b/tools/pate-repl.cabal index 2e9ab1f2..59d8af1e 100644 --- a/tools/pate-repl.cabal +++ b/tools/pate-repl.cabal @@ -53,5 +53,6 @@ library macaw-aarch32, macaw-aarch32-symbolic, macaw-aarch32, - dismantle-arm-xml + dismantle-arm-xml, + time \ No newline at end of file diff --git a/tools/pate-repl/Repl.hs b/tools/pate-repl/Repl.hs index c38581da..5b115b1f 100644 --- a/tools/pate-repl/Repl.hs +++ b/tools/pate-repl/Repl.hs @@ -74,6 +74,9 @@ import Unsafe.Coerce(unsafeCoerce) import qualified Output as PO import qualified Control.Concurrent as MVar +import Data.Time +import Data.Fixed + maxSubEntries :: Int maxSubEntries = 5 @@ -409,12 +412,12 @@ ppSuffix nesting s nm = do ppStatusTag :: NodeStatus -> Maybe (PP.Doc a) ppStatusTag st = case st of _ | isBlockedStatus st -> Just "?" - NodeStatus StatusSuccess False _ -> Just "*" - NodeStatus (StatusWarning _) False _ -> Just "!*" - NodeStatus (StatusWarning _) True _ -> Just "!" - NodeStatus (StatusError _) False _ -> Just "*x" - NodeStatus (StatusError _) True _ -> Just "x" - NodeStatus StatusSuccess True _ -> Nothing + NodeStatus StatusSuccess Nothing _ -> Just "*" + NodeStatus (StatusWarning _) Nothing _ -> Just "!*" + NodeStatus (StatusWarning _) (Just{}) _ -> Just "!" + NodeStatus (StatusError _) Nothing _ -> Just "*x" + NodeStatus (StatusError _) (Just{}) _ -> Just "x" + NodeStatus StatusSuccess (Just{}) _ -> Nothing maybeSubNodes :: forall sym arch nm a. @@ -454,15 +457,22 @@ prettyNextNodes startAt onlyFinished = do mapM (\(nesting, Some (nd@(TraceNode lbl v subtree) :: TraceNode sym arch nm)) -> do b <- IO.liftIO $ getTreeStatus subtree json <- IO.liftIO $ jsonNode @_ @'(sym, arch) @nm sym lbl v + + let duration = case finishedAt b of + Just fin | elem "time" tags -> Just $ (PP.viaShow $ + let t = diffUTCTime fin (traceTreeStartTime subtree) + in (round (t * 1000))) <> "ms" + _ -> Nothing case prettyNodeAt @'(sym, arch) @nm tags lbl v of Just pp -> do suf <- ppSuffix nesting b (knownSymbol @nm) let indent = abs(nesting)*2 + return $ PO.OutputElem { PO.outIdx = 0 , PO.outIndent = indent , PO.outPP = pp - , PO.outFinished = isFinished b + , PO.outDuration = duration , PO.outSuffix = suf , PO.outMoreResults = nesting < 0 , PO.outJSON = json @@ -476,7 +486,7 @@ prettyNextNodes startAt onlyFinished = do { PO.outIdx = 0 , PO.outIndent = 0 , PO.outPP = "" - , PO.outFinished = isFinished b + , PO.outDuration = duration , PO.outSuffix = Nothing , PO.outMoreResults = nesting < 0 , PO.outJSON = json @@ -557,8 +567,8 @@ status' mlimit = do _ | isBlockedStatus st -> PO.printMsgLn $ "Waiting for input.." NodeStatus (StatusWarning e) _ _ -> PO.printMsgLn $ "Warning: \n" <> (PP.pretty (chopMsg mlimit (show e))) NodeStatus (StatusError e) _ _ -> PO.printMsgLn $ "Error: \n" <> (PP.pretty (chopMsg mlimit (show e))) - NodeStatus StatusSuccess False _ -> PO.printMsgLn $ "In progress.." - NodeStatus StatusSuccess True _ -> PO.printMsgLn $ "Finalized" + NodeStatus StatusSuccess Nothing _ -> PO.printMsgLn $ "In progress.." + NodeStatus StatusSuccess (Just{}) _ -> PO.printMsgLn $ "Finalized" prevNodes <- gets replPrev fin <- IO.liftIO $ IO.readIORef finalResult case (prevNodes, fin) of @@ -642,8 +652,8 @@ goto_status' f = do isErrStatus :: NodeStatus -> Bool isErrStatus = \case - NodeStatus StatusSuccess True _ -> False - NodeStatus _ False _ -> False + NodeStatus StatusSuccess (Just{}) _ -> False + NodeStatus _ Nothing _ -> False _ -> True goto_err :: IO () diff --git a/tools/pate/Output.hs b/tools/pate/Output.hs index c8e79595..08320f80 100644 --- a/tools/pate/Output.hs +++ b/tools/pate/Output.hs @@ -9,6 +9,7 @@ module Output , outputList , outputMsg , outputErr + , outFinished , printMsg , printErr , printMsgLn @@ -39,12 +40,11 @@ import qualified GHC.IO as IO hiding (liftIO) import qualified Data.ByteString.Lazy as BS import Data.Maybe - data OutputElem = OutputElem { outIdx :: Int , outIndent :: Int - , outFinished :: Bool + , outDuration :: Maybe (PP.Doc ()) , outPP :: PP.Doc () , outJSON :: JSON.Value , outSuffix :: Maybe (PP.Doc ()) @@ -52,6 +52,9 @@ data OutputElem = -- ^ more results at this nesting level that were omitted } +outFinished :: OutputElem -> Bool +outFinished e = isJust (outDuration e) + data Output_ = OutputElemList [OutputElem] | OutputInfo (PP.Doc ()) @@ -140,7 +143,10 @@ ppOutputElem nd = p' = case outMoreResults nd of True -> p <+> "more results..." False -> p - in PP.pretty (outIdx nd) <> ":" <+> (PP.indent (outIndent nd) p') + p'' = case outDuration nd of + Just dur -> p' <+> dur + _ -> p' + in PP.pretty (outIdx nd) <> ":" <+> (PP.indent (outIndent nd) p'') tagOutput :: Maybe (PP.Doc ()) -> Maybe (Text.Text) -> Output -> Output tagOutput msg tag o = Output (outputC o) msg tag From 42ca7c717009e0ae3fccef6aa2e397452f7bc5fb Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Fri, 18 Oct 2024 12:18:47 -0700 Subject: [PATCH 02/16] re-enable parsedFunction cache, conservatively flushing for each node this was disabled to avoid edge cases where the function cache would become invalidated as a result of injecting pate's abstract domain information into macaw this lack of caching can adversely affect discoverPairs in cases where many blocks need to be inspected (since macaw starts over each time) if we flush the cache for each node (by adding a flushCache to addOverrides), then we can avoid this duplicated work without worrying about invalid cache entries --- src/Pate/Discovery/ParsedFunctions.hs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Pate/Discovery/ParsedFunctions.hs b/src/Pate/Discovery/ParsedFunctions.hs index 76675aa6..6034c7ce 100644 --- a/src/Pate/Discovery/ParsedFunctions.hs +++ b/src/Pate/Discovery/ParsedFunctions.hs @@ -220,7 +220,9 @@ addOverrides :: addOverrides defaultInit pfm ovs = do let new_ovs = Map.merge Map.preserveMissing Map.preserveMissing (Map.zipWithMaybeMatched (\_ l r -> Just (mergeOverrides l r))) ovs (absStateOverrides pfm) let new_init = PB.MkInitialAbsState $ \mem segOff -> mergeOverrides (PB.mkInitAbs defaultInit mem segOff) (PB.mkInitAbs (defaultInitState pfm) mem segOff) - return $ pfm { absStateOverrides = new_ovs, defaultInitState = new_init } + let pfm' = pfm { absStateOverrides = new_ovs, defaultInitState = new_init } + flushCache pfm' + return pfm' addExtraEdges :: forall arch bin. @@ -231,7 +233,9 @@ addExtraEdges :: addExtraEdges pfm es = do mapM_ addTgt (Map.elems es) IORef.modifyIORef' (parsedStateRef pfm) $ \st' -> - st' { extraEdges = Map.merge Map.preserveMissing Map.preserveMissing (Map.zipWithMaybeMatched (\_ l r -> Just (l <> r))) es (extraEdges st')} + st' { extraEdges = Map.merge Map.preserveMissing Map.preserveMissing (Map.zipWithMaybeMatched (\_ l r -> Just (l <> r))) es (extraEdges st') + , parsedFunctionCache = Map.empty + } where addTgt :: ExtraJumpTarget arch -> IO () addTgt = \case @@ -531,7 +535,10 @@ parsedFunctionContaining blk pfm@(ParsedFunctionMap pfmRef mCFGDir _pd _ _ _ _ _ -- IORef that might be evaluated multiple times if there is a lot of -- contention. If that becomes a problem, we may want to change this -- to an MVar where we fully evaluate each result before updating it. - (_, Some dfi) <- atomicAnalysis faddr st + (st', Some dfi) <- atomicAnalysis faddr st + IORef.modifyIORef pfmRef $ \st_ -> + st_ { parsedFunctionCache = parsedFunctionCache st', discoveryState = discoveryState st'} + --IORef.writeIORef pfmRef pfm' saveCFG mCFGDir (PB.blockBinRepr blk) dfi return (Just (Some dfi)) From 39c3f0211c80678639051425654a1c72a27f4ab6 Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Fri, 18 Oct 2024 15:34:11 -0700 Subject: [PATCH 03/16] add first-pass filter for concretizing memory writes --- src/Pate/Memory/MemTrace.hs | 245 +++++++++++++++--------- src/Pate/Verification/AbstractDomain.hs | 80 +++++++- src/Pate/Verification/Widening.hs | 19 +- 3 files changed, 240 insertions(+), 104 deletions(-) diff --git a/src/Pate/Memory/MemTrace.hs b/src/Pate/Memory/MemTrace.hs index 721dd5b6..3198cb72 100644 --- a/src/Pate/Memory/MemTrace.hs +++ b/src/Pate/Memory/MemTrace.hs @@ -38,10 +38,12 @@ module Pate.Memory.MemTrace , llvmPtrEq , readMemState , writeMemState -, MemFootprint(..) +, MemFootprint +, pattern MemFootprint , isDir , MemTraceK , traceFootprint +, traceFootprintOps , observableEvents , UndefPtrOpTag , UndefPtrOpTags @@ -1230,7 +1232,7 @@ ptrPredOp mkundef regconstraint f bak reg1 off1 reg2 off2 = do getConst <$> mkBinUndef mkundef sym (AssertedResult cond (Const result)) (LLVMPointer reg1 off1) (LLVMPointer reg2 off2) muxPtr :: - IsSymInterface sym => + IsExprBuilder sym => sym -> Pred sym -> LLVMPtr sym w -> @@ -1949,131 +1951,194 @@ oneSubEq w = unsafeCoerce (leqRefl w) -- | A wrapped value indicating that the given memory address has been modified -- by a given write sequence, with a given word size (in bytes) -data MemFootprint sym ptrW where - MemFootprint :: +data MemFootprintSized sym ptrW w where + MemFootprintSized :: 1 <= w => LLVMPtr sym ptrW -> NatRepr w -> MemOpDirection -> MemOpCondition sym -> Endianness -> - MemFootprint sym ptrW + MemFootprintSized sym ptrW w +type MemFootprint sym ptrW = Some (MemFootprintSized sym ptrW) -instance TestEquality (SymExpr sym) => Eq (MemFootprint sym ptrW) where - (MemFootprint (LLVMPointer reg1 off1) sz1 dir1 cond1 end1) == (MemFootprint (LLVMPointer reg2 off2) sz2 dir2 cond2 end2) - | reg1 == reg2 - , Just Refl <- testEquality off1 off2 - , Just Refl <- testEquality sz1 sz2 - = cond1 == cond2 && dir1 == dir2 && end1 == end2 - _ == _ = False +pattern MemFootprint :: forall sym ptrW. + () => (forall w. 1 <= w => + LLVMPtr sym ptrW -> + NatRepr w -> + MemOpDirection -> + MemOpCondition sym -> + Endianness -> + MemFootprint sym ptrW) +pattern MemFootprint ptr w dir cond end = (Some (MemFootprintSized ptr w dir cond end)) -instance OrdF (SymExpr sym) => Ord (MemFootprint sym ptrW) where - compare (MemFootprint (LLVMPointer reg1 off1) sz1 dir1 cond1 end1) (MemFootprint (LLVMPointer reg2 off2) sz2 dir2 cond2 end2) = - compare dir1 dir2 <> - (compare reg1 reg2) <> - (toOrdering $ compareF off1 off2) <> - (toOrdering $ compareF sz1 sz2) <> +{-# COMPLETE MemFootprint #-} + +instance TestEquality (SymExpr sym) => TestEquality (MemFootprintSized sym ptrW) where + testEquality (MemFootprintSized (LLVMPointer reg1 off1) sz1 dir1 cond1 end1) (MemFootprintSized (LLVMPointer reg2 off2) sz2 dir2 cond2 end2) + | Just Refl <- testEquality off1 off2 + , Just Refl <- testEquality sz1 sz2 + , reg1 == reg2 + , cond1 == cond2 + , dir1 == dir2 + , end1 == end2 + = Just Refl + testEquality _ _ = Nothing + +instance OrdF (SymExpr sym) => OrdF (MemFootprintSized sym ptrW) where + compareF (MemFootprintSized (LLVMPointer reg1 off1) sz1 dir1 cond1 end1) (MemFootprintSized (LLVMPointer reg2 off2) sz2 dir2 cond2 end2) = + lexCompareF off1 off2 $ + lexCompareF sz1 sz2 $ fromOrdering $ + compare reg1 reg2 <> compare cond1 cond2 <> + compare dir1 dir2 <> compare end1 end2 - isDir :: MemOpDirection -> MemFootprint sym ptrW -> Bool isDir dir (MemFootprint _ _ dir' _ _) = dir == dir' + memOpFootprint :: IsExprBuilder sym => sym -> MemOp sym ptrW -> - (MemFootprint sym ptrW, Pred sym) -memOpFootprint sym (MemOp ptr dir cond w _ end) = - (MemFootprint ptr w dir Unconditional end, getCond sym cond) + MemFootprint sym ptrW +memOpFootprint _sym (MemOp ptr dir cond w _val end) = MemFootprint ptr w dir cond end -unionFootprintMap :: +data MemFootprintVal sym w = MemFootprintVal (Pred sym) (LLVMPtr sym (8*w)) + +memOpFootprintVal :: IsExprBuilder sym => - OrdF (SymExpr sym) => sym -> - Map (MemFootprint sym ptrW) (Pred sym) -> - Map (MemFootprint sym ptrW) (Pred sym) -> - IO (Map (MemFootprint sym ptrW) (Pred sym)) -unionFootprintMap sym = - Map.mergeA - Map.preserveMissing - Map.preserveMissing - (Map.zipWithAMatched (\_k p1 p2 -> orPred sym p1 p2)) + MemOp sym ptrW -> + MapF.Pair (MemFootprintSized sym ptrW) (MemFootprintVal sym) +memOpFootprintVal sym (MemOp ptr dir cond w val end) = MapF.Pair + (MemFootprintSized ptr w dir Unconditional end) (MemFootprintVal (getCond sym cond) val) + +mergeFootprintVals :: + IsSymExprBuilder sym => + sym -> + MemFootprintVal sym w -> + MemFootprintVal sym w -> + IO (MemFootprintVal sym w) +mergeFootprintVals sym (MemFootprintVal p1 ptr1) (MemFootprintVal p2 ptr2) = do + p <- orPred sym p1 p2 + ptrs_eq <- llvmPtrEq sym ptr1 ptr2 + ptr <- case asConstantPred ptrs_eq of + Just True -> return ptr1 + _ -> do + p1_and_p2 <- andPred sym p1 p2 + fresh_p <- freshConstant sym emptySymbol BaseBoolRepr + ptr1_or_ptr2 <- muxPtr sym fresh_p ptr1 ptr2 + ptr <- muxPtr sym p1 ptr1 ptr2 + -- if both conditions are true then we nondeterministically select one of the + -- values + -- otherwise we pick based on which is true + muxPtr sym p1_and_p2 ptr1_or_ptr2 ptr + return $ MemFootprintVal p ptr + +muxFootprintVals :: + IsExprBuilder sym => + sym -> + Pred sym -> + MemFootprintVal sym w -> + MemFootprintVal sym w -> + IO (MemFootprintVal sym w) +muxFootprintVals sym p_mux (MemFootprintVal p1 ptr1) (MemFootprintVal p2 ptr2) = do + p <- itePred sym p_mux p1 p2 + ptr <- muxPtr sym p ptr1 ptr2 + return $ MemFootprintVal p ptr + +newtype MemFootprintMap sym ptrW = + MemFootprintMap (MapF.MapF (MemFootprintSized sym ptrW) (MemFootprintVal sym)) + +unionFootprintMap :: + IsSymExprBuilder sym => + sym -> + MemFootprintMap sym ptrW -> + MemFootprintMap sym ptrW -> + IO (MemFootprintMap sym ptrW ) +unionFootprintMap sym (MemFootprintMap m1) (MemFootprintMap m2) = MemFootprintMap <$> + MapF.mergeWithKeyM + (\_k v1 v2 -> Just <$> mergeFootprintVals sym v1 v2) + return + return + m1 m2 muxFootprintMap :: IsExprBuilder sym => OrdF (SymExpr sym) => sym -> Pred sym -> - Map (MemFootprint sym ptrW) (Pred sym) -> - Map (MemFootprint sym ptrW) (Pred sym) -> - IO (Map (MemFootprint sym ptrW) (Pred sym)) -muxFootprintMap sym p = - Map.mergeA - (Map.traverseMissing (\_k x -> andPred sym x p)) - (Map.traverseMissing (\_k y -> andPred sym y =<< notPred sym p)) - (Map.zipWithAMatched (\_k x y -> itePred sym p x y)) - --- This is basically an internal function called --- from "trace footprint", but is broken out here. --- The "Const" in the return type is an artifact --- of how the evalWithFreshCache operator works, --- as it requires an applicative functor over --- the sequence type. --- --- We compute on the intermediate Map type because --- it is more convenient for computing mux and union --- operations than @Set Footprint@ type that is eventually --- returned by `traceFootprint`. + MemFootprintMap sym ptrW -> + MemFootprintMap sym ptrW -> + IO (MemFootprintMap sym ptrW) +muxFootprintMap sym p_mux (MemFootprintMap m1) (MemFootprintMap m2) = MemFootprintMap <$> + MapF.mergeWithKeyM + -- footprints shared between branches are muxed according to the condition + (\_k v1 v2 -> Just <$> muxFootprintVals sym p_mux v1 v2) + -- all footprints unique to the true branch are made conditional on the mux condition being true + (TF.traverseF (\(MemFootprintVal p v) -> MemFootprintVal <$> andPred sym p p_mux <*> pure v)) + -- all footprints unique to the false branch are made conditional on the mux condition being false + (TF.traverseF (\(MemFootprintVal p v) -> MemFootprintVal <$> (andPred sym p =<< notPred sym p_mux) <*> pure v)) + m1 m2 + traceFootprintMap :: - IsExprBuilder sym => + IsSymExprBuilder sym => OrdF (SymExpr sym) => sym -> MemTraceSeq sym ptrW -> - IO (Const (Map (MemFootprint sym ptrW) (Pred sym)) (MemEvent sym ptrW)) + IO (Const (MemFootprintMap sym ptrW) (MemEvent sym ptrW)) traceFootprintMap sym = - evalWithFreshCache $ \rec -> \case - SymSequenceNil -> return (Const mempty) - - SymSequenceCons _ (MemOpEvent x) xs -> - do let (fp,p) = memOpFootprint sym x - let m1 = Map.insert fp p mempty - Const m2 <- rec xs - Const <$> unionFootprintMap sym m1 m2 - SymSequenceCons _ _ xs -> rec xs - - SymSequenceAppend _ xs ys -> - do Const m1 <- rec xs - Const m2 <- rec ys - Const <$> unionFootprintMap sym m1 m2 - - SymSequenceMerge _ p xs ys -> - do Const m1 <- rec xs - Const m2 <- rec ys - Const <$> muxFootprintMap sym p m1 m2 - - --- | Compute the set of "footprint" values --- that correspond to the reads and writes --- generated by this trace memory. -traceFootprint :: - IsExprBuilder sym => + evalWithFreshCache $ \rec -> \case + SymSequenceNil -> return (Const (MemFootprintMap MapF.empty)) + + SymSequenceCons _ (MemOpEvent x) xs -> do + MapF.Pair fp v <- return $ memOpFootprintVal sym x + let m1 = MemFootprintMap (MapF.singleton fp v) + Const m2 <- rec xs + Const <$> unionFootprintMap sym m1 m2 + + SymSequenceCons _ _ xs -> rec xs + SymSequenceAppend _ xs ys -> + do Const m1 <- rec xs + Const m2 <- rec ys + Const <$> unionFootprintMap sym m1 m2 + + SymSequenceMerge _ p xs ys -> + do Const m1 <- rec xs + Const m2 <- rec ys + Const <$> muxFootprintMap sym p m1 m2 + +-- | Collapse all memory operations in a memory trace into a set, where each 'MemOp' is +-- conditional on the branch condition leading to it +traceFootprintOps :: + forall sym ptrW. + IsSymExprBuilder sym => + OrdF (SymExpr sym) => + sym -> + MemTraceImpl sym ptrW -> + IO (Set (MemOp sym ptrW)) +traceFootprintOps sym mem = do + Const (MemFootprintMap m) <- traceFootprintMap sym (memSeq mem) + Set.fromList <$> mapM (\(MapF.Pair a b) -> go a b) (MapF.toList m) + where + go :: MemFootprintSized sym ptrW w -> MemFootprintVal sym w -> IO (MemOp sym ptrW) + go (MemFootprintSized ptr w dir cond end) (MemFootprintVal p val) = case cond of + Unconditional | Just True <- asConstantPred p -> return $ MemOp ptr dir Unconditional w val end + _ -> do + let condp = getCond sym cond + p' <- andPred sym condp p + return $ MemOp ptr dir (Conditional p') w val end + +traceFootprint :: + IsSymExprBuilder sym => OrdF (SymExpr sym) => sym -> MemTraceImpl sym ptrW -> IO (Set (MemFootprint sym ptrW)) -traceFootprint sym mem = do - do Const m <- traceFootprintMap sym (memSeq mem) - let xs = do (MemFootprint ptr w dir _ end, cond) <- Map.toList m - case asConstantPred cond of - Nothing -> [MemFootprint ptr w dir (Conditional cond) end] - Just True -> [MemFootprint ptr w dir Unconditional end] - Just False -> [] - return $ Set.fromList xs - - +traceFootprint sym mem = Set.map (memOpFootprint sym) <$> traceFootprintOps sym mem -- | Filter the memory event traces to leave just the observable -- events. This currently includes all system call events, diff --git a/src/Pate/Verification/AbstractDomain.hs b/src/Pate/Verification/AbstractDomain.hs index fad5f896..3b971bd3 100644 --- a/src/Pate/Verification/AbstractDomain.hs +++ b/src/Pate/Verification/AbstractDomain.hs @@ -434,6 +434,19 @@ instance (OrdF (W4.SymExpr sym), PA.ValidArch arch) => Semigroup (WidenLocs sym instance (OrdF (W4.SymExpr sym), PA.ValidArch arch) => Monoid (WidenLocs sym arch) where mempty = WidenLocs mempty +joinMemCellAbsValues :: + W4.IsSymExprBuilder sym => + MapF.MapF (PMC.MemCell sym arch) (MemAbstractValue s) -> + MapF.MapF (PMC.MemCell sym arch) (MemAbstractValue s) -> + MapF.MapF (PMC.MemCell sym arch) (MemAbstractValue s) +joinMemCellAbsValues = MapF.mergeWithKey + (\_cell (MemAbstractValue v1) (MemAbstractValue v2) -> Just $ MemAbstractValue $ combineAbsVals v1 v2) + id + id + +collapseMemCellVals :: W4.IsSymExprBuilder sym => [MapF.Pair (PMC.MemCell sym arch) (MemAbstractValue s)] -> MapF.MapF (PMC.MemCell sym arch) (MemAbstractValue s) +collapseMemCellVals = foldr (\(MapF.Pair k v) m -> joinMemCellAbsValues (MapF.singleton k v) m) MapF.empty + -- | From the result of symbolic execution (in 'PS.SimOutput') we extract any abstract domain -- information that we can establish for the registers, memory reads and memory writes. initAbsDomainVals :: @@ -450,19 +463,33 @@ initAbsDomainVals :: AbstractDomainVals sym arch bin {- ^ values from pre-domain -} -> m (AbstractDomainVals sym arch bin) initAbsDomainVals sym eqCtx f stOut preVals = do - foots <- fmap S.toList $ IO.liftIO $ MT.traceFootprint sym (PS.simOutMem stOut) + traceOps <- fmap S.toList $ IO.liftIO $ MT.traceFootprintOps sym (PS.simOutMem stOut) -- NOTE: We need to include any cells from the pre-domain to ensure that we -- propagate forward any value constraints for memory that is not accessed in this -- slice - let prevCells = map fstPair $ filter (\(MapF.Pair _ (MemAbstractValue v)) -> not (isUnconstrained v)) (MapF.toList (absMemVals preVals)) - let cells = (S.toList . S.fromList) $ map (\(MT.MemFootprint ptr w _dir _cond end) -> Some (PMC.MemCell ptr w end)) foots ++ prevCells - subTree @"loc" "Initial Domain" $ do - regVals <- MM.traverseRegsWith (\r v -> subTrace (PL.SomeLocation (PL.Register r)) $ getRegAbsVal r v) (PS.simOutRegs stOut) - memVals <- fmap MapF.fromList $ forM cells $ \(Some cell) -> do - absVal <- subTrace (PL.SomeLocation (PL.Cell cell)) $ getMemAbsVal cell - return (MapF.Pair cell absVal) - mr <- subTrace (PL.SomeLocation (PL.Named (knownSymbol @"maxRegion"))) $ f (PS.unSE $ PS.simMaxRegion $ (PS.simOutState stOut)) - return (AbstractDomainVals regVals memVals mr) + let prevCells = (S.toList . S.fromList) $ map fstPair $ filter (\(MapF.Pair _ (MemAbstractValue v)) -> not (isUnconstrained v)) (MapF.toList (absMemVals preVals)) + + regVals <- subTree @"loc" "Initial Domain 1" $ do + MM.traverseRegsWith (\r v -> subTrace (PL.SomeLocation (PL.Register r)) $ getRegAbsVal r v) (PS.simOutRegs stOut) + + memPreVals <- subTree @"loc" "Initial Domain 2" $ do + -- collect models for any cells in the abstract domain with known + -- concrete values first + forM prevCells $ \(Some cell) -> subTrace (PL.SomeLocation (PL.Cell cell)) $ do + absVal <- getMemAbsVal cell + return $ MapF.Pair cell absVal + + -- next we check if the written value was actually concrete, otherwise we can skip these writes + memTracePrevVals <- subTree @"loc" "Initial Domain 3" $ fmap collapseMemCellVals $ forM traceOps $ \mop@(MT.MemOp ptr _dir _cond (w :: MT.NatRepr w) _val end) -> do + let cell = PMC.MemCell ptr w end + subTrace (PL.SomeLocation (PL.Cell cell)) $ getMemOpAbsVal mop + + memTraceVals <- subTree @"loc" "Initial Domain 4" $ MapF.traverseWithKey (\cell mv -> subTrace (PL.SomeLocation (PL.Cell cell)) $ getCellWithPrevAbsVal cell mv) memTracePrevVals + + let memVals = joinMemCellAbsValues (MapF.fromList memPreVals) memTraceVals + + mr <- subTree @"loc" "Initial Domain 5" $ subTrace (PL.SomeLocation (PL.Named (knownSymbol @"maxRegion"))) $ f (PS.unSE $ PS.simMaxRegion $ (PS.simOutState stOut)) + return (AbstractDomainVals regVals memVals mr) where getMemAbsVal :: PMC.MemCell sym arch w -> @@ -490,6 +517,39 @@ initAbsDomainVals sym eqCtx f stOut preVals = do _ -> return v _ -> getAbsVal sym f e + getMemOpAbsVal :: + MT.MemOp sym (MC.ArchAddrWidth arch) -> + m (MapF.Pair (PMC.MemCell sym arch) (MemAbstractValue sym)) + getMemOpAbsVal (MT.MemOp ptr _dir _cond (w :: MT.NatRepr w) val end) = do + let CLM.LLVMPointer region offset = val + regAbs <- f (W4.natToIntegerPure region) + offsetAbs <- f offset + return $ MapF.Pair cell (MemAbstractValue $ MacawAbstractValue (Ctx.Empty Ctx.:> regAbs Ctx.:> offsetAbs)) + + where + cell :: PMC.MemCell sym arch w + cell = PMC.MemCell ptr w end + + -- extract a final abstract value from a first-pass check + getCellWithPrevAbsVal :: + PMC.MemCell sym arch w -> + MemAbstractValue sym w -> + m (MemAbstractValue sym w) + getCellWithPrevAbsVal cell (MemAbstractValue (MacawAbstractValue (Ctx.Empty Ctx.:> regAbs Ctx.:> offsetAbs))) = do + CLM.LLVMPointer mem_region mem_offset <- IO.liftIO $ PMC.readMemCell sym (PS.simOutMem stOut) cell + -- we avoid checking the actual memory model for concrete values if we didn't originally + -- write/read a concrete value, since it's unlikely to succeed (outside of strange memory aliasing cases) + -- since this is an under-approximation it's safe to just leave them as unconstrained + memRegAbs <- case regAbs of + AbsUnconstrained tp -> return $ AbsUnconstrained tp + _ -> f (W4.natToIntegerPure mem_region) + + memOffsetAbs <- case offsetAbs of + AbsUnconstrained tp -> return $ AbsUnconstrained tp + _ -> f mem_offset + + return $ MemAbstractValue $ MacawAbstractValue (Ctx.Empty Ctx.:> memRegAbs Ctx.:> memOffsetAbs) + -- | Convert the abstract domain from an expression into an equivalent 'AbsRange' -- TODO: Currently this only extracts concrete values extractAbsRange :: diff --git a/src/Pate/Verification/Widening.hs b/src/Pate/Verification/Widening.hs index 800df75d..2ad0a5af 100644 --- a/src/Pate/Verification/Widening.hs +++ b/src/Pate/Verification/Widening.hs @@ -128,6 +128,7 @@ import qualified What4.Expr.GroundEval as W4 import qualified Lang.Crucible.Utils.MuxTree as MT import Pate.Verification.Domain (universalDomain) import qualified Data.Parameterized.TraversableF as TF +import qualified Data.IORef as IO -- | Generate a fresh abstract domain value for the given graph node. -- This should represent the most information we can ever possibly @@ -1647,13 +1648,23 @@ getInitalAbsDomainVals :: EquivM sym arch (PPa.PatchPair (PAD.AbstractDomainVals sym arch)) getInitalAbsDomainVals bundle preDom = withTracing @"debug" "getInitalAbsDomainVals" $ withSym $ \sym -> do PEM.SymExprMappable asEM <- return $ PEM.symExprMappable sym + + (ref :: IO.IORef (MapF.MapF (W4.SymExpr sym) (PAD.AbsRange))) <- IO.liftIO $ IO.newIORef MapF.empty + + let getConcreteRange :: forall tp. W4.SymExpr sym tp -> EquivM_ sym arch (PAD.AbsRange tp) getConcreteRange e = do - e' <- asEM @tp $ applyCurrentAsms e - e'' <- concretizeWithSolver e' - emitTraceLabel @"expr" "output" (Some e'') - return $ PAD.extractAbsRange sym e'' + m <- (IO.liftIO $ IO.readIORef ref) + case MapF.lookup e m of + Just a -> return a + Nothing -> do + -- e' <- asEM @tp $ applyCurrentAsms e + e'' <- concretizeWithSolver e + emitTraceLabel @"expr" "output" (Some e'') + let a = PAD.extractAbsRange sym e'' + IO.liftIO $ IO.modifyIORef ref (MapF.insert e a) + return a eqCtx <- equivalenceContext PPa.forBins $ \bin -> do From fde2e5cfe7600016a7e4c1a567f7f0a2eafba782 Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Mon, 21 Oct 2024 11:34:21 -0700 Subject: [PATCH 04/16] ParsedFunctions: track function cache validity instead of flushing simplifies the logic required to keep the code discovery cache valid by instead comparing any current state overrides to what they were when the retrieved cache entry was created this avoids the need to explicitly flush the cache whenever any state override occurs, and consequently allows the cache to be less conservative --- src/Pate/Discovery/ParsedFunctions.hs | 92 +++++++++++++------ src/Pate/Monad.hs | 123 +++++++++++++++++++++++++- src/Pate/Verification/Widening.hs | 11 +++ 3 files changed, 196 insertions(+), 30 deletions(-) diff --git a/src/Pate/Discovery/ParsedFunctions.hs b/src/Pate/Discovery/ParsedFunctions.hs index 6034c7ce..9612d85f 100644 --- a/src/Pate/Discovery/ParsedFunctions.hs +++ b/src/Pate/Discovery/ParsedFunctions.hs @@ -78,8 +78,62 @@ import Data.List (isPrefixOf) data ParsedBlocks arch = forall ids. ParsedBlocks [MD.ParsedBlock arch ids] +data CachedFunInfo arch (bin :: PBi.WhichBinary) = CachedFunInfo + { + cachedDfi :: Some (MD.DiscoveryFunInfo arch) + -- we take a snapshot of any state that affects macaw's code discovery so + -- we can check if the cached result is still valid on a hit + , cachedOverride :: Maybe (PB.AbsStateOverride arch) + -- FIXME: technically we can scope these to the current function body, but + -- these are added infrequently enough that it's probably not worth it + , cachedExtraJumps :: ExtraJumps arch + , cachedExtraTargets :: Set.Set (MM.ArchSegmentOff arch) + } + +cachedFunInfo :: + forall arch bin ids. + ParsedFunctionMap arch bin -> + ParsedFunctionState arch bin -> + MD.DiscoveryFunInfo arch ids -> + CachedFunInfo arch bin +cachedFunInfo pfm st dfi = CachedFunInfo (Some dfi) (Map.lookup addr (absStateOverrides pfm)) (extraEdges st) (extraTargets st) + where + addr :: MM.ArchSegmentOff arch + addr = MD.discoveredFunAddr dfi + +-- | Add a function entry to the cache, taking a snapshot of any relevant +-- state so we can validate the cached entry on retrieval +addFunctionEntry :: + ParsedFunctionMap arch bin -> + PB.FunctionEntry arch bin -> + MD.DiscoveryFunInfo arch ids -> + ParsedFunctionState arch bin -> + ParsedFunctionState arch bin +addFunctionEntry pfm fe dfi st = st { parsedFunctionCache = Map.insert fe (cachedFunInfo pfm st dfi) (parsedFunctionCache st ) } + + +-- | Fetch a cached discovery result for a function entry. Returns 'Nothing' if any relevant state in either the 'ParsedFunctionMap' +-- or the 'ParsedFunctionState' has changed since the entry was computed (i.e. the cached entry is potentially invalid and +-- needs to be re-computed). +lookupFunctionEntry :: + MM.ArchConstraints arch => + ParsedFunctionMap arch bin -> + ParsedFunctionState arch bin -> + PB.FunctionEntry arch bin -> + Maybe (Some (MD.DiscoveryFunInfo arch)) +lookupFunctionEntry pfm st fe = case Map.lookup fe (parsedFunctionCache st) of + Just cachedInfo | + Some dfi <- cachedDfi cachedInfo + , mov <- Map.lookup (MD.discoveredFunAddr dfi) (absStateOverrides pfm) + , mov == cachedOverride cachedInfo + , extraEdges st == cachedExtraJumps cachedInfo + , extraTargets st == cachedExtraTargets cachedInfo + -> Just (Some dfi) + _ -> Nothing + + data ParsedFunctionState arch bin = - ParsedFunctionState { parsedFunctionCache :: Map.Map (PB.FunctionEntry arch bin) (Some (MD.DiscoveryFunInfo arch)) + ParsedFunctionState { parsedFunctionCache :: Map.Map (PB.FunctionEntry arch bin) (CachedFunInfo arch bin) , discoveryState :: MD.DiscoveryState arch , extraTargets :: Set.Set (MM.ArchSegmentOff arch) , extraEdges :: ExtraJumps arch @@ -132,7 +186,6 @@ addExtraTarget pfm tgt = do False -> do IORef.modifyIORef' (parsedStateRef pfm) $ \st' -> st' { extraTargets = Set.insert tgt (extraTargets st')} - flushCache pfm getExtraTargets :: ParsedFunctionMap arch bin -> @@ -141,16 +194,6 @@ getExtraTargets pfm = do st <- IORef.readIORef (parsedStateRef pfm) return $ extraTargets st -flushCache :: - MM.ArchConstraints arch => - ParsedFunctionMap arch bin -> - IO () -flushCache pfm = do - st <- IORef.readIORef (parsedStateRef pfm) - let ainfo = MD.archInfo (discoveryState st) - IORef.modifyIORef' (parsedStateRef pfm) $ \st' -> - st' { parsedFunctionCache = mempty, discoveryState = initDiscoveryState pfm ainfo } - isUnsupportedErr :: T.Text -> Bool isUnsupportedErr err = T.isPrefixOf "UnsupportedInstruction" err @@ -220,9 +263,7 @@ addOverrides :: addOverrides defaultInit pfm ovs = do let new_ovs = Map.merge Map.preserveMissing Map.preserveMissing (Map.zipWithMaybeMatched (\_ l r -> Just (mergeOverrides l r))) ovs (absStateOverrides pfm) let new_init = PB.MkInitialAbsState $ \mem segOff -> mergeOverrides (PB.mkInitAbs defaultInit mem segOff) (PB.mkInitAbs (defaultInitState pfm) mem segOff) - let pfm' = pfm { absStateOverrides = new_ovs, defaultInitState = new_init } - flushCache pfm' - return pfm' + return $ pfm { absStateOverrides = new_ovs, defaultInitState = new_init } addExtraEdges :: forall arch bin. @@ -233,18 +274,12 @@ addExtraEdges :: addExtraEdges pfm es = do mapM_ addTgt (Map.elems es) IORef.modifyIORef' (parsedStateRef pfm) $ \st' -> - st' { extraEdges = Map.merge Map.preserveMissing Map.preserveMissing (Map.zipWithMaybeMatched (\_ l r -> Just (l <> r))) es (extraEdges st') - , parsedFunctionCache = Map.empty - } + st' { extraEdges = Map.merge Map.preserveMissing Map.preserveMissing (Map.zipWithMaybeMatched (\_ l r -> Just (l <> r))) es (extraEdges st') } where addTgt :: ExtraJumpTarget arch -> IO () addTgt = \case DirectTargets es' -> mapM_ (addExtraTarget pfm) (Set.toList es') - -- we need to flush the cache here to ensure that we re-check the block at the - -- call site(s) after adding this as a return - ReturnTarget -> flushCache pfm - -- a call shouldn't require special treatment since it won't introduce - -- any edges + ReturnTarget -> return () DirectCall{} -> return () -- | Apply the various overrides to the architecture definition before returning the discovery state @@ -522,7 +557,7 @@ parsedFunctionContaining blk pfm@(ParsedFunctionMap pfmRef mCFGDir _pd _ _ _ _ _ st <- getParsedFunctionState faddr pfm -- First, check if we have a cached set of blocks for this state - case Map.lookup (PB.blockFunctionEntry blk) (parsedFunctionCache st) of + case lookupFunctionEntry pfm st (PB.blockFunctionEntry blk) of Just sdfi -> return (Just sdfi) Nothing -> do -- Otherwise, run code discovery at this address @@ -537,7 +572,7 @@ parsedFunctionContaining blk pfm@(ParsedFunctionMap pfmRef mCFGDir _pd _ _ _ _ _ -- to an MVar where we fully evaluate each result before updating it. (st', Some dfi) <- atomicAnalysis faddr st IORef.modifyIORef pfmRef $ \st_ -> - st_ { parsedFunctionCache = parsedFunctionCache st', discoveryState = discoveryState st'} + st_ { parsedFunctionCache = parsedFunctionCache st' } --IORef.writeIORef pfmRef pfm' saveCFG mCFGDir (PB.blockBinRepr blk) dfi @@ -564,11 +599,10 @@ parsedFunctionContaining blk pfm@(ParsedFunctionMap pfmRef mCFGDir _pd _ _ _ _ _ False -> do let rsn = MD.CallTarget faddr case incCompResult (MD.discoverFunction MD.defaultDiscoveryOptions faddr rsn (discoveryState st) []) of - (ds2, Some dfi) -> do + (_, Some dfi) -> do entry <- resolveFunctionEntry (funInfoToFunEntry (PB.blockBinRepr blk) dfi pfm) pfm - return $ (st { parsedFunctionCache = Map.insert entry (Some dfi) (parsedFunctionCache st) - , discoveryState = ds2 - }, Some dfi) + let st' = addFunctionEntry pfm entry dfi st + return $ (st', Some dfi) bin :: PBi.WhichBinaryRepr bin bin = PB.blockBinRepr blk diff --git a/src/Pate/Monad.hs b/src/Pate/Monad.hs index c481c3d0..5de4f7ac 100644 --- a/src/Pate/Monad.hs +++ b/src/Pate/Monad.hs @@ -105,7 +105,11 @@ module Pate.Monad , joinPatchPred , withSharedEnvEmit , module PME - , atPriority, currentPriority, thisPriority) + , atPriority, currentPriority, thisPriority + , concretizeWithSolverGen + , concretizeWithMap + , ConcreteMap, emptyConcreteMap + ) where import GHC.Stack ( HasCallStack, callStack ) @@ -162,6 +166,7 @@ import qualified What4.Expr as WE import qualified What4.Expr.GroundEval as W4G import qualified What4.Expr.Builder as W4B import qualified What4.Interface as W4 +import qualified What4.Concrete as W4 import qualified What4.SatResult as W4R import qualified What4.Symbol as WS import What4.Utils.Process (filterAsync) @@ -204,6 +209,7 @@ import qualified What4.JSON as W4S import qualified Data.Aeson as JSON import qualified Data.Aeson.Text as JSON import qualified GHC.IO.Unsafe as IO +import qualified Data.Parameterized.TraversableFC as TFC atPriority :: NodePriority -> @@ -1022,7 +1028,122 @@ getWrappedSolver = withSym $ \sym -> do case r of Left _err -> safeIO PEE.SolverError $ k W4R.Unknown Right a -> return a + + +-- | Mapping from expressions to (maybe) concrete values +-- If an expression is missing from the map, it may be symbolic or concrete. +-- If an expression is present in the map with a concrete value, this value *may* hold in all cases +-- If an expression is present in the map with a 'Nothing' entry, it is considered symbolic +-- (either proven to have multiple models or assumed symbolic due to timeout) +-- FIXME: includes path condition predicate which is used internally by 'concretizeWithSolverGen' +-- but should be factored out. +data ConcreteMap sym = + ConcreteMap (MapF.MapF (W4.SymExpr sym) (PPa.LiftF Maybe W4.ConcreteVal)) (W4.Pred sym) + +emptyConcreteMap :: EquivM sym arch (ConcreteMap sym) +emptyConcreteMap = withSym $ \sym -> return (ConcreteMap MapF.empty (W4.truePred sym)) + +-- | Use the given 'ConcreteMap' to concretize an expression. Returns the original expression +-- if it is known to be symbolic, the concretized expression if it is in the map, or 'Nothing' if +-- it is not in the map. +concretizeWithMap :: + ConcreteMap sym -> + W4.SymExpr sym tp -> + EquivM sym arch (Maybe (W4.SymExpr sym tp)) +concretizeWithMap (ConcreteMap cm _) e = withSym $ \sym -> case MapF.lookup e cm of + Just (PPa.LiftF (Just c)) -> Just <$> (IO.liftIO $ W4.concreteToSym sym c) + Just (PPa.LiftF Nothing) -> return $ Just e + Nothing -> return Nothing + +concretizeOne :: + W4.SymExpr sym tp -> + ConcreteMap sym -> + EquivM sym arch (W4.SymExpr sym tp, ConcreteMap sym) +concretizeOne e (ConcreteMap cm p) = do + e' <- concretizeWithSolver e + case W4.asConcrete e' of + Just c -> return $ (e', ConcreteMap (MapF.insert e (PPa.LiftF (Just c)) cm) p) + Nothing -> return $ (e, ConcreteMap (MapF.insert e (PPa.LiftF Nothing) cm) p) + +concretizeWithSolverGen :: + forall sym arch ctx. + ConcreteMap sym -> + Ctx.Assignment (W4.SymExpr sym) ctx -> + EquivM sym arch (Ctx.Assignment (W4.SymExpr sym) ctx, ConcreteMap sym) +concretizeWithSolverGen cm_outer (Ctx.Empty Ctx.:> e) = do + (e', cm) <- concretizeOne e cm_outer + return $ (Ctx.Empty Ctx.:> e', cm) +concretizeWithSolverGen cm_outer a = withSym $ \sym -> do + let + go :: forall tp. SymGroundEvalFn sym -> W4.SymExpr sym tp -> ConcreteMap sym -> EquivM_ sym arch (ConcreteMap sym) + go evalFn e (ConcreteMap m p) = case W4.asConcrete e of + -- value is already concrete, nothing to do + Just{} -> return (ConcreteMap m p) + Nothing -> (W4.asConcrete <$> concretizeWithModel evalFn e) >>= \case + -- failed to concretize with this model, so we'll conservatively mark this + -- as symbolic + Nothing -> return $ ConcreteMap (MapF.insert e (PPa.LiftF Nothing) m) p + Just conc1 -> do + conc1_as_sym <- IO.liftIO $ W4.concreteToSym sym conc1 + e_is_conc1 <- IO.liftIO $ W4.isEq sym e conc1_as_sym + case MapF.lookup e m of + -- value has been seen before and still may be concrete + Just (PPa.LiftF (Just conc2)) -> do + case testEquality conc1 conc2 of + Just Refl -> do + p' <- IO.liftIO $ W4.andPred sym p e_is_conc1 + return $ ConcreteMap m p' + -- ground value may disagree with cached value, so we conservatively assume symbolic + Nothing -> return $ ConcreteMap (MapF.insert e (PPa.LiftF Nothing) m) p + -- value has been seen before and is known not-concrete, nothing to do + Just (PPa.LiftF Nothing) -> return $ ConcreteMap m p + -- first time seeing this value, insert concretized version into cache + Nothing -> do + p' <- IO.liftIO $ W4.andPred sym p e_is_conc1 + return $ ConcreteMap (MapF.insert e (PPa.LiftF (Just conc1)) m) p' + + loop :: W4.Pred sym -> ConcreteMap sym -> EquivM_ sym arch (ConcreteMap sym, Bool) + loop not_prev_model (ConcreteMap cm p) = do + mresult <- heuristicSat "concretize" not_prev_model $ \case + W4R.Sat evalFn -> + W4R.Sat <$> TFC.foldrMFC (go evalFn) (ConcreteMap cm (W4.truePred sym)) a + W4R.Unsat () -> return $ W4R.Unsat () + W4R.Unknown -> return $ W4R.Unknown + case mresult of + W4R.Sat (ConcreteMap cm' p') -> do + not_this <- IO.liftIO $ W4.notPred sym p' + withAssumption not_prev_model $ do + loop not_this (ConcreteMap cm' (W4.truePred sym)) + + -- predicate is unsatisfiable, so there are no models that disprove all of the + -- current cached values + W4R.Unsat () -> return $ (ConcreteMap cm p, True) + W4R.Unknown -> return (ConcreteMap cm p, False) + + (cm, complete) <- loop (W4.truePred sym) cm_outer + (ConcreteMap cm_ _) <- case complete of + True -> return cm + False -> do + -- finish the 'ConcreteMap' by double checking each map + let + upd :: forall tp. W4.SymExpr sym tp -> ConcreteMap sym -> EquivM_ sym arch (ConcreteMap sym) + upd e cm_ = concretizeWithMap cm_ e >>= \case + Just e' -> case W4.asConcrete e' of + -- e is possibly concrete, double check it + Just{} -> snd <$> concretizeOne e cm_ + -- e is known-symbolic, nothing to do + Nothing -> return cm_ + -- e is missing from map entirely, so check it here + Nothing -> snd <$> concretizeOne e cm_ + TFC.foldrMFC' upd cm a + let cm' = ConcreteMap cm_ (W4.truePred sym) + let + conc :: forall tp. W4.SymExpr sym tp -> EquivM_ sym arch (W4.SymExpr sym tp) + conc e = fromMaybe e <$> concretizeWithMap cm' e + + a' <- TFC.traverseFC conc a + return (a', cm') concretizeWithModel :: SymGroundEvalFn sym -> diff --git a/src/Pate/Verification/Widening.hs b/src/Pate/Verification/Widening.hs index 2ad0a5af..671a3cf3 100644 --- a/src/Pate/Verification/Widening.hs +++ b/src/Pate/Verification/Widening.hs @@ -129,6 +129,10 @@ import qualified Lang.Crucible.Utils.MuxTree as MT import Pate.Verification.Domain (universalDomain) import qualified Data.Parameterized.TraversableF as TF import qualified Data.IORef as IO +<<<<<<< Updated upstream +======= +import qualified Data.Parameterized.Context as Ctx +>>>>>>> Stashed changes -- | Generate a fresh abstract domain value for the given graph node. -- This should represent the most information we can ever possibly @@ -1670,6 +1674,13 @@ getInitalAbsDomainVals bundle preDom = withTracing @"debug" "getInitalAbsDomainV PPa.forBins $ \bin -> do out <- PPa.get bin (PS.simOut bundle) pre <- PPa.get bin (PAD.absDomVals preDom) + {- + _ <- PAD.initAbsDomainVals sym eqCtx collectExpr out pre + Some exprs <- (Ctx.fromList . Set.toList) <$> (IO.liftIO $ IO.readIORef exprsRef) + cm_empty <- emptyConcreteMap + (_, cm) <- concretizeWithSolverGen cm_empty exprs + PAD.initAbsDomainVals sym eqCtx (concFromCache cm) out pre + -} PAD.initAbsDomainVals sym eqCtx getConcreteRange out pre From 15dbfd2c2b45218e9fa9b77769fb7b255c987e4c Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Mon, 21 Oct 2024 11:43:03 -0700 Subject: [PATCH 05/16] cleanup merge artifact --- src/Pate/Verification/Widening.hs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Pate/Verification/Widening.hs b/src/Pate/Verification/Widening.hs index 671a3cf3..3f293255 100644 --- a/src/Pate/Verification/Widening.hs +++ b/src/Pate/Verification/Widening.hs @@ -129,10 +129,6 @@ import qualified Lang.Crucible.Utils.MuxTree as MT import Pate.Verification.Domain (universalDomain) import qualified Data.Parameterized.TraversableF as TF import qualified Data.IORef as IO -<<<<<<< Updated upstream -======= -import qualified Data.Parameterized.Context as Ctx ->>>>>>> Stashed changes -- | Generate a fresh abstract domain value for the given graph node. -- This should represent the most information we can ever possibly From de0a065987070748351d04749aac1bae61d406f8 Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Mon, 21 Oct 2024 12:39:29 -0700 Subject: [PATCH 06/16] Repl: output duration when using "status" command --- tools/pate-repl/Repl.hs | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/tools/pate-repl/Repl.hs b/tools/pate-repl/Repl.hs index 5b115b1f..309e72a2 100644 --- a/tools/pate-repl/Repl.hs +++ b/tools/pate-repl/Repl.hs @@ -441,8 +441,17 @@ maybeSubNodes nd@(TraceNode lbl v subtree) g f = do - - +ppDuration :: + [TraceTag] -> + TraceTree '(sym,arch) -> + ReplM sym arch (Maybe (PP.Doc ())) +ppDuration tags subtree = do + b <- IO.liftIO $ getTreeStatus subtree + return $ case finishedAt b of + Just fin | elem "time" tags -> Just $ (PP.viaShow $ + let t = diffUTCTime fin (traceTreeStartTime subtree) + in (round (t * 1000))) <> "ms" + _ -> Nothing prettyNextNodes :: forall sym arch. @@ -457,12 +466,7 @@ prettyNextNodes startAt onlyFinished = do mapM (\(nesting, Some (nd@(TraceNode lbl v subtree) :: TraceNode sym arch nm)) -> do b <- IO.liftIO $ getTreeStatus subtree json <- IO.liftIO $ jsonNode @_ @'(sym, arch) @nm sym lbl v - - let duration = case finishedAt b of - Just fin | elem "time" tags -> Just $ (PP.viaShow $ - let t = diffUTCTime fin (traceTreeStartTime subtree) - in (round (t * 1000))) <> "ms" - _ -> Nothing + duration <- ppDuration tags subtree case prettyNodeAt @'(sym, arch) @nm tags lbl v of Just pp -> do suf <- ppSuffix nesting b (knownSymbol @nm) @@ -563,12 +567,18 @@ status' mlimit = do SomeReplState {} -> execReplM $ do (Some (TraceNode _ _ t)) <- gets replNode st <- IO.liftIO $ getTreeStatus t - case st of - _ | isBlockedStatus st -> PO.printMsgLn $ "Waiting for input.." - NodeStatus (StatusWarning e) _ _ -> PO.printMsgLn $ "Warning: \n" <> (PP.pretty (chopMsg mlimit (show e))) - NodeStatus (StatusError e) _ _ -> PO.printMsgLn $ "Error: \n" <> (PP.pretty (chopMsg mlimit (show e))) - NodeStatus StatusSuccess Nothing _ -> PO.printMsgLn $ "In progress.." - NodeStatus StatusSuccess (Just{}) _ -> PO.printMsgLn $ "Finalized" + + let pp = case st of + _ | isBlockedStatus st -> "Waiting for input.." + NodeStatus (StatusWarning e) _ _ -> "Warning: \n" <> (PP.pretty (chopMsg mlimit (show e))) + NodeStatus (StatusError e) _ _ -> "Error: \n" <> (PP.pretty (chopMsg mlimit (show e))) + NodeStatus StatusSuccess Nothing _ -> "In progress.." + NodeStatus StatusSuccess (Just{}) _ -> "Finalized" + tags <- gets replTags + duration <- ppDuration tags t + case duration of + Just pp' -> PO.printMsgLn $ pp <+> pp' + Nothing -> PO.printMsgLn pp prevNodes <- gets replPrev fin <- IO.liftIO $ IO.readIORef finalResult case (prevNodes, fin) of From e010f581fe1cf0dae34d75a254d20154351b2156 Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Thu, 24 Oct 2024 14:22:53 -0700 Subject: [PATCH 07/16] use batch processing when initializing abstract value domain rather than asking the solver to concretize each value individually, this strategy instead collects all values to be concretized and concretizes them all at once in most cases this seems to be an improvement, but there are edge cases where the batch processing times out and we need to fall back to the original approach --- src/Pate/Monad.hs | 20 +++- src/Pate/PatchPair.hs | 5 + src/Pate/Verification/AbstractDomain.hs | 141 +++++++++++++++++++++++- src/Pate/Verification/Widening.hs | 32 +----- 4 files changed, 163 insertions(+), 35 deletions(-) diff --git a/src/Pate/Monad.hs b/src/Pate/Monad.hs index 5de4f7ac..09318010 100644 --- a/src/Pate/Monad.hs +++ b/src/Pate/Monad.hs @@ -106,9 +106,7 @@ module Pate.Monad , withSharedEnvEmit , module PME , atPriority, currentPriority, thisPriority - , concretizeWithSolverGen - , concretizeWithMap - , ConcreteMap, emptyConcreteMap + , concretizeWithSolverBatch ) where @@ -1065,15 +1063,15 @@ concretizeOne e (ConcreteMap cm p) = do Just c -> return $ (e', ConcreteMap (MapF.insert e (PPa.LiftF (Just c)) cm) p) Nothing -> return $ (e, ConcreteMap (MapF.insert e (PPa.LiftF Nothing) cm) p) -concretizeWithSolverGen :: +concretizeWithSolverGen' :: forall sym arch ctx. ConcreteMap sym -> Ctx.Assignment (W4.SymExpr sym) ctx -> EquivM sym arch (Ctx.Assignment (W4.SymExpr sym) ctx, ConcreteMap sym) -concretizeWithSolverGen cm_outer (Ctx.Empty Ctx.:> e) = do +concretizeWithSolverGen' cm_outer (Ctx.Empty Ctx.:> e) = do (e', cm) <- concretizeOne e cm_outer return $ (Ctx.Empty Ctx.:> e', cm) -concretizeWithSolverGen cm_outer a = withSym $ \sym -> do +concretizeWithSolverGen' cm_outer a = withSym $ \sym -> do let go :: forall tp. SymGroundEvalFn sym -> W4.SymExpr sym tp -> ConcreteMap sym -> EquivM_ sym arch (ConcreteMap sym) go evalFn e (ConcreteMap m p) = case W4.asConcrete e of @@ -1145,6 +1143,16 @@ concretizeWithSolverGen cm_outer a = withSym $ \sym -> do a' <- TFC.traverseFC conc a return (a', cm') +-- | Similar to 'concretizeWithSolver' but uses a widening strategy to +-- attempt to concretize all of the given expressions simultaneously. +concretizeWithSolverBatch :: + forall sym arch ctx. + Ctx.Assignment (W4.SymExpr sym) ctx -> + EquivM sym arch (Ctx.Assignment (W4.SymExpr sym) ctx) +concretizeWithSolverBatch es = do + emptyCM <- emptyConcreteMap + fst <$> concretizeWithSolverGen' emptyCM es + concretizeWithModel :: SymGroundEvalFn sym -> W4.SymExpr sym tp -> diff --git a/src/Pate/PatchPair.hs b/src/Pate/PatchPair.hs index da0fb5bf..79a4ad7f 100644 --- a/src/Pate/PatchPair.hs +++ b/src/Pate/PatchPair.hs @@ -395,6 +395,11 @@ forBinsC f = forBins $ \bin -> Const <$> f bin newtype LiftF (t :: l -> DK.Type) (f :: k -> l) (tp :: k) = LiftF { unLiftF :: (t (f tp)) } +instance Show (t (f tp)) => Show (LiftF t f tp) where + show (LiftF x) = show x + +instance (forall tp. Show (t (f tp))) => ShowF (LiftF t f) + -- | Specialization of 'PatchPair' to lift inner types over the 'bin' parameter. type PatchPairF t tp = PatchPair (LiftF t tp) diff --git a/src/Pate/Verification/AbstractDomain.hs b/src/Pate/Verification/AbstractDomain.hs index 3b971bd3..56f8dddd 100644 --- a/src/Pate/Verification/AbstractDomain.hs +++ b/src/Pate/Verification/AbstractDomain.hs @@ -46,6 +46,11 @@ module Pate.Verification.AbstractDomain , singletonDomain , zipSingletonDomains , widenAbsDomainEqMetaData + , GetAbsRange + , mkGetAbsRange + , batchGetAbsRange + , getAbsRange + , getAbsRange1 ) where import qualified Prettyprinter as PP @@ -54,7 +59,7 @@ import Control.Monad.Trans ( lift ) import Control.Monad ( forM, unless ) import qualified Control.Monad.IO.Class as IO import qualified Control.Monad.Writer as CMW -import Control.Lens ( (^.) ) +import Control.Lens ( (^.), (&), (.~) ) import Data.Functor.Const import qualified Data.Set as S @@ -109,6 +114,8 @@ import Data.Parameterized (knownSymbol) import qualified What4.JSON as W4S import Data.Aeson ( (.=) ) import qualified Data.Aeson as JSON +import qualified Data.IORef as IO +import qualified Data.Functor.Product as Product type instance PL.LocationK "memevent" = () type instance PL.LocationK "absrange" = W4.BaseType @@ -458,11 +465,11 @@ initAbsDomainVals :: IsTreeBuilder '(sym, arch) e m => sym -> PE.EquivContext sym arch -> - (forall tp. W4.SymExpr sym tp -> m (AbsRange tp)) -> + GetAbsRange sym m -> PS.SimOutput sym arch v bin -> AbstractDomainVals sym arch bin {- ^ values from pre-domain -} -> m (AbstractDomainVals sym arch bin) -initAbsDomainVals sym eqCtx f stOut preVals = do +initAbsDomainVals sym eqCtx getAbs stOut preVals = do traceOps <- fmap S.toList $ IO.liftIO $ MT.traceFootprintOps sym (PS.simOutMem stOut) -- NOTE: We need to include any cells from the pre-domain to ensure that we -- propagate forward any value constraints for memory that is not accessed in this @@ -491,6 +498,9 @@ initAbsDomainVals sym eqCtx f stOut preVals = do mr <- subTree @"loc" "Initial Domain 5" $ subTrace (PL.SomeLocation (PL.Named (knownSymbol @"maxRegion"))) $ f (PS.unSE $ PS.simMaxRegion $ (PS.simOutState stOut)) return (AbstractDomainVals regVals memVals mr) where + f :: forall tp. W4.SymExpr sym tp -> m (AbsRange tp) + f = getAbsRange1 getAbs + getMemAbsVal :: PMC.MemCell sym arch w -> m (MemAbstractValue sym w) @@ -1012,6 +1022,131 @@ instance (PSo.ValidSym sym, PA.ValidArch arch) => PL.LocationWitherable sym arch Just ((Const x'), p') -> return $ (Just x', p') Nothing -> return $ (Nothing, W4.falsePred sym)) s +newtype GetAbsRange sym m = GetAbsRange { getAbsRange :: forall ctx. Ctx.Assignment (W4.SymExpr sym) ctx -> m (Ctx.Assignment AbsRange ctx) } + + +-- FIXME: Move this to Parameterized + +data WithEmbedding v ctx = forall sub. WithEmbedding (Ctx.CtxEmbedding sub ctx) (v sub) + +type MaybeF f = PPa.LiftF Maybe f +-- Strip the 'Nothing' results from an 'Assignment' +flattenMaybeCtx :: + Ctx.Assignment (MaybeF f) ctx -> + (WithEmbedding (Ctx.Assignment f) ctx) +flattenMaybeCtx = \case + Ctx.Empty -> WithEmbedding (Ctx.CtxEmbedding Ctx.zeroSize Ctx.Empty) Ctx.Empty + xs Ctx.:> x | WithEmbedding embed xs' <- flattenMaybeCtx xs -> case x of + PPa.LiftF (Just x') -> WithEmbedding (Ctx.extendEmbeddingBoth embed) (xs' Ctx.:> x') + PPa.LiftF Nothing -> WithEmbedding (Ctx.extendEmbeddingRight embed) xs' + +dropEmbedding :: Ctx.CtxEmbedding (sub Ctx.::> tp) ctx -> Ctx.CtxEmbedding sub ctx +dropEmbedding (Ctx.CtxEmbedding sz (idxs Ctx.:> _)) = Ctx.CtxEmbedding sz idxs + +reverseEmbedding :: + Ctx.CtxEmbedding sub ctx -> + Ctx.Assignment f sub -> + Ctx.Assignment (MaybeF f) ctx +reverseEmbedding embed@(Ctx.CtxEmbedding sz idxs) asn = case (idxs, asn) of + (Ctx.Empty, Ctx.Empty) -> Ctx.replicate sz (PPa.LiftF Nothing) + (_ Ctx.:> idx, asn' Ctx.:> x') -> + let res = reverseEmbedding (dropEmbedding embed) asn' + in res & (ixF idx) .~ PPa.LiftF (Just x') + +mapMaybeCtx :: + forall f g ctx m. + Monad m => + (forall sub_ctx. Ctx.CtxEmbedding sub_ctx ctx -> Ctx.Assignment f sub_ctx -> m (Ctx.Assignment g sub_ctx)) -> + Ctx.Assignment (MaybeF f) ctx -> + m (Ctx.Assignment (MaybeF g) ctx) +mapMaybeCtx f asn | WithEmbedding embed asn' <- flattenMaybeCtx asn = do + asn'' <- f embed asn' + return $ reverseEmbedding embed asn'' + +partitionCtx :: + Monad m => + (forall tp. f tp -> m (Either (g tp) (h tp))) -> + Ctx.Assignment f ctx -> + m (Ctx.Assignment (MaybeF g) ctx, Ctx.Assignment (MaybeF h) ctx) +partitionCtx part = \case + Ctx.Empty -> return (Ctx.Empty, Ctx.Empty) + xs Ctx.:> x -> part x >>= \case + Left g -> partitionCtx part xs >>= \(gs,hs) -> return (gs Ctx.:> PPa.LiftF (Just g), hs Ctx.:> PPa.LiftF Nothing) + Right h -> partitionCtx part xs >>= \(gs,hs) -> return (gs Ctx.:> PPa.LiftF Nothing, hs Ctx.:> PPa.LiftF (Just h)) + + +mkGetAbsRange :: + forall m sym. + IO.MonadIO m => + W4.IsSymExprBuilder sym => + (forall ctx. Ctx.Assignment (W4.SymExpr sym) ctx -> m (Ctx.Assignment AbsRange ctx)) -> + m (GetAbsRange sym m) +mkGetAbsRange f = do + (cache :: IO.IORef (MapF.MapF (W4.SymExpr sym) (AbsRange))) <- IO.liftIO $ IO.newIORef MapF.empty + let + getCache :: forall tp. W4.SymExpr sym tp -> m (Either (W4.SymExpr sym tp) (AbsRange tp)) + getCache e = do + m <- IO.liftIO $ IO.readIORef cache + case MapF.lookup e m of + Just a -> return $ Right a + Nothing -> return $ Left e + + return $ GetAbsRange $ \es -> do + (es_uncached, as_cached) <- partitionCtx getCache es + as_result <- mapMaybeCtx (\_ -> f) es_uncached + let + go :: forall tp. MaybeF AbsRange tp -> MaybeF AbsRange tp -> AbsRange tp + go a b = case (a, b) of + (PPa.LiftF (Just a'), _) -> a' + (_, PPa.LiftF (Just a')) -> a' + _ -> error $ "mapMaybeCtx: impossible 'Nothing' result" + return $ Ctx.zipWith go as_result as_cached + +getAbsRange1 :: Monad m => GetAbsRange sym m -> (forall tp. W4.SymExpr sym tp -> m (AbsRange tp)) +getAbsRange1 f = (\e -> getAbsRange f (Ctx.singleton e) >>= \case (Ctx.Empty Ctx.:> a) -> return a) + +-- | Batch the concretization of many values at once, given a function 'f' that +-- uses a 'GetAbsRange' multiple times to concretize many individual expressions. +-- +-- This runs the given 'f' twice, once with a dummy 'GetAbsRange' in order to collect +-- the values that need to be concretized. Once collected, they are all concretized +-- at once, and then these concrete values are used on the second run of 'f'. +batchGetAbsRange :: + forall sym m a. + IO.MonadIO m => + W4.IsSymExprBuilder sym => + GetAbsRange sym m -> + (GetAbsRange sym m -> m a) -> + m a +batchGetAbsRange mkabs f = do + (ref :: IO.IORef (MapF.MapF (W4.SymExpr sym) (PPa.LiftF Maybe AbsRange))) <- IO.liftIO $ IO.newIORef MapF.empty + let + dummy1 :: forall tp. W4.SymExpr sym tp -> m (AbsRange tp) + dummy1 e = do + IO.liftIO $ IO.modifyIORef ref (MapF.insert e (PPa.LiftF Nothing)) + return $ AbsUnconstrained (W4.exprType e) + + dummy :: GetAbsRange sym m + dummy = GetAbsRange $ TFC.traverseFC dummy1 + _ <- f dummy + m <- IO.liftIO $ IO.readIORef ref + Some ctx <- return $ Ctx.fromList (MapF.keys m) + ctx' <- getAbsRange mkabs ctx + + to_exprs1 <- Ctx.traverseWithIndex (\idx v -> return $ Product.Pair v (ctx' Ctx.! idx)) ctx + let + to_exprs :: MapF.MapF (W4.SymExpr sym) AbsRange + to_exprs = MapF.fromList $ TFC.toListFC (\(Product.Pair v a) -> MapF.Pair v a) to_exprs1 + + cached1 :: forall tp. W4.SymExpr sym tp -> m (AbsRange tp) + cached1 e = case MapF.lookup e to_exprs of + Just a -> return a + Nothing -> getAbsRange1 mkabs e + + cached :: GetAbsRange sym m + cached = GetAbsRange $ TFC.traverseFC cached1 + + f cached diff --git a/src/Pate/Verification/Widening.hs b/src/Pate/Verification/Widening.hs index 3f293255..0df285a1 100644 --- a/src/Pate/Verification/Widening.hs +++ b/src/Pate/Verification/Widening.hs @@ -129,6 +129,7 @@ import qualified Lang.Crucible.Utils.MuxTree as MT import Pate.Verification.Domain (universalDomain) import qualified Data.Parameterized.TraversableF as TF import qualified Data.IORef as IO +import qualified Data.Parameterized.TraversableFC as TFC -- | Generate a fresh abstract domain value for the given graph node. -- This should represent the most information we can ever possibly @@ -1647,37 +1648,16 @@ getInitalAbsDomainVals :: PAD.AbstractDomain sym arch v {- ^ incoming pre-domain -} -> EquivM sym arch (PPa.PatchPair (PAD.AbstractDomainVals sym arch)) getInitalAbsDomainVals bundle preDom = withTracing @"debug" "getInitalAbsDomainVals" $ withSym $ \sym -> do - PEM.SymExprMappable asEM <- return $ PEM.symExprMappable sym - - (ref :: IO.IORef (MapF.MapF (W4.SymExpr sym) (PAD.AbsRange))) <- IO.liftIO $ IO.newIORef MapF.empty - - - let - getConcreteRange :: forall tp. W4.SymExpr sym tp -> EquivM_ sym arch (PAD.AbsRange tp) - getConcreteRange e = do - m <- (IO.liftIO $ IO.readIORef ref) - case MapF.lookup e m of - Just a -> return a - Nothing -> do - -- e' <- asEM @tp $ applyCurrentAsms e - e'' <- concretizeWithSolver e - emitTraceLabel @"expr" "output" (Some e'') - let a = PAD.extractAbsRange sym e'' - IO.liftIO $ IO.modifyIORef ref (MapF.insert e a) - return a + getConcreteRange <- PAD.mkGetAbsRange (\es -> TFC.fmapFC (PAD.extractAbsRange sym) <$> concretizeWithSolverBatch es) + eqCtx <- equivalenceContext PPa.forBins $ \bin -> do out <- PPa.get bin (PS.simOut bundle) pre <- PPa.get bin (PAD.absDomVals preDom) - {- - _ <- PAD.initAbsDomainVals sym eqCtx collectExpr out pre - Some exprs <- (Ctx.fromList . Set.toList) <$> (IO.liftIO $ IO.readIORef exprsRef) - cm_empty <- emptyConcreteMap - (_, cm) <- concretizeWithSolverGen cm_empty exprs - PAD.initAbsDomainVals sym eqCtx (concFromCache cm) out pre - -} - PAD.initAbsDomainVals sym eqCtx getConcreteRange out pre + + PAD.batchGetAbsRange getConcreteRange $ \getConcreteRangeBatch -> + PAD.initAbsDomainVals sym eqCtx getConcreteRangeBatch out pre widenUsingCounterexample :: From fcb2f7cab1e2e2043f9059167c5fb66cd3d81001 Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Thu, 24 Oct 2024 15:56:29 -0700 Subject: [PATCH 08/16] Experimenting with some limited parallelism in simple cases --- src/Pate/Monad.hs | 54 ++++++++++++++++++++++++- src/Pate/Verification/StrongestPosts.hs | 7 +++- src/Pate/Verification/Widening.hs | 2 +- 3 files changed, 59 insertions(+), 4 deletions(-) diff --git a/src/Pate/Monad.hs b/src/Pate/Monad.hs index 09318010..2426dd4e 100644 --- a/src/Pate/Monad.hs +++ b/src/Pate/Monad.hs @@ -106,7 +106,10 @@ module Pate.Monad , withSharedEnvEmit , module PME , atPriority, currentPriority, thisPriority - , concretizeWithSolverBatch + , concretizeWithSolverBatch + , withFreshSolver + , forkBins + , withForkedSolver ) where @@ -1611,3 +1614,52 @@ equivalenceContext = withValid $ do PA.SomeValidArch d <- CMR.asks envValidArch stackRegion <- CMR.asks (PMC.stackRegion . PME.envCtx) return $ PEq.EquivContext (PA.validArchDedicatedRegisters d) stackRegion (\x -> x) + + +-- | Run the given continuation with a fresh solver process (without affecting the +-- current solver process). The new process will inherit the current assumption +-- context. +withFreshSolver :: + EquivM_ sym arch a -> + EquivM sym arch a +withFreshSolver f = do + vcfg <- CMR.asks envConfig + let solver = PC.cfgSolver vcfg + PSo.Sym n sym _ <- CMR.asks envValidSym + asm <- currentAsm + PSo.withOnlineSolver solver Nothing sym $ \bak -> + CMR.local (\env -> env {envValidSym = PSo.Sym n sym bak, envCurrentFrame = mempty }) $ + withAssumptionSet asm $ f + +-- | Run the given computation in a forked thread with a fresh solver process. +-- This is non-blocking and the current solver process is left unmodified. +withForkedSolver :: + EquivM_ sym arch () -> + EquivM sym arch () +withForkedSolver f = do + thistid <- IO.liftIO $ IO.myThreadId + _ <- IO.withRunInIO $ \runInIO -> + IO.forkFinally (runInIO $ withFreshSolver f) + (\case Left err -> IO.throwTo thistid err; Right () -> return ()) + return () + +-- | Similar to 'forBins' but runs the 'Patched' process in a separate thread +-- concurrently with a fresh solver process. +forkBins :: + (forall bin. PBi.WhichBinaryRepr bin -> EquivM_ sym arch (f bin)) -> + EquivM sym arch (PPa.PatchPair f) +forkBins f = do + (resO, resP) <- do + thistid <- IO.liftIO $ IO.myThreadId + outVar2 <- IO.liftIO $ IO.newEmptyMVar + _ <- IO.withRunInIO $ \runInIO -> + IO.forkFinally (runInIO $ withFreshSolver $ (PPa.catchPairErr (Just <$> f PBi.PatchedRepr) (return Nothing))) + (\case Left err -> IO.throwTo thistid err; Right a -> IO.putMVar outVar2 a) + resO <- PPa.catchPairErr (Just <$> f PBi.OriginalRepr) (return Nothing) + resP <- IO.liftIO $ IO.readMVar outVar2 + return (resO, resP) + case (resO, resP) of + (Just vO, Just vP) -> return $ PPa.PatchPair vO vP + (Nothing, Just vP) -> return $ PPa.PatchPairPatched vP + (Just vO, Nothing) -> return $ PPa.PatchPairOriginal vO + (Nothing, Nothing) -> PPa.throwPairErr diff --git a/src/Pate/Verification/StrongestPosts.hs b/src/Pate/Verification/StrongestPosts.hs index 8197d59d..530e79c0 100644 --- a/src/Pate/Verification/StrongestPosts.hs +++ b/src/Pate/Verification/StrongestPosts.hs @@ -1319,8 +1319,11 @@ getTracesForPred :: EquivM sym arch (Maybe (CE.TraceEvents sym arch), Maybe (CE.TraceEvents sym arch)) getTracesForPred scope bundle dom p = withSym $ \sym -> do not_p <- liftIO $ W4.notPred sym p - p_pretty <- PSi.applySimpStrategy PSi.prettySimplifier p - withTracing @"expr" (Some p_pretty) $ do + -- p_pretty <- withTracing @"debug" "simplifier" $ PSi.applySimpStrategy PSi.prettySimplifier p + withTracing @"message" "Traces" $ do + withTracing @"message" "Condition" $ withForkedSolver $ do + p_pretty <- withTracing @"debug" "simplifier" $ PSi.applySimpStrategy PSi.prettySimplifier p + emitTrace @"expr" (Some p_pretty) mtraceT <- withTracing @"message" "With condition assumed" $ withSatAssumption (PAS.fromPred p) $ do traceT <- getSomeGroundTrace scope bundle dom Nothing diff --git a/src/Pate/Verification/Widening.hs b/src/Pate/Verification/Widening.hs index 0df285a1..10b48c94 100644 --- a/src/Pate/Verification/Widening.hs +++ b/src/Pate/Verification/Widening.hs @@ -1652,7 +1652,7 @@ getInitalAbsDomainVals bundle preDom = withTracing @"debug" "getInitalAbsDomainV getConcreteRange <- PAD.mkGetAbsRange (\es -> TFC.fmapFC (PAD.extractAbsRange sym) <$> concretizeWithSolverBatch es) eqCtx <- equivalenceContext - PPa.forBins $ \bin -> do + forkBins $ \bin -> do out <- PPa.get bin (PS.simOut bundle) pre <- PPa.get bin (PAD.absDomVals preDom) From 49c7eeb9129c5d890d4ba1595d4d822a94027b7e Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Fri, 25 Oct 2024 09:28:54 -0700 Subject: [PATCH 09/16] fixup packet integration test --- src/Pate/Verification/StrongestPosts.hs | 5 ++--- tests/integration/packet/packet.exp | 6 ++++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Pate/Verification/StrongestPosts.hs b/src/Pate/Verification/StrongestPosts.hs index 530e79c0..7d790755 100644 --- a/src/Pate/Verification/StrongestPosts.hs +++ b/src/Pate/Verification/StrongestPosts.hs @@ -1319,9 +1319,8 @@ getTracesForPred :: EquivM sym arch (Maybe (CE.TraceEvents sym arch), Maybe (CE.TraceEvents sym arch)) getTracesForPred scope bundle dom p = withSym $ \sym -> do not_p <- liftIO $ W4.notPred sym p - -- p_pretty <- withTracing @"debug" "simplifier" $ PSi.applySimpStrategy PSi.prettySimplifier p - withTracing @"message" "Traces" $ do - withTracing @"message" "Condition" $ withForkedSolver $ do + withTracing @"expr" (Some p) $ do + withTracing @"message" "Simplified Condition" $ withForkedSolver $ do p_pretty <- withTracing @"debug" "simplifier" $ PSi.applySimpStrategy PSi.prettySimplifier p emitTrace @"expr" (Some p_pretty) mtraceT <- withTracing @"message" "With condition assumed" $ diff --git a/tests/integration/packet/packet.exp b/tests/integration/packet/packet.exp index 8dd9ae51..6b777071 100755 --- a/tests/integration/packet/packet.exp +++ b/tests/integration/packet/packet.exp @@ -93,6 +93,12 @@ expect -re {0: let -- segment1\+0x664.. in not v(\d)+} expect ">" send "0\r" +expect "0: Simplified Condition" +send "0\r" + +expect -re {0: let -- segment1\+0x664.. in not v(\d)+} +send "0\r" + expect -re {v(\d)+ = eq 0x80:\[8\] \(select \(select cInitMemBytes@(\d)+:a 0\) 0x11045:\[32\]\)} expect -re {in not v((\d)+)} expect ">" From 7500e0cb592267bcbb25f15a20b3122529386363 Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Fri, 25 Oct 2024 09:51:19 -0700 Subject: [PATCH 10/16] fix bug where nodes were not outputted during 'wait' unless profiling was enabled --- tools/pate-repl/Repl.hs | 2 ++ tools/pate/Output.hs | 4 +--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/pate-repl/Repl.hs b/tools/pate-repl/Repl.hs index 309e72a2..9a48668b 100644 --- a/tools/pate-repl/Repl.hs +++ b/tools/pate-repl/Repl.hs @@ -477,6 +477,7 @@ prettyNextNodes startAt onlyFinished = do , PO.outIndent = indent , PO.outPP = pp , PO.outDuration = duration + , PO.outFinished = isFinished b , PO.outSuffix = suf , PO.outMoreResults = nesting < 0 , PO.outJSON = json @@ -491,6 +492,7 @@ prettyNextNodes startAt onlyFinished = do , PO.outIndent = 0 , PO.outPP = "" , PO.outDuration = duration + , PO.outFinished = isFinished b , PO.outSuffix = Nothing , PO.outMoreResults = nesting < 0 , PO.outJSON = json diff --git a/tools/pate/Output.hs b/tools/pate/Output.hs index 08320f80..3f01e2ee 100644 --- a/tools/pate/Output.hs +++ b/tools/pate/Output.hs @@ -44,6 +44,7 @@ data OutputElem = OutputElem { outIdx :: Int , outIndent :: Int + , outFinished :: Bool , outDuration :: Maybe (PP.Doc ()) , outPP :: PP.Doc () , outJSON :: JSON.Value @@ -52,9 +53,6 @@ data OutputElem = -- ^ more results at this nesting level that were omitted } -outFinished :: OutputElem -> Bool -outFinished e = isJust (outDuration e) - data Output_ = OutputElemList [OutputElem] | OutputInfo (PP.Doc ()) From 6ecb8823066bef2a18f316e53d8a3f18220859d7 Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Fri, 25 Oct 2024 15:54:48 -0700 Subject: [PATCH 11/16] initAbsDomainVals: handle case where a cell has multiple writes, with only some concrete this relaxes the heuristic slightly to ensure that we attempt to concretize the contents of a memory cell if it was ever written to with a concrete value. Previously we attempted to compute a single abstract domain for the entire cell, which is not always possible. --- src/Pate/Verification/AbstractDomain.hs | 37 +++++++++++++++++-------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/src/Pate/Verification/AbstractDomain.hs b/src/Pate/Verification/AbstractDomain.hs index 56f8dddd..70e57edc 100644 --- a/src/Pate/Verification/AbstractDomain.hs +++ b/src/Pate/Verification/AbstractDomain.hs @@ -451,8 +451,19 @@ joinMemCellAbsValues = MapF.mergeWithKey id id -collapseMemCellVals :: W4.IsSymExprBuilder sym => [MapF.Pair (PMC.MemCell sym arch) (MemAbstractValue s)] -> MapF.MapF (PMC.MemCell sym arch) (MemAbstractValue s) -collapseMemCellVals = foldr (\(MapF.Pair k v) m -> joinMemCellAbsValues (MapF.singleton k v) m) MapF.empty +-- Track if we ever write a concrete value to a given cell +collapseMemCellVals :: + forall sym arch. + W4.IsSymExprBuilder sym => + [MapF.Pair (PMC.MemCell sym arch) (MemAbstractValue sym)] -> + MapF.MapF (PMC.MemCell sym arch) (Const (Bool, Bool)) +collapseMemCellVals = foldr go MapF.empty + where + go :: MapF.Pair (PMC.MemCell sym arch) (MemAbstractValue s) -> MapF.MapF (PMC.MemCell sym arch) (Const (Bool, Bool)) -> MapF.MapF (PMC.MemCell sym arch) (Const (Bool, Bool)) + go (MapF.Pair k (MemAbstractValue (MacawAbstractValue (Ctx.Empty Ctx.:> regAbs Ctx.:> offsetAbs)))) m = + MapF.insertWith (\(Const (a,b)) (Const (c, d)) -> Const (a || c, b || d)) k + (Const (case regAbs of AbsUnconstrained{} -> False; _ -> True, case offsetAbs of AbsUnconstrained{} -> False; _ -> True)) m + -- | From the result of symbolic execution (in 'PS.SimOutput') we extract any abstract domain -- information that we can establish for the registers, memory reads and memory writes. @@ -491,7 +502,8 @@ initAbsDomainVals sym eqCtx getAbs stOut preVals = do let cell = PMC.MemCell ptr w end subTrace (PL.SomeLocation (PL.Cell cell)) $ getMemOpAbsVal mop - memTraceVals <- subTree @"loc" "Initial Domain 4" $ MapF.traverseWithKey (\cell mv -> subTrace (PL.SomeLocation (PL.Cell cell)) $ getCellWithPrevAbsVal cell mv) memTracePrevVals + memTraceVals <- subTree @"loc" "Initial Domain 4" $ + MapF.traverseWithKey (\cell (Const (b1, b2)) -> subTrace (PL.SomeLocation (PL.Cell cell)) $ getCellWithPrevAbsVal cell b1 b2) memTracePrevVals let memVals = joinMemCellAbsValues (MapF.fromList memPreVals) memTraceVals @@ -543,20 +555,21 @@ initAbsDomainVals sym eqCtx getAbs stOut preVals = do -- extract a final abstract value from a first-pass check getCellWithPrevAbsVal :: PMC.MemCell sym arch w -> - MemAbstractValue sym w -> + Bool -> + Bool -> m (MemAbstractValue sym w) - getCellWithPrevAbsVal cell (MemAbstractValue (MacawAbstractValue (Ctx.Empty Ctx.:> regAbs Ctx.:> offsetAbs))) = do + getCellWithPrevAbsVal cell regMaybeConcrete offMaybeConcrete = do CLM.LLVMPointer mem_region mem_offset <- IO.liftIO $ PMC.readMemCell sym (PS.simOutMem stOut) cell -- we avoid checking the actual memory model for concrete values if we didn't originally -- write/read a concrete value, since it's unlikely to succeed (outside of strange memory aliasing cases) -- since this is an under-approximation it's safe to just leave them as unconstrained - memRegAbs <- case regAbs of - AbsUnconstrained tp -> return $ AbsUnconstrained tp - _ -> f (W4.natToIntegerPure mem_region) - - memOffsetAbs <- case offsetAbs of - AbsUnconstrained tp -> return $ AbsUnconstrained tp - _ -> f mem_offset + memRegAbs <- case regMaybeConcrete of + False -> return $ AbsUnconstrained W4.BaseIntegerRepr + True -> f (W4.natToIntegerPure mem_region) + + memOffsetAbs <- case offMaybeConcrete of + False -> return $ AbsUnconstrained (W4.exprType mem_offset) + True -> f mem_offset return $ MemAbstractValue $ MacawAbstractValue (Ctx.Empty Ctx.:> memRegAbs Ctx.:> memOffsetAbs) From 65a18a95e154043b01aec1d17f275fc4096649dd Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Fri, 25 Oct 2024 16:29:56 -0700 Subject: [PATCH 12/16] disable simplifcation of 'SimBundle' nothing actually depends on this, for the most part it just wastes time --- src/Pate/Verification/StrongestPosts.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Pate/Verification/StrongestPosts.hs b/src/Pate/Verification/StrongestPosts.hs index 7d790755..d528eb41 100644 --- a/src/Pate/Verification/StrongestPosts.hs +++ b/src/Pate/Verification/StrongestPosts.hs @@ -1435,7 +1435,8 @@ withSimBundle :: EquivM sym arch a withSimBundle pg vars node f = do bundle0 <- mkSimBundle pg node vars - bundle1 <- PSi.applySimpStrategy PSi.coreStrategy bundle0 + let bundle1 = bundle0 + --bundle1 <- withTracing @"debug" "simpBundle" $ PSi.applySimpStrategy PSi.coreStrategy bundle0 bundle <- applyCurrentAsms bundle1 emitTrace @"bundle" (Some bundle) f bundle From d19920a88bacdc3adbb96dfa36d8b66cb2fbaf62 Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Fri, 25 Oct 2024 16:32:01 -0700 Subject: [PATCH 13/16] allow forked solver instances to share the sat/unsat cache a forked instance of the solver has the same assumption context, and so any results about satisfiability/unsatisfiability can be re-used by other threads --- src/Pate/Monad.hs | 31 ++++++++++++++++++++----- src/Pate/Verification/StrongestPosts.hs | 2 +- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/Pate/Monad.hs b/src/Pate/Monad.hs index 2426dd4e..d7d40047 100644 --- a/src/Pate/Monad.hs +++ b/src/Pate/Monad.hs @@ -110,6 +110,7 @@ module Pate.Monad , withFreshSolver , forkBins , withForkedSolver + , withForkedSolver_ ) where @@ -1627,22 +1628,40 @@ withFreshSolver f = do let solver = PC.cfgSolver vcfg PSo.Sym n sym _ <- CMR.asks envValidSym asm <- currentAsm - PSo.withOnlineSolver solver Nothing sym $ \bak -> + PSo.withOnlineSolver solver Nothing sym $ \bak -> do + unsatCacheRef <- asks envUnsatCacheRef + satCache <- asks envSatCacheRef + CMR.local (\env -> env {envValidSym = PSo.Sym n sym bak, envCurrentFrame = mempty }) $ - withAssumptionSet asm $ f + withAssumptionSet asm $ CMR.local (\env -> env { envUnsatCacheRef = unsatCacheRef, envSatCacheRef = satCache }) f --- | Run the given computation in a forked thread with a fresh solver process. --- This is non-blocking and the current solver process is left unmodified. -withForkedSolver :: + +-- | Similar to 'withForkedSolver' but does not return a result. +withForkedSolver_ :: EquivM_ sym arch () -> EquivM sym arch () -withForkedSolver f = do +withForkedSolver_ f = do thistid <- IO.liftIO $ IO.myThreadId _ <- IO.withRunInIO $ \runInIO -> IO.forkFinally (runInIO $ withFreshSolver f) (\case Left err -> IO.throwTo thistid err; Right () -> return ()) return () +-- | Run the given function in a forked thread with a fresh solver process. +-- This is non-blocking and the current solver process is left unmodified. +-- The returned IO action will block until the given function has finished. +withForkedSolver :: + EquivM_ sym arch a -> + EquivM sym arch (IO a) +withForkedSolver f = do + thistid <- IO.liftIO $ IO.myThreadId + outVar <- IO.liftIO $ IO.newEmptyMVar + _ <- IO.withRunInIO $ \runInIO -> + IO.forkFinally (runInIO $ withFreshSolver f) + (\case Left err -> IO.throwTo thistid err; Right a -> IO.putMVar outVar a) + return $ IO.readMVar outVar + + -- | Similar to 'forBins' but runs the 'Patched' process in a separate thread -- concurrently with a fresh solver process. forkBins :: diff --git a/src/Pate/Verification/StrongestPosts.hs b/src/Pate/Verification/StrongestPosts.hs index d528eb41..837a203e 100644 --- a/src/Pate/Verification/StrongestPosts.hs +++ b/src/Pate/Verification/StrongestPosts.hs @@ -1320,7 +1320,7 @@ getTracesForPred :: getTracesForPred scope bundle dom p = withSym $ \sym -> do not_p <- liftIO $ W4.notPred sym p withTracing @"expr" (Some p) $ do - withTracing @"message" "Simplified Condition" $ withForkedSolver $ do + withTracing @"message" "Simplified Condition" $ withForkedSolver_ $ do p_pretty <- withTracing @"debug" "simplifier" $ PSi.applySimpStrategy PSi.prettySimplifier p emitTrace @"expr" (Some p_pretty) mtraceT <- withTracing @"message" "With condition assumed" $ From b7c0b16e9a21927fc9ad6c35ee13151ae83476f3 Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Fri, 25 Oct 2024 16:35:43 -0700 Subject: [PATCH 14/16] move 'asScopedConst' to last rescoping strategy in practice this turns out to be the most expensive strategy to try in cases where it doesn't apply --- src/Pate/Verification/Widening.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Pate/Verification/Widening.hs b/src/Pate/Verification/Widening.hs index 10b48c94..b434f87b 100644 --- a/src/Pate/Verification/Widening.hs +++ b/src/Pate/Verification/Widening.hs @@ -1236,9 +1236,9 @@ abstractOverVars scope_pre bundle _from _to postSpec postResult = do , ("simpleStackOffsetO", simpleStackOffset PBi.OriginalRepr se) , ("simpleStackOffsetP", simpleStackOffset PBi.PatchedRepr se) -- solver-based strategies now - , ("asScopedConst", asScopedConst (W4.truePred sym) se) , ("asSimpleAssign", asSimpleAssign se) ] ++ asStackOffsetStrats + ++ [ ("asScopedConst", asScopedConst (W4.truePred sym) se) ] lift $ emitEvent (PE.ScopeAbstractionResult (PS.simPair bundle) se se') return se' From 97d1c09969917524328d36ca2ff4ebe072d34a2e Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Mon, 28 Oct 2024 11:02:28 -0700 Subject: [PATCH 15/16] use Crucible's assumption/frames to initialize forked solver state the assumption state in 'envCurrentFrame' is not 100% complete, as it is missing bounds assumptions for bounded constants --- src/Pate/Monad.hs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Pate/Monad.hs b/src/Pate/Monad.hs index d7d40047..013c3956 100644 --- a/src/Pate/Monad.hs +++ b/src/Pate/Monad.hs @@ -1627,14 +1627,11 @@ withFreshSolver f = do vcfg <- CMR.asks envConfig let solver = PC.cfgSolver vcfg PSo.Sym n sym _ <- CMR.asks envValidSym - asm <- currentAsm + st <- withOnlineBackend $ \bak -> safeIO PEE.SolverError $ LCB.saveAssumptionState bak PSo.withOnlineSolver solver Nothing sym $ \bak -> do - unsatCacheRef <- asks envUnsatCacheRef - satCache <- asks envSatCacheRef - - CMR.local (\env -> env {envValidSym = PSo.Sym n sym bak, envCurrentFrame = mempty }) $ - withAssumptionSet asm $ CMR.local (\env -> env { envUnsatCacheRef = unsatCacheRef, envSatCacheRef = satCache }) f - + CMR.local (\env -> env {envValidSym = PSo.Sym n sym bak }) $ do + safeIO PEE.SolverError $ LCBO.restoreSolverState bak st + f -- | Similar to 'withForkedSolver' but does not return a result. withForkedSolver_ :: From c923ac3e0a6747a9ba6ab456239ea06ff6edc74b Mon Sep 17 00:00:00 2001 From: Daniel Matichuk Date: Mon, 28 Oct 2024 14:28:40 -0700 Subject: [PATCH 16/16] concretize: add explicit positivity assumptions for integers this is a workaround for cases where what4 fails to introduce this assumption itself --- src/Pate/Equivalence/Error.hs | 1 + src/Pate/Monad.hs | 85 +++++++++++++++++++++++------------ 2 files changed, 58 insertions(+), 28 deletions(-) diff --git a/src/Pate/Equivalence/Error.hs b/src/Pate/Equivalence/Error.hs index f82d89d0..21966595 100644 --- a/src/Pate/Equivalence/Error.hs +++ b/src/Pate/Equivalence/Error.hs @@ -172,6 +172,7 @@ data InnerEquivalenceError arch | forall e. X.Exception e => UnhandledException e | IncompatibleSingletonNodes (PB.ConcreteBlock arch PBi.Original) (PB.ConcreteBlock arch PBi.Patched) | SolverError X.SomeException + | ConcretizationFailure String errShortName :: MS.SymArchConstraints arch => InnerEquivalenceError arch -> String errShortName = \case diff --git a/src/Pate/Monad.hs b/src/Pate/Monad.hs index 013c3956..a221f9db 100644 --- a/src/Pate/Monad.hs +++ b/src/Pate/Monad.hs @@ -212,6 +212,7 @@ import qualified Data.Aeson as JSON import qualified Data.Aeson.Text as JSON import qualified GHC.IO.Unsafe as IO import qualified Data.Parameterized.TraversableFC as TFC +import qualified What4.Utils.AbstractDomains as W4AD atPriority :: NodePriority -> @@ -1102,50 +1103,75 @@ concretizeWithSolverGen' cm_outer a = withSym $ \sym -> do -- first time seeing this value, insert concretized version into cache Nothing -> do p' <- IO.liftIO $ W4.andPred sym p e_is_conc1 + emitTraceLabel @"expr" "p'" (Some p') return $ ConcreteMap (MapF.insert e (PPa.LiftF (Just conc1)) m) p' loop :: W4.Pred sym -> ConcreteMap sym -> EquivM_ sym arch (ConcreteMap sym, Bool) loop not_prev_model (ConcreteMap cm p) = do - mresult <- heuristicSat "concretize" not_prev_model $ \case - W4R.Sat evalFn -> + mresult <- withTracing @"debug" "loop" $ heuristicSat "concretize" not_prev_model $ \case + W4R.Sat evalFn -> do + emitTrace @"debug" "Sat" + emitTrace @"expr" (Some not_prev_model) W4R.Sat <$> TFC.foldrMFC (go evalFn) (ConcreteMap cm (W4.truePred sym)) a W4R.Unsat () -> return $ W4R.Unsat () W4R.Unknown -> return $ W4R.Unknown case mresult of W4R.Sat (ConcreteMap cm' p') -> do not_this <- IO.liftIO $ W4.notPred sym p' - withAssumption not_prev_model $ do - loop not_this (ConcreteMap cm' (W4.truePred sym)) + case W4.asConstantPred not_this of + Just True -> throwHere $ PEE.ConcretizationFailure "Previous computed model is unsatisfiable" + _ -> withAssumption not_prev_model $ do + loop not_this (ConcreteMap cm' (W4.truePred sym)) -- predicate is unsatisfiable, so there are no models that disprove all of the -- current cached values W4R.Unsat () -> return $ (ConcreteMap cm p, True) W4R.Unknown -> return (ConcreteMap cm p, False) + + -- this is a workaround for the fact that we're manually setting the + -- abstract domains for any integers coming out of the memory model to be positive. + -- What4's bookkeeping sometimes neglects to introduce this assumption into the solver state, so + -- we re-add it here explicitly. + explicit_nat_asm :: forall tp. W4.SymExpr sym tp -> W4.Pred sym -> EquivM_ sym arch (W4.Pred sym) + explicit_nat_asm e p = do + case W4.exprType e of + W4.BaseIntegerRepr -> case W4AD.rangeLowBound (W4.integerBounds e) of + W4AD.Inclusive i -> do + i_sym <- IO.liftIO $ W4.intLit sym i + -- we add an annotation to prevent the expression builder simplification rules from turning this + -- into just 'true' if there is a cached result for this comparison + (_, e') <- IO.liftIO $ W4.annotateTerm sym (W4.unsafeSetAbstractValue W4AD.unboundedRange e) + p' <- IO.liftIO $ W4.intLe sym i_sym e' + IO.liftIO $ W4.andPred sym p p' + _ -> return p + _ -> return p + + nat_asms <- TFC.foldrMFC explicit_nat_asm (W4.truePred sym) a + withAssumption nat_asms $ do + (cm, complete) <- loop (W4.truePred sym) cm_outer + (ConcreteMap cm_ _) <- case complete of + True -> return cm + False -> do + -- finish the 'ConcreteMap' by double checking each map + let + upd :: forall tp. W4.SymExpr sym tp -> ConcreteMap sym -> EquivM_ sym arch (ConcreteMap sym) + upd e cm_ = concretizeWithMap cm_ e >>= \case + Just e' -> case W4.asConcrete e' of + -- e is possibly concrete, double check it + Just{} -> snd <$> concretizeOne e cm_ + -- e is known-symbolic, nothing to do + Nothing -> return cm_ + -- e is missing from map entirely, so check it here + Nothing -> snd <$> concretizeOne e cm_ + TFC.foldrMFC' upd cm a + let cm' = ConcreteMap cm_ (W4.truePred sym) + let + conc :: forall tp. W4.SymExpr sym tp -> EquivM_ sym arch (W4.SymExpr sym tp) + conc e = fromMaybe e <$> concretizeWithMap cm' e - (cm, complete) <- loop (W4.truePred sym) cm_outer - (ConcreteMap cm_ _) <- case complete of - True -> return cm - False -> do - -- finish the 'ConcreteMap' by double checking each map - let - upd :: forall tp. W4.SymExpr sym tp -> ConcreteMap sym -> EquivM_ sym arch (ConcreteMap sym) - upd e cm_ = concretizeWithMap cm_ e >>= \case - Just e' -> case W4.asConcrete e' of - -- e is possibly concrete, double check it - Just{} -> snd <$> concretizeOne e cm_ - -- e is known-symbolic, nothing to do - Nothing -> return cm_ - -- e is missing from map entirely, so check it here - Nothing -> snd <$> concretizeOne e cm_ - TFC.foldrMFC' upd cm a - let cm' = ConcreteMap cm_ (W4.truePred sym) - let - conc :: forall tp. W4.SymExpr sym tp -> EquivM_ sym arch (W4.SymExpr sym tp) - conc e = fromMaybe e <$> concretizeWithMap cm' e - - a' <- TFC.traverseFC conc a - return (a', cm') + a' <- TFC.traverseFC conc a + return (a', cm') -- | Similar to 'concretizeWithSolver' but uses a widening strategy to -- attempt to concretize all of the given expressions simultaneously. @@ -1630,7 +1656,10 @@ withFreshSolver f = do st <- withOnlineBackend $ \bak -> safeIO PEE.SolverError $ LCB.saveAssumptionState bak PSo.withOnlineSolver solver Nothing sym $ \bak -> do CMR.local (\env -> env {envValidSym = PSo.Sym n sym bak }) $ do - safeIO PEE.SolverError $ LCBO.restoreSolverState bak st + -- we need to ensure that the solver process is started, since otherwise + -- restoreSolverState does nothing + withSolverProcess $ \_sp -> + safeIO PEE.SolverError $ LCBO.restoreSolverState bak st f -- | Similar to 'withForkedSolver' but does not return a result.