From 6792d204101fc811ff192e8e2613eabcee362049 Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Fri, 13 Sep 2024 14:37:24 -0700 Subject: [PATCH 01/13] start --- ailment/__init__.py | 9 +- ailment/block.py | 2 +- ailment/converter_vex.py | 10 +- ailment/expression.py | 261 ++------------------------------------- ailment/statement.py | 6 +- 5 files changed, 25 insertions(+), 263 deletions(-) diff --git a/ailment/__init__.py b/ailment/__init__.py index 73fdf4e..5a9ed66 100644 --- a/ailment/__init__.py +++ b/ailment/__init__.py @@ -5,7 +5,7 @@ from .block import Block from . import statement as Stmt from . import expression as Expr -from .statement import Assignment +from .statement import Assignment, Statement from .expression import Expression, Const, Tmp, Register, UnaryOp, BinaryOp from .converter_common import Converter from .manager import Manager @@ -24,6 +24,7 @@ except ImportError as e: log.debug("Could not import VEXIRSBConverter") log.debug(e) + VEXIRSBConverter = None try: from .converter_pcode import PCodeIRSBConverter @@ -33,6 +34,7 @@ except ImportError as e: log.debug("Could not import PCodeIRSBConverter") log.debug(e) + PCodeIRSBConverter = None class IRSBConverter(Converter): @@ -59,6 +61,7 @@ def convert(irsb, manager): # pylint:disable=arguments-differ "Block", "Stmt", "Expr", + "Statement", "Assignment", "Expression", "Const", @@ -70,6 +73,6 @@ def convert(irsb, manager): # pylint:disable=arguments-differ "IRSBConverter", "AILBlockWalkerBase", "AILBlockWalker", - *(["PCodeIRSBConverter"] if "pcode" in available_converters else []), - *(["VEXIRSBConverter"] if "vex" in available_converters else []), + "PCodeIRSBConverter", + "VEXIRSBConverter", ] diff --git a/ailment/block.py b/ailment/block.py index 892ad56..a5fc57e 100644 --- a/ailment/block.py +++ b/ailment/block.py @@ -16,7 +16,7 @@ class Block: "idx", ) - def __init__(self, addr, original_size, statements=None, idx=None): + def __init__(self, addr: int, original_size, statements=None, idx=None): self.addr = addr self.original_size = original_size self.statements: list["Statement"] = [] if statements is None else statements diff --git a/ailment/converter_vex.py b/ailment/converter_vex.py index d20573b..1aa9511 100644 --- a/ailment/converter_vex.py +++ b/ailment/converter_vex.py @@ -332,15 +332,7 @@ def Triop(expr, manager): bits=bits, ) - return TernaryOp( - manager.next_atom(), - op_name, - operands, - ins_addr=manager.ins_addr, - vex_block_addr=manager.block_addr, - vex_stmt_idx=manager.vex_stmt_idx, - bits=bits, - ) + raise TypeError("Please figure out what kind of operation this is (smart money says fused multiply) and convert it into multiple binops") @staticmethod def Const(expr, manager): diff --git a/ailment/expression.py b/ailment/expression.py index 4feefa1..1d86243 100644 --- a/ailment/expression.py +++ b/ailment/expression.py @@ -10,7 +10,7 @@ claripy = None from .tagged_object import TaggedObject -from .utils import get_bits, stable_hash, is_none_or_likeable, is_none_or_matchable +from .utils import get_bits, stable_hash, is_none_or_likeable if TYPE_CHECKING: from .statement import Statement @@ -44,9 +44,6 @@ def __eq__(self, other): def likes(self, atom): # pylint:disable=unused-argument,no-self-use raise NotImplementedError() - def matches(self, atom): # pylint:disable=unused-argument,no-self-use - return NotImplementedError() - def replace(self, old_expr, new_expr): if self is old_expr: r = True @@ -118,7 +115,6 @@ def likes(self, other): and self.bits == other.bits ) - matches = likes __hash__ = TaggedObject.__hash__ def _hash_core(self): @@ -157,7 +153,6 @@ def __str__(self): def likes(self, other): return type(self) is type(other) and self.tmp_idx == other.tmp_idx and self.bits == other.bits - matches = likes __hash__ = TaggedObject.__hash__ def _hash_core(self): @@ -186,8 +181,6 @@ def size(self): def likes(self, atom): return type(self) is type(atom) and self.reg_offset == atom.reg_offset and self.bits == atom.bits - matches = likes - def __repr__(self): return str(self) @@ -287,14 +280,6 @@ def likes(self, atom): and self.oident == atom.oident ) - def matches(self, atom): - return ( - isinstance(atom, VirtualVariable) - and self.bits == atom.bits - and self.category == atom.category - and self.oident == atom.oident - ) - def __repr__(self): ori_str = "" match self.category: @@ -361,29 +346,6 @@ def likes(self, atom) -> bool: return self_src_and_vvarids == other_src_and_vvarids return False - def matches(self, atom) -> bool: - if isinstance(atom, Phi) and self.bits == atom.bits: - if len(self.src_and_vvars) != len(atom.src_and_vvars): - return False - self_src_and_vvars = dict(self.src_and_vvars) - other_src_and_vvars = dict(atom.src_and_vvars) - for src, self_vvar in self_src_and_vvars.items(): - if src not in other_src_and_vvars: - return False - other_vvar = other_src_and_vvars[src] - if self_vvar is None and other_vvar is None: - continue - if ( - self_vvar is None - and other_vvar is not None - or self_vvar is not None - and other_vvar is None - or not self_vvar.matches(other_vvar) - ): - return False - return True - return False - def __repr__(self): return f"𝜙@{self.bits}b {self.src_and_vvars}" @@ -454,11 +416,11 @@ class UnaryOp(Op): "variable_offset", ) - def __init__(self, idx, op, operand, variable=None, variable_offset=None, bits: int | None = None, **kwargs): + def __init__(self, idx, op, operand, variable=None, variable_offset=None, **kwargs): super().__init__(idx, (operand.depth if isinstance(operand, Expression) else 0) + 1, op, **kwargs) self.operand = operand - self.bits = operand.bits if bits is None else bits + self.bits = operand.bits self.variable = variable self.variable_offset = variable_offset @@ -470,18 +432,7 @@ def __repr__(self): def likes(self, other): return ( - type(other) is UnaryOp - and self.op == other.op - and self.bits == other.bits - and self.operand.likes(other.operand) - ) - - def matches(self, atom): - return ( - type(atom) is UnaryOp - and self.op == atom.op - and self.bits == atom.bits - and self.operand.matches(atom.operand) + type(other) is UnaryOp and self.op == other.op and self.bits == other.bits and self.operand == other.operand ) __hash__ = TaggedObject.__hash__ @@ -497,7 +448,7 @@ def replace(self, old_expr, new_expr): r, replaced_operand = self.operand.replace(old_expr, new_expr) if r: - return True, UnaryOp(self.idx, self.op, replaced_operand, bits=self.bits, **self.tags) + return True, UnaryOp(self.idx, self.op, replaced_operand, **self.tags) else: return False, self @@ -511,13 +462,7 @@ def size(self): def copy(self) -> UnaryOp: return UnaryOp( - self.idx, - self.op, - self.operand, - variable=self.variable, - variable_offset=self.variable_offset, - bits=self.bits, - **self.tags, + self.idx, self.op, self.operand, variable=self.variable, variable_offset=self.variable_offset, **self.tags ) def has_atom(self, atom, identity=True): @@ -581,19 +526,6 @@ def likes(self, other): and self.rounding_mode == other.rounding_mode ) - def matches(self, other): - return ( - type(other) is Convert - and self.from_bits == other.from_bits - and self.to_bits == other.to_bits - and self.bits == other.bits - and self.is_signed == other.is_signed - and self.operand.matches(other.operand) - and self.from_type == other.from_type - and self.to_type == other.to_type - and self.rounding_mode == other.rounding_mode - ) - __hash__ = TaggedObject.__hash__ def _hash_core(self): @@ -689,17 +621,7 @@ def likes(self, other): and self.from_type == other.from_type and self.to_bits == other.to_bits and self.to_type == other.to_type - and self.operand.likes(other.operand) - ) - - def matches(self, other): - return ( - type(other) is Reinterpret - and self.from_bits == other.from_bits - and self.from_type == other.from_type - and self.to_bits == other.to_bits - and self.to_type == other.to_type - and self.operand.matches(other.operand) + and self.operand == other.operand ) __hash__ = TaggedObject.__hash__ @@ -892,17 +814,6 @@ def likes(self, other): and self.rounding_mode == other.rounding_mode ) - def matches(self, other): - return ( - type(other) is BinaryOp - and self.op == other.op - and self.bits == other.bits - and self.signed == other.signed - and is_none_or_matchable(self.operands, other.operands, is_list=True) - and self.floating_point == other.floating_point - and self.rounding_mode == other.rounding_mode - ) - __hash__ = TaggedObject.__hash__ def _hash_core(self): @@ -1002,117 +913,6 @@ def copy(self) -> BinaryOp: ) -class TernaryOp(Op): - OPSTR_MAP = {} - - __slots__ = ( - "operands", - "bits", - ) - - def __init__(self, idx, op, operands, bits=None, **kwargs): - depth = ( - max( - operands[0].depth if isinstance(operands[0], Expression) else 0, - operands[1].depth if isinstance(operands[1], Expression) else 0, - operands[2].depth if isinstance(operands[1], Expression) else 0, - ) - + 1 - ) - super().__init__(idx, depth, op, **kwargs) - - assert len(operands) == 3 - self.operands = operands - self.bits = bits - - def __str__(self): - return f"{self.verbose_op}({self.operands[0]}, {self.operands[1]}, {self.operands[2]})" - - def __repr__(self): - return f"{self.verbose_op}({self.operands[0]}, {self.operands[1]}, {self.operands[2]})" - - def likes(self, other): - return ( - type(other) is TernaryOp - and self.op == other.op - and self.bits == other.bits - and is_none_or_likeable(self.operands, other.operands, is_list=True) - ) - - def matches(self, other): - return ( - type(other) is TernaryOp - and self.op == other.op - and self.bits == other.bits - and is_none_or_matchable(self.operands, other.operands, is_list=True) - ) - - __hash__ = TaggedObject.__hash__ - - def _hash_core(self): - return stable_hash((self.op, tuple(self.operands), self.bits)) - - def has_atom(self, atom, identity=True): - if super().has_atom(atom, identity=identity): - return True - - for op in self.operands: - if identity and op == atom: - return True - if not identity and isinstance(op, Atom) and op.likes(atom): - return True - if isinstance(op, Atom) and op.has_atom(atom, identity=identity): - return True - return False - - def replace(self, old_expr, new_expr): - if self.operands[0] == old_expr: - r0 = True - replaced_operand_0 = new_expr - elif isinstance(self.operands[0], Expression): - r0, replaced_operand_0 = self.operands[0].replace(old_expr, new_expr) - else: - r0, replaced_operand_0 = False, None - - if self.operands[1] == old_expr: - r1 = True - replaced_operand_1 = new_expr - elif isinstance(self.operands[1], Expression): - r1, replaced_operand_1 = self.operands[1].replace(old_expr, new_expr) - else: - r1, replaced_operand_1 = False, None - - if self.operands[2] == old_expr: - r2 = True - replaced_operand_2 = new_expr - elif isinstance(self.operands[2], Expression): - r2, replaced_operand_2 = self.operands[2].replace(old_expr, new_expr) - else: - r2, replaced_operand_2 = False, None - - if r0 or r1 or r2: - return True, TernaryOp( - self.idx, - self.op, - [replaced_operand_0, replaced_operand_1, replaced_operand_2], - bits=self.bits, - **self.tags, - ) - else: - return False, self - - @property - def verbose_op(self): - return self.op - - @property - def size(self): - return self.bits // 8 - - def copy(self) -> TernaryOp: - return TernaryOp(self.idx, self.op, self.operands[::], bits=self.bits, **self.tags) - - class Load(Expression): __slots__ = ( "addr", @@ -1169,6 +969,7 @@ def replace(self, old_expr, new_expr): def _likes_addr(self, other_addr): if hasattr(self.addr, "likes") and hasattr(other_addr, "likes"): return self.addr.likes(other_addr) + return self.addr == other_addr def likes(self, other): @@ -1181,21 +982,6 @@ def likes(self, other): and self.alt == other.alt ) - def _matches_addr(self, other_addr): - if hasattr(self.addr, "matches") and hasattr(other_addr, "matches"): - return self.addr.matches(other_addr) - return self.addr == other_addr - - def matches(self, other): - return ( - type(other) is Load - and self._matches_addr(other.addr) - and self.size == other.size - and self.endness == other.endness - and self.guard == other.guard - and self.alt == other.alt - ) - __hash__ = TaggedObject.__hash__ def _hash_core(self): @@ -1252,18 +1038,9 @@ def __str__(self): def likes(self, atom): return ( type(atom) is ITE - and self.cond.likes(atom.cond) - and self.iffalse.likes(atom.iffalse) - and self.iftrue.likes(atom.iftrue) - and self.bits == atom.bits - ) - - def matches(self, atom): - return ( - type(atom) is ITE - and self.cond.matches(atom.cond) - and self.iffalse.matches(atom.iffalse) - and self.iftrue.matches(atom.iftrue) + and self.cond == atom.cond + and self.iffalse == atom.iffalse + and self.iftrue == atom.iftrue and self.bits == atom.bits ) @@ -1555,20 +1332,7 @@ def _hash_core(self): return stable_hash((MultiStatementExpression,) + tuple(self.stmts) + (self.expr,)) def likes(self, other): - return ( - type(self) is type(other) - and len(self.stmts) == len(other.stmts) - and all(s_stmt.likes(o_stmt) for s_stmt, o_stmt in zip(self.stmts, other.stmts)) - and self.expr.likes(other.expr) - ) - - def matches(self, atom): - return ( - type(self) is type(atom) - and len(self.stmts) == len(atom.stmts) - and all(s_stmt.matches(o_stmt) for s_stmt, o_stmt in zip(self.stmts, atom.stmts)) - and self.expr.matches(atom.expr) - ) + return type(self) is type(other) and self.stmts == other.stmts and self.expr == other.expr def __repr__(self): return f"MultiStatementExpression({self.stmts}, {self.expr})" @@ -1661,7 +1425,6 @@ def likes(self, other): and self.offset == other.offset ) - matches = likes __hash__ = TaggedObject.__hash__ def _hash_core(self): diff --git a/ailment/statement.py b/ailment/statement.py index 4edb5d9..e6ed9aa 100644 --- a/ailment/statement.py +++ b/ailment/statement.py @@ -1,5 +1,6 @@ # pylint:disable=isinstance-second-argument-not-valid-type,no-self-use,arguments-renamed from typing import Optional, TYPE_CHECKING +from abc import ABC, abstractmethod try: import claripy @@ -14,19 +15,22 @@ from angr.calling_conventions import SimCC -class Statement(TaggedObject): +class Statement(TaggedObject, ABC): """ The base class of all AIL statements. """ __slots__ = () + @abstractmethod def __repr__(self): raise NotImplementedError() + @abstractmethod def __str__(self): raise NotImplementedError() + @abstractmethod def replace(self, old_expr, new_expr): raise NotImplementedError() From e361e3afe851c0e96fbb9f8c343533fa077e65b9 Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Wed, 9 Oct 2024 15:33:47 -0700 Subject: [PATCH 02/13] continue --- ailment/__init__.py | 9 +- ailment/converter_vex.py | 28 +++- ailment/expression.py | 348 +++++++++++++++++++++++++++------------ ailment/statement.py | 26 ++- ailment/tagged_object.py | 4 +- ailment/utils.py | 27 ++- 6 files changed, 309 insertions(+), 133 deletions(-) diff --git a/ailment/__init__.py b/ailment/__init__.py index 5a9ed66..ee7edae 100644 --- a/ailment/__init__.py +++ b/ailment/__init__.py @@ -3,8 +3,8 @@ import logging from .block import Block -from . import statement as Stmt -from . import expression as Expr +from . import statement +from . import expression from .statement import Assignment, Statement from .expression import Expression, Const, Tmp, Register, UnaryOp, BinaryOp from .converter_common import Converter @@ -13,6 +13,9 @@ log = logging.getLogger(__name__) +# REALLY BAD +Expr = expression +Stmt = statement available_converters: set[str] = set() @@ -59,6 +62,8 @@ def convert(irsb, manager): # pylint:disable=arguments-differ __all__ = [ "available_converters", "Block", + "expression", + "statement", "Stmt", "Expr", "Statement", diff --git a/ailment/converter_vex.py b/ailment/converter_vex.py index 1aa9511..2bcbc26 100644 --- a/ailment/converter_vex.py +++ b/ailment/converter_vex.py @@ -20,7 +20,6 @@ ITE, Reinterpret, VEXCCallExpression, - TernaryOp, ) from .converter_common import SkipConversionNotice, Converter @@ -288,10 +287,30 @@ def Binop(expr, manager): bits = op._output_size_bits - extra_kwargs = {} if op_name == "DivMod": - extra_kwargs["from_bits"] = op._from_size if op._from_size is not None else operands[1].bits - extra_kwargs["to_bits"] = op._to_size if op._to_size is not None else operands[1].bits + div = BinaryOp( + manager.next_atom(), + "Div", + operands, + signed, + ins_addr=manager.ins_addr, + vex_block_addr=manager.block_addr, + vex_stmt_idx=manager.vex_stmt_idx, + bits=bits, + ) + mod = BinaryOp( + manager.next_atom(), + "Mod", + operands, + signed, + ins_addr=manager.ins_addr, + vex_block_addr=manager.block_addr, + vex_stmt_idx=manager.vex_stmt_idx, + bits=bits, + ) + operands = [mod, div] + op_name = "Concat" + signed = False return BinaryOp( manager.next_atom(), @@ -304,7 +323,6 @@ def Binop(expr, manager): bits=bits, vector_count=vector_count, vector_size=vector_size, - **extra_kwargs, ) @staticmethod diff --git a/ailment/expression.py b/ailment/expression.py index 1d86243..feb8bad 100644 --- a/ailment/expression.py +++ b/ailment/expression.py @@ -1,8 +1,10 @@ # pylint:disable=arguments-renamed,isinstance-second-argument-not-valid-type,missing-class-docstring from __future__ import annotations -from enum import IntEnum +from typing import TYPE_CHECKING, Sequence, cast +from typing_extensions import Self +from enum import Enum, IntEnum +from abc import abstractmethod -from typing import TYPE_CHECKING try: import claripy @@ -10,7 +12,7 @@ claripy = None from .tagged_object import TaggedObject -from .utils import get_bits, stable_hash, is_none_or_likeable +from .utils import get_bits, stable_hash, is_none_or_likeable, is_none_or_matchable if TYPE_CHECKING: from .statement import Statement @@ -21,12 +23,15 @@ class Expression(TaggedObject): The base class of all AIL expressions. """ + bits: int + __slots__ = ("depth",) def __init__(self, idx, depth, **kwargs): super().__init__(idx, **kwargs) self.depth = depth + @abstractmethod def __repr__(self): raise NotImplementedError() @@ -41,13 +46,18 @@ def __eq__(self, other): return True return type(self) is type(other) and self.likes(other) and self.idx == other.idx - def likes(self, atom): # pylint:disable=unused-argument,no-self-use + @abstractmethod + def likes(self, other): # pylint:disable=unused-argument,no-self-use raise NotImplementedError() - def replace(self, old_expr, new_expr): + @abstractmethod + def matches(self, other): # pylint:disable=unused-argument,no-self-use + raise NotImplementedError() + + def replace(self, old_expr: Expression, new_expr: Expression) -> tuple[bool, Self]: if self is old_expr: r = True - replaced = new_expr + replaced = cast(Self, new_expr) elif not isinstance(self, Atom): r, replaced = self.replace(old_expr, new_expr) else: @@ -56,10 +66,10 @@ def replace(self, old_expr, new_expr): return r, replaced def __add__(self, other): - return BinaryOp(None, "Add", [self, other], False, **self.tags) + return BinaryOp(None, "Add", [self, other], signed=False, **self.tags) def __sub__(self, other): - return BinaryOp(None, "Sub", [self, other], False, **self.tags) + return BinaryOp(None, "Sub", [self, other], signed=False, **self.tags) class Atom(Expression): @@ -68,16 +78,16 @@ class Atom(Expression): "variable_offset", ) - def __init__(self, idx, variable=None, variable_offset=0, **kwargs): + def __init__(self, idx: int | None, variable=None, variable_offset=0, **kwargs): super().__init__(idx, 0, **kwargs) self.variable = variable self.variable_offset = variable_offset - def __repr__(self): + def __repr__(self) -> str: return "Atom (%d)" % self.idx - def copy(self): # pylint:disable=no-self-use - return NotImplementedError() + def copy(self) -> Self: # pylint:disable=no-self-use + raise NotImplementedError() class Const(Atom): @@ -86,7 +96,7 @@ class Const(Atom): "bits", ) - def __init__(self, idx, variable, value, bits, **kwargs): + def __init__(self, idx: int | None, variable, value: int | float, bits: int, **kwargs): super().__init__(idx, variable, **kwargs) self.value = value @@ -115,6 +125,7 @@ def likes(self, other): and self.bits == other.bits ) + matches = likes __hash__ = TaggedObject.__hash__ def _hash_core(self): @@ -134,7 +145,7 @@ class Tmp(Atom): "bits", ) - def __init__(self, idx, variable, tmp_idx, bits, **kwargs): + def __init__(self, idx: int | None, variable, tmp_idx: int, bits, **kwargs): super().__init__(idx, variable, **kwargs) self.tmp_idx = tmp_idx @@ -153,6 +164,7 @@ def __str__(self): def likes(self, other): return type(self) is type(other) and self.tmp_idx == other.tmp_idx and self.bits == other.bits + matches = likes __hash__ = TaggedObject.__hash__ def _hash_core(self): @@ -168,7 +180,7 @@ class Register(Atom): "bits", ) - def __init__(self, idx, variable, reg_offset, bits, **kwargs): + def __init__(self, idx: int | None, variable, reg_offset: int, bits: int, **kwargs): super().__init__(idx, variable, **kwargs) self.reg_offset = reg_offset @@ -178,8 +190,8 @@ def __init__(self, idx, variable, reg_offset, bits, **kwargs): def size(self): return self.bits // 8 - def likes(self, atom): - return type(self) is type(atom) and self.reg_offset == atom.reg_offset and self.bits == atom.bits + def likes(self, other): + return type(self) is type(other) and self.reg_offset == other.reg_offset and self.bits == other.bits def __repr__(self): return str(self) @@ -192,6 +204,7 @@ def __str__(self): else: return "%s" % str(self.variable.name) + matches = likes __hash__ = TaggedObject.__hash__ def _hash_core(self): @@ -256,28 +269,36 @@ def was_tmp(self) -> bool: return self.category == VirtualVariableCategory.TMP @property - def reg_offset(self) -> int | None: + def reg_offset(self) -> int: if self.was_reg: return self.oident - return None + raise TypeError("Is not a register") @property - def stack_offset(self) -> int | None: + def stack_offset(self) -> int: if self.was_stack: return self.oident - return None + raise TypeError("Is not a stack variable") @property def tmp_idx(self) -> int | None: return self.oident if self.was_tmp else None - def likes(self, atom): + def likes(self, other): + return ( + isinstance(other, VirtualVariable) + and self.varid == other.varid + and self.bits == other.bits + and self.category == other.category + and self.oident == other.oident + ) + + def matches(self, other): return ( - isinstance(atom, VirtualVariable) - and self.varid == atom.varid - and self.bits == atom.bits - and self.category == atom.category - and self.oident == atom.oident + isinstance(other, VirtualVariable) + and self.bits == other.bits + and self.category == other.category + and self.oident == other.oident ) def __repr__(self): @@ -337,15 +358,38 @@ def op(self) -> str: def verbose_op(self) -> str: return "Phi" - def likes(self, atom) -> bool: - if isinstance(atom, Phi) and self.bits == atom.bits: + def likes(self, other) -> bool: + if isinstance(other, Phi) and self.bits == other.bits: self_src_and_vvarids = {(src, vvar.varid if vvar is not None else None) for src, vvar in self.src_and_vvars} other_src_and_vvarids = { - (src, vvar.varid if vvar is not None else None) for src, vvar in atom.src_and_vvars + (src, vvar.varid if vvar is not None else None) for src, vvar in other.src_and_vvars } return self_src_and_vvarids == other_src_and_vvarids return False + def matches(self, other) -> bool: + if isinstance(other, Phi) and self.bits == other.bits: + if len(self.src_and_vvars) != len(other.src_and_vvars): + return False + self_src_and_vvars = dict(self.src_and_vvars) + other_src_and_vvars = dict(other.src_and_vvars) + for src, self_vvar in self_src_and_vvars.items(): + if src not in other_src_and_vvars: + return False + other_vvar = other_src_and_vvars[src] + if self_vvar is None and other_vvar is None: + continue + if ( + self_vvar is None + and other_vvar is not None + or self_vvar is not None + and other_vvar is None + or not self_vvar.matches(other_vvar) + ): + return False + return True + return False + def __repr__(self): return f"𝜙@{self.bits}b {self.src_and_vvars}" @@ -416,7 +460,9 @@ class UnaryOp(Op): "variable_offset", ) - def __init__(self, idx, op, operand, variable=None, variable_offset=None, **kwargs): + def __init__( + self, idx: int | None, op: str, operand: Expression, variable=None, variable_offset: int | None = None, **kwargs + ): super().__init__(idx, (operand.depth if isinstance(operand, Expression) else 0) + 1, op, **kwargs) self.operand = operand @@ -432,7 +478,18 @@ def __repr__(self): def likes(self, other): return ( - type(other) is UnaryOp and self.op == other.op and self.bits == other.bits and self.operand == other.operand + type(other) is UnaryOp + and self.op == other.op + and self.bits == other.bits + and self.operand.likes(other.operand) + ) + + def matches(self, other): + return ( + type(other) is UnaryOp + and self.op == other.op + and self.bits == other.bits + and self.operand.matches(other.operand) ) __hash__ = TaggedObject.__hash__ @@ -471,10 +528,15 @@ def has_atom(self, atom, identity=True): return self.operand.has_atom(atom, identity=identity) -class Convert(UnaryOp): +class ConvertType(Enum): TYPE_INT = 0 TYPE_FP = 1 + +class Convert(UnaryOp): + TYPE_INT = ConvertType.TYPE_INT + TYPE_FP = ConvertType.TYPE_FP + __slots__ = ( "from_bits", "to_bits", @@ -486,13 +548,13 @@ class Convert(UnaryOp): def __init__( self, - idx, - from_bits, - to_bits, - is_signed, - operand, - from_type=TYPE_INT, - to_type=TYPE_INT, + idx: int | None, + from_bits: int, + to_bits: int, + is_signed: bool, + operand: Expression, + from_type: ConvertType = TYPE_INT, + to_type: ConvertType = TYPE_INT, rounding_mode=None, **kwargs, ): @@ -526,6 +588,19 @@ def likes(self, other): and self.rounding_mode == other.rounding_mode ) + def matches(self, other): + return ( + type(other) is Convert + and self.from_bits == other.from_bits + and self.to_bits == other.to_bits + and self.bits == other.bits + and self.is_signed == other.is_signed + and self.operand.matches(other.operand) + and self.from_type == other.from_type + and self.to_type == other.to_type + and self.rounding_mode == other.rounding_mode + ) + __hash__ = TaggedObject.__hash__ def _hash_core(self): @@ -621,7 +696,17 @@ def likes(self, other): and self.from_type == other.from_type and self.to_bits == other.to_bits and self.to_type == other.to_type - and self.operand == other.operand + and self.operand.likes(other.operand) + ) + + def matches(self, other): + return ( + type(other) is Reinterpret + and self.from_bits == other.from_bits + and self.from_type == other.from_type + and self.to_bits == other.to_bits + and self.to_type == other.to_type + and self.operand.matches(other.operand) ) __hash__ = TaggedObject.__hash__ @@ -661,13 +746,11 @@ class BinaryOp(Op): __slots__ = ( "operands", "bits", - "signed", "variable", "variable_offset", "floating_point", "rounding_mode", - "from_bits", # for divmod - "to_bits", # for divmod + "signed", "vector_count", "vector_size", ) @@ -683,7 +766,6 @@ class BinaryOp(Op): "MulV": "*", "Div": "/", "DivF": "/", - "DivMod": "/m", "Mod": "%", "Xor": "^", "And": "&", @@ -700,10 +782,10 @@ class BinaryOp(Op): "CmpLE": "<=", "CmpGT": ">", "CmpGE": ">=", - "CmpLTs": "s", - "CmpGEs": ">=s", + "CmpLT (signed)": "s", + "CmpGE (signed)": ">=s", "Concat": "CONCAT", "Ror": "ROR", "Rol": "ROL", @@ -719,25 +801,22 @@ class BinaryOp(Op): "CmpGE": "CmpLT", "CmpLE": "CmpGT", "CmpGT": "CmpLE", - "CmpLTs": "CmpGEs", - "CmpGEs": "CmpLTs", - "CmpLEs": "CmpGTs", - "CmpGTs": "CmpLEs", } def __init__( self, - idx, - op, - operands, - signed, + idx: int | None, + op: str, + operands: Sequence[Expression], + signed: bool = False, + *, variable=None, variable_offset=None, bits=None, - floating_point: bool = False, - rounding_mode: str | None = None, - from_bits: int | None = None, - to_bits: int | None = None, + floating_point=False, + rounding_mode=None, + from_bits=None, + to_bits=None, vector_count: int | None = None, vector_size: int | None = None, **kwargs, @@ -750,11 +829,6 @@ def __init__( + 1 ) - # special handling of initialization with signed op names - if op and op.endswith("s"): - op = op[:-1] - signed = True - super().__init__(idx, depth, op, **kwargs) assert len(operands) == 2 @@ -790,9 +864,6 @@ def __init__( self.vector_count = vector_count self.vector_size = vector_size - self.from_bits = from_bits - self.to_bits = to_bits - # TODO: sanity check of operands' sizes for some ops # assert self.bits == operands[1].bits @@ -814,6 +885,17 @@ def likes(self, other): and self.rounding_mode == other.rounding_mode ) + def matches(self, other): + return ( + type(other) is BinaryOp + and self.op == other.op + and self.bits == other.bits + and self.signed == other.signed + and is_none_or_matchable(self.operands, other.operands, is_list=True) + and self.floating_point == other.floating_point + and self.rounding_mode == other.rounding_mode + ) + __hash__ = TaggedObject.__hash__ def _hash_core(self): @@ -843,14 +925,14 @@ def has_atom(self, atom, identity=True): return False - def replace(self, old_expr, new_expr): + def replace(self, old_expr: Expression, new_expr: Expression) -> tuple[bool, BinaryOp]: if self.operands[0] == old_expr: r0 = True replaced_operand_0 = new_expr elif isinstance(self.operands[0], Expression): r0, replaced_operand_0 = self.operands[0].replace(old_expr, new_expr) else: - r0, replaced_operand_0 = False, None + r0, replaced_operand_0 = False, new_expr if self.operands[1] == old_expr: r1 = True @@ -858,7 +940,7 @@ def replace(self, old_expr, new_expr): elif isinstance(self.operands[1], Expression): r1, replaced_operand_1 = self.operands[1].replace(old_expr, new_expr) else: - r1, replaced_operand_1 = False, None + r1, replaced_operand_1 = False, new_expr r2, replaced_rm = False, None if self.rounding_mode is not None: @@ -871,12 +953,10 @@ def replace(self, old_expr, new_expr): self.idx, self.op, [replaced_operand_0 if r0 else self.operands[0], replaced_operand_1 if r1 else self.operands[1]], - self.signed, + signed=self.signed, bits=self.bits, floating_point=self.floating_point, rounding_mode=replaced_rm if r2 else self.rounding_mode, - from_bits=self.from_bits, - to_bits=self.to_bits, **self.tags, ) else: @@ -886,10 +966,10 @@ def replace(self, old_expr, new_expr): def verbose_op(self): op = self.op if self.floating_point: - op += "F" + op += " (float)" else: if self.signed: - op += "s" + op += " (signed)" return op @property @@ -901,14 +981,12 @@ def copy(self) -> BinaryOp: self.idx, self.op, self.operands[::], - self.signed, variable=self.variable, + signed=self.signed, variable_offset=self.variable_offset, bits=self.bits, floating_point=self.floating_point, rounding_mode=self.rounding_mode, - from_bits=self.from_bits, - to_bits=self.to_bits, **self.tags, ) @@ -922,9 +1000,21 @@ class Load(Expression): "variable_offset", "guard", "alt", + "bits", ) - def __init__(self, idx, addr, size, endness, variable=None, variable_offset=None, guard=None, alt=None, **kwargs): + def __init__( + self, + idx: int | None, + addr: Expression, + size: int, + endness: str, + variable=None, + variable_offset=None, + guard=None, + alt=None, + **kwargs, + ): depth = max(addr.depth, size.depth if isinstance(size, Expression) else 0) + 1 super().__init__(idx, depth, **kwargs) @@ -935,10 +1025,7 @@ def __init__(self, idx, addr, size, endness, variable=None, variable_offset=None self.alt = alt self.variable = variable self.variable_offset = variable_offset - - @property - def bits(self): - return self.size * 8 + self.bits = self.size * 8 def __repr__(self): return str(self) @@ -982,6 +1069,21 @@ def likes(self, other): and self.alt == other.alt ) + def _matches_addr(self, other_addr): + if hasattr(self.addr, "matches") and hasattr(other_addr, "matches"): + return self.addr.matches(other_addr) + return self.addr == other_addr + + def matches(self, other): + return ( + type(other) is Load + and self._matches_addr(other.addr) + and self.size == other.size + and self.endness == other.endness + and self.guard == other.guard + and self.alt == other.alt + ) + __hash__ = TaggedObject.__hash__ def _hash_core(self): @@ -1011,7 +1113,16 @@ class ITE(Expression): "variable_offset", ) - def __init__(self, idx, cond, iffalse, iftrue, variable=None, variable_offset=None, **kwargs): + def __init__( + self, + idx: int | None, + cond: Expression, + iffalse: Expression, + iftrue: Expression, + variable=None, + variable_offset=None, + **kwargs, + ): depth = ( max( cond.depth if isinstance(cond, Expression) else 0, @@ -1035,13 +1146,22 @@ def __repr__(self): def __str__(self): return f"(({self.cond}) ? ({self.iftrue}) : ({self.iffalse}))" - def likes(self, atom): + def likes(self, other): + return ( + type(other) is ITE + and self.cond.likes(other.cond) + and self.iffalse == other.iffalse + and self.iftrue == other.iftrue + and self.bits == other.bits + ) + + def matches(self, other): return ( - type(atom) is ITE - and self.cond == atom.cond - and self.iffalse == atom.iffalse - and self.iftrue == atom.iftrue - and self.bits == atom.bits + type(other) is ITE + and self.cond.matches(other.cond) + and self.iffalse == other.iffalse + and self.iftrue == other.iftrue + and self.bits == other.bits ) __hash__ = TaggedObject.__hash__ @@ -1162,6 +1282,7 @@ def matches(self, other): and self.bits == other.bits ) + matches = likes __hash__ = TaggedObject.__hash__ def _hash_core(self): @@ -1229,6 +1350,8 @@ def replace(self, old_expr: Expression, new_expr: Expression): @property def size(self): + if self.bits is None: + return None return self.bits // 8 @@ -1239,7 +1362,7 @@ class VEXCCallExpression(Expression): "bits", ) - def __init__(self, idx, callee: str, operands: list[Expression], bits=None, **kwargs): + def __init__(self, idx: int | None, cee_name: str, operands: tuple[Expression, ...], bits: int, **kwargs): super().__init__(idx, max(operand.depth for operand in operands), **kwargs) self.callee = callee self.operands = operands @@ -1308,6 +1431,8 @@ def replace(self, old_expr, new_expr): @property def size(self): + if self.bits is None: + return None return self.bits // 8 @@ -1319,12 +1444,14 @@ class MultiStatementExpression(Expression): __slots__ = ( "stmts", "expr", + "bits", ) def __init__(self, idx: int | None, stmts: list[Statement], expr: Expression, **kwargs): super().__init__(idx, expr.depth + 1, **kwargs) self.stmts = stmts self.expr = expr + self.bits = self.expr.bits __hash__ = TaggedObject.__hash__ @@ -1332,7 +1459,20 @@ def _hash_core(self): return stable_hash((MultiStatementExpression,) + tuple(self.stmts) + (self.expr,)) def likes(self, other): - return type(self) is type(other) and self.stmts == other.stmts and self.expr == other.expr + return ( + type(self) is type(other) + and len(self.stmts) == len(other.stmts) + and all(s_stmt.likes(o_stmt) for s_stmt, o_stmt in zip(self.stmts, other.stmts)) + and self.expr.likes(other.expr) + ) + + def matches(self, other): + return ( + type(self) is type(other) + and len(self.stmts) == len(other.stmts) + and all(s_stmt.matches(o_stmt) for s_stmt, o_stmt in zip(self.stmts, other.stmts)) + and self.expr.matches(other.expr) + ) def __repr__(self): return f"MultiStatementExpression({self.stmts}, {self.expr})" @@ -1343,10 +1483,6 @@ def __str__(self): concatenated_str = ", ".join(stmts_str + [expr_str]) return f"({concatenated_str})" - @property - def bits(self): - return self.expr.bits - @property def size(self): return self.expr.size @@ -1391,7 +1527,16 @@ class BasePointerOffset(Expression): "variable_offset", ) - def __init__(self, idx, bits, base, offset, variable=None, variable_offset=None, **kwargs): + def __init__( + self, + idx: int | None, + bits: int, + base: Expression | str, + offset: int, + variable=None, + variable_offset=None, + **kwargs, + ): super().__init__(idx, (offset.depth if isinstance(offset, Expression) else 0) + 1, **kwargs) self.bits = bits self.base = base @@ -1425,6 +1570,7 @@ def likes(self, other): and self.offset == other.offset ) + matches = likes __hash__ = TaggedObject.__hash__ def _hash_core(self): @@ -1451,7 +1597,7 @@ def copy(self) -> BasePointerOffset: class StackBaseOffset(BasePointerOffset): __slots__ = () - def __init__(self, idx, bits, offset, **kwargs): + def __init__(self, idx: int | None, bits: int, offset: int, **kwargs): # stack base offset is always signed if offset >= (1 << (bits - 1)): offset -= 1 << bits @@ -1470,7 +1616,7 @@ def negate(expr: Expression) -> Expression: expr.idx, BinaryOp.COMPARISON_NEGATION[expr.op], expr.operands, - expr.signed, + signed=expr.signed, bits=expr.bits, floating_point=expr.floating_point, rounding_mode=expr.rounding_mode, diff --git a/ailment/statement.py b/ailment/statement.py index e6ed9aa..bc9d2a3 100644 --- a/ailment/statement.py +++ b/ailment/statement.py @@ -1,5 +1,6 @@ # pylint:disable=isinstance-second-argument-not-valid-type,no-self-use,arguments-renamed -from typing import Optional, TYPE_CHECKING +from __future__ import annotations +from typing import TYPE_CHECKING, Sequence from abc import ABC, abstractmethod try: @@ -463,11 +464,11 @@ def __init__( self, idx, target, - calling_convention: Optional["SimCC"] = None, + calling_convention: SimCC | None = None, prototype=None, - args=None, - ret_expr=None, - fp_ret_expr=None, + args: Sequence[Expression] | None=None, + ret_expr: Expression | None = None, + fp_ret_expr: Expression | None = None, bits: int | None = None, **kwargs, ): @@ -479,7 +480,14 @@ def __init__( self.args = args self.ret_expr = ret_expr self.fp_ret_expr = fp_ret_expr - self.bits = bits if bits is not None else ret_expr.bits if ret_expr is not None else None + if bits is not None: + self.bits = bits + elif ret_expr is not None: + self.bits = ret_expr.bits + elif fp_ret_expr is not None: + self.bits = fp_ret_expr.bits + else: + self.bits = 0 # uhhhhhhhhhhhhhhhhhhh def likes(self, other): return ( @@ -719,6 +727,9 @@ def replace(self, old_expr, new_expr): def copy(self) -> "DirtyStatement": return DirtyStatement(self.idx, self.dirty, **self.tags) + + def replace(self, old_expr, new_expr): + return self class Label(Statement): @@ -741,6 +752,9 @@ def __init__(self, idx, name: str, ins_addr: int, block_idx: int | None = None, def likes(self, other: "Label"): return isinstance(other, Label) + def replace(self, old_expr, new_expr): + return self + matches = likes def _hash_core(self): diff --git a/ailment/tagged_object.py b/ailment/tagged_object.py index b7dc446..f174497 100644 --- a/ailment/tagged_object.py +++ b/ailment/tagged_object.py @@ -9,7 +9,7 @@ class TaggedObject: "_hash", ) - def __init__(self, idx, **kwargs): + def __init__(self, idx: int | None, **kwargs): self._tags = None self.idx = idx self._hash = None @@ -43,7 +43,7 @@ def __new__(cls, *args, **kwargs): # pylint:disable=unused-argument self._tags = None return self - def __hash__(self): + def __hash__(self) -> int: if self._hash is None: self._hash = self._hash_core() return self._hash diff --git a/ailment/utils.py b/ailment/utils.py index 01cbf4e..933d8c0 100644 --- a/ailment/utils.py +++ b/ailment/utils.py @@ -1,37 +1,28 @@ -from typing import Union, TYPE_CHECKING +from __future__ import annotations +from typing import TYPE_CHECKING, TypeAlias import struct try: - import claripy + from claripy.ast import Bits except ImportError: - claripy = None + from typing import Never as Bits try: import _md5 as md5lib except ImportError: import hashlib as md5lib -if TYPE_CHECKING: - from .expression import Expression +GetBitsTypeParams: TypeAlias = "Bits | Expression" -get_bits_type_params = Union[int, "Expression"] -if claripy: - get_bits_type_params = Union[int, claripy.ast.Bits, "Expression"] - - -def get_bits(expr: get_bits_type_params) -> int | None: - # delayed import - from .expression import Expression # pylint:disable=import-outside-toplevel +def get_bits(expr: GetBitsTypeParams) -> int: if isinstance(expr, Expression): return expr.bits - elif isinstance(expr, claripy.ast.Bits): + elif isinstance(expr, Bits): return expr.size() - elif hasattr(expr, "bits"): - return expr.bits else: - return None + raise TypeError(type(expr)) md5_unpacker = struct.Struct("4I") @@ -127,3 +118,5 @@ def is_none_or_matchable(arg1, arg2, is_list=False): if isinstance(arg1, Expression): return arg1.matches(arg2) return arg1 == arg2 + +from .expression import Expression From dc70bb483d2ff00f2c5c848ae117caa0edd01ba3 Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Thu, 24 Oct 2024 12:44:02 -0700 Subject: [PATCH 03/13] rebase fix --- ailment/expression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ailment/expression.py b/ailment/expression.py index feb8bad..3445c79 100644 --- a/ailment/expression.py +++ b/ailment/expression.py @@ -1362,7 +1362,7 @@ class VEXCCallExpression(Expression): "bits", ) - def __init__(self, idx: int | None, cee_name: str, operands: tuple[Expression, ...], bits: int, **kwargs): + def __init__(self, idx: int | None, callee: str, operands: tuple[Expression, ...], bits: int, **kwargs): super().__init__(idx, max(operand.depth for operand in operands), **kwargs) self.callee = callee self.operands = operands From 0c3567f191cc0aa192133325f1e345707d15cd02 Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Thu, 31 Oct 2024 10:11:36 -0700 Subject: [PATCH 04/13] continue --- ailment/block_walker.py | 4 ++-- ailment/expression.py | 2 -- ailment/statement.py | 25 +++++++++++++++---------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/ailment/block_walker.py b/ailment/block_walker.py index cdc3651..8c12e17 100644 --- a/ailment/block_walker.py +++ b/ailment/block_walker.py @@ -72,10 +72,10 @@ def walk_expression( self, expr: Expression, stmt_idx: int | None = None, - stmt: int | None = None, + stmt: Statement | None = None, block: Block | None = None, ): - return self._handle_expr(0, expr, stmt_idx, stmt, block) + return self._handle_expr(0, expr, stmt_idx or 0, stmt, block) def _handle_stmt(self, stmt_idx: int, stmt: Statement, block: Block | None) -> Any: try: diff --git a/ailment/expression.py b/ailment/expression.py index 3445c79..170aadc 100644 --- a/ailment/expression.py +++ b/ailment/expression.py @@ -815,8 +815,6 @@ def __init__( bits=None, floating_point=False, rounding_mode=None, - from_bits=None, - to_bits=None, vector_count: int | None = None, vector_size: int | None = None, **kwargs, diff --git a/ailment/statement.py b/ailment/statement.py index bc9d2a3..82ab807 100644 --- a/ailment/statement.py +++ b/ailment/statement.py @@ -10,7 +10,7 @@ from .utils import stable_hash, is_none_or_likeable, is_none_or_matchable from .tagged_object import TaggedObject -from .expression import Expression, DirtyExpression +from .expression import Atom, Expression, DirtyExpression if TYPE_CHECKING: from angr.calling_conventions import SimCC @@ -40,11 +40,13 @@ def eq(self, expr0, expr1): # pylint:disable=no-self-use return expr0 is expr1 return expr0 == expr1 - def likes(self, atom): # pylint:disable=unused-argument,no-self-use + @abstractmethod + def likes(self, other) -> bool: # pylint:disable=unused-argument,no-self-use raise NotImplementedError() - def matches(self, atom): # pylint:disable=unused-argument,no-self-use - return NotImplementedError() + @abstractmethod + def matches(self, other) -> bool: # pylint:disable=unused-argument,no-self-use + raise NotImplementedError() class Assignment(Statement): @@ -57,7 +59,7 @@ class Assignment(Statement): "src", ) - def __init__(self, idx, dst, src, **kwargs): + def __init__(self, idx: int | None, dst: Atom, src: Expression, **kwargs): super().__init__(idx, **kwargs) self.dst = dst @@ -120,7 +122,7 @@ class Store(Statement): "guard", ) - def __init__(self, idx, addr, data, size, endness, guard=None, variable=None, offset=None, **kwargs): + def __init__(self, idx: int | None, addr: Expression, data: Expression, size: int, endness: str, guard: Expression | None = None, variable=None, offset=None, **kwargs): super().__init__(idx, **kwargs) self.addr = addr @@ -704,7 +706,7 @@ class DirtyStatement(Statement): __slots__ = ("dirty",) - def __init__(self, idx, dirty: DirtyExpression, **kwargs): + def __init__(self, idx: int | None, dirty: DirtyExpression, **kwargs): super().__init__(idx, **kwargs) self.dirty = dirty @@ -727,9 +729,12 @@ def replace(self, old_expr, new_expr): def copy(self) -> "DirtyStatement": return DirtyStatement(self.idx, self.dirty, **self.tags) - - def replace(self, old_expr, new_expr): - return self + + def likes(self, other): + return type(other) is DirtyStatement and self.dirty.likes(other.dirty) + + def matches(self, other): + return type(other) is DirtyStatement and self.dirty.matches(other.dirty) class Label(Statement): From 1ea9d9ea2fd2cbe9702fa35493817db3a98e51f6 Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Thu, 31 Oct 2024 13:55:03 -0700 Subject: [PATCH 05/13] continue --- ailment/py.typed | 0 ailment/statement.py | 11 +++++++---- setup.cfg | 4 ++++ 3 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 ailment/py.typed diff --git a/ailment/py.typed b/ailment/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/ailment/statement.py b/ailment/statement.py index 82ab807..2bb8c77 100644 --- a/ailment/statement.py +++ b/ailment/statement.py @@ -2,6 +2,7 @@ from __future__ import annotations from typing import TYPE_CHECKING, Sequence from abc import ABC, abstractmethod +from typing_extensions import Self try: import claripy @@ -32,7 +33,7 @@ def __str__(self): raise NotImplementedError() @abstractmethod - def replace(self, old_expr, new_expr): + def replace(self, old_expr: Expression, new_expr: Expression) -> tuple[bool, Self]: raise NotImplementedError() def eq(self, expr0, expr1): # pylint:disable=no-self-use @@ -85,9 +86,10 @@ def __repr__(self): def __str__(self): return f"{str(self.dst)} = {str(self.src)}" - def replace(self, old_expr, new_expr): + def replace(self, old_expr: Expression, new_expr: Expression): if self.dst == old_expr: r_dst = True + assert isinstance(new_expr, Atom) replaced_dst = new_expr else: r_dst, replaced_dst = self.dst.replace(old_expr, new_expr) @@ -558,7 +560,7 @@ def verbose_op(self): def op(self): return "call" - def replace(self, old_expr, new_expr): + def replace(self, old_expr: Expression, new_expr: Expression): if isinstance(self.target, Expression): r0, replaced_target = self.target.replace(old_expr, new_expr) else: @@ -721,6 +723,7 @@ def __str__(self): def replace(self, old_expr, new_expr): if self.dirty == old_expr: + assert isinstance(new_expr, DirtyExpression) return True, DirtyStatement(self.idx, new_expr, **self.tags) r, new_dirty = self.dirty.replace(old_expr, new_expr) if r: @@ -758,7 +761,7 @@ def likes(self, other: "Label"): return isinstance(other, Label) def replace(self, old_expr, new_expr): - return self + return False, self matches = likes diff --git a/setup.cfg b/setup.cfg index de5280f..fa9e26d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,3 +29,7 @@ docs = testing = pytest pytest-xdist + +[options.package_data] +ailment = + py.typed From bf34b9e86e42b1e66c039192cb191c3b9ca5f090 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 19:18:01 +0000 Subject: [PATCH 06/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ailment/converter_vex.py | 4 +++- ailment/expression.py | 3 ++- ailment/statement.py | 32 ++++++++++++++++++++++---------- ailment/utils.py | 3 ++- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/ailment/converter_vex.py b/ailment/converter_vex.py index 2bcbc26..6c87ddf 100644 --- a/ailment/converter_vex.py +++ b/ailment/converter_vex.py @@ -350,7 +350,9 @@ def Triop(expr, manager): bits=bits, ) - raise TypeError("Please figure out what kind of operation this is (smart money says fused multiply) and convert it into multiple binops") + raise TypeError( + "Please figure out what kind of operation this is (smart money says fused multiply) and convert it into multiple binops" + ) @staticmethod def Const(expr, manager): diff --git a/ailment/expression.py b/ailment/expression.py index 170aadc..769aeb9 100644 --- a/ailment/expression.py +++ b/ailment/expression.py @@ -1,6 +1,7 @@ # pylint:disable=arguments-renamed,isinstance-second-argument-not-valid-type,missing-class-docstring from __future__ import annotations -from typing import TYPE_CHECKING, Sequence, cast +from typing import TYPE_CHECKING, cast +from collections.abc import Sequence from typing_extensions import Self from enum import Enum, IntEnum from abc import abstractmethod diff --git a/ailment/statement.py b/ailment/statement.py index 2bb8c77..0af5951 100644 --- a/ailment/statement.py +++ b/ailment/statement.py @@ -1,6 +1,7 @@ # pylint:disable=isinstance-second-argument-not-valid-type,no-self-use,arguments-renamed from __future__ import annotations -from typing import TYPE_CHECKING, Sequence +from typing import TYPE_CHECKING +from collections.abc import Sequence from abc import ABC, abstractmethod from typing_extensions import Self @@ -105,7 +106,7 @@ def replace(self, old_expr: Expression, new_expr: Expression): else: return False, self - def copy(self) -> "Assignment": + def copy(self) -> Assignment: return Assignment(self.idx, self.dst, self.src, **self.tags) @@ -124,7 +125,18 @@ class Store(Statement): "guard", ) - def __init__(self, idx: int | None, addr: Expression, data: Expression, size: int, endness: str, guard: Expression | None = None, variable=None, offset=None, **kwargs): + def __init__( + self, + idx: int | None, + addr: Expression, + data: Expression, + size: int, + endness: str, + guard: Expression | None = None, + variable=None, + offset=None, + **kwargs, + ): super().__init__(idx, **kwargs) self.addr = addr @@ -228,7 +240,7 @@ def replace(self, old_expr, new_expr): else: return False, self - def copy(self) -> "Store": + def copy(self) -> Store: return Store( self.idx, self.addr, @@ -433,7 +445,7 @@ def replace(self, old_expr, new_expr): else: return False, self - def copy(self) -> "ConditionalJump": + def copy(self) -> ConditionalJump: return ConditionalJump( self.idx, self.condition, @@ -470,7 +482,7 @@ def __init__( target, calling_convention: SimCC | None = None, prototype=None, - args: Sequence[Expression] | None=None, + args: Sequence[Expression] | None = None, ret_expr: Expression | None = None, fp_ret_expr: Expression | None = None, bits: int | None = None, @@ -491,7 +503,7 @@ def __init__( elif fp_ret_expr is not None: self.bits = fp_ret_expr.bits else: - self.bits = 0 # uhhhhhhhhhhhhhhhhhhh + self.bits = 0 # uhhhhhhhhhhhhhhhhhhh def likes(self, other): return ( @@ -730,7 +742,7 @@ def replace(self, old_expr, new_expr): return True, DirtyStatement(self.idx, new_dirty, **self.tags) return False, self - def copy(self) -> "DirtyStatement": + def copy(self) -> DirtyStatement: return DirtyStatement(self.idx, self.dirty, **self.tags) def likes(self, other): @@ -757,7 +769,7 @@ def __init__(self, idx, name: str, ins_addr: int, block_idx: int | None = None, self.ins_addr = ins_addr self.block_idx = block_idx - def likes(self, other: "Label"): + def likes(self, other: Label): return isinstance(other, Label) def replace(self, old_expr, new_expr): @@ -781,5 +793,5 @@ def __repr__(self): def __str__(self): return f"{self.name}:" - def copy(self) -> "Label": + def copy(self) -> Label: return Label(self.idx, self.name, self.ins_addr, self.block_idx, **self.tags) diff --git a/ailment/utils.py b/ailment/utils.py index 933d8c0..51bf290 100644 --- a/ailment/utils.py +++ b/ailment/utils.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING, TypeAlias +from typing import TypeAlias import struct try: @@ -119,4 +119,5 @@ def is_none_or_matchable(arg1, arg2, is_list=False): return arg1.matches(arg2) return arg1 == arg2 + from .expression import Expression From 28d2022c6e8b7da9730eddaf999747f855c92e9e Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Wed, 4 Dec 2024 12:28:25 -0700 Subject: [PATCH 07/13] add dep --- setup.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.cfg b/setup.cfg index fa9e26d..8576969 100644 --- a/setup.cfg +++ b/setup.cfg @@ -19,6 +19,8 @@ classifiers = [options] packages = find: python_requires = >=3.10 +install_requires = + typing-extensions [options.extras_require] docs = From 4102d701690946d6682c69eea3092ec1273e7628 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Dec 2024 19:29:18 +0000 Subject: [PATCH 08/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 8576969..eb8fbc3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,9 +18,9 @@ classifiers = [options] packages = find: -python_requires = >=3.10 install_requires = typing-extensions +python_requires = >=3.10 [options.extras_require] docs = From a61add981037a1bb5d46e02cd0c2d7a4bc02e9d9 Mon Sep 17 00:00:00 2001 From: Fish Date: Wed, 11 Dec 2024 15:28:58 -0700 Subject: [PATCH 09/13] Fix DivMod conversion. --- ailment/converter_vex.py | 44 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/ailment/converter_vex.py b/ailment/converter_vex.py index 6c87ddf..04731c1 100644 --- a/ailment/converter_vex.py +++ b/ailment/converter_vex.py @@ -288,6 +288,23 @@ def Binop(expr, manager): bits = op._output_size_bits if op_name == "DivMod": + op1_size = op._from_size if op._from_size is not None else operands[0].bits + op2_size = op._to_size if op._to_size is not None else operands[1].bits + + if op2_size < op1_size: + # e.g., DivModU64to32 + operands[1] = Convert( + manager.next_atom(), + op2_size, + op1_size, + False if op._from_signed == "U" else True, + operands[1], + ins_addr=manager.ins_addr, + vex_block_addr=manager.block_addr, + vex_stmt_idx=manager.vex_stmt_idx, + ) + chunk_bits = bits // 2 + div = BinaryOp( manager.next_atom(), "Div", @@ -296,7 +313,17 @@ def Binop(expr, manager): ins_addr=manager.ins_addr, vex_block_addr=manager.block_addr, vex_stmt_idx=manager.vex_stmt_idx, - bits=bits, + bits=op1_size, + ) + truncated_div = Convert( + manager.next_atom(), + op1_size, + chunk_bits, + signed, + div, + ins_addr=manager.ins_addr, + vex_block_addr=manager.block_addr, + vex_stmt_idx=manager.vex_stmt_idx, ) mod = BinaryOp( manager.next_atom(), @@ -306,9 +333,20 @@ def Binop(expr, manager): ins_addr=manager.ins_addr, vex_block_addr=manager.block_addr, vex_stmt_idx=manager.vex_stmt_idx, - bits=bits, + bits=op1_size, ) - operands = [mod, div] + truncated_mod = Convert( + manager.next_atom(), + op1_size, + chunk_bits, + signed, + mod, + ins_addr=manager.ins_addr, + vex_block_addr=manager.block_addr, + vex_stmt_idx=manager.vex_stmt_idx, + ) + + operands = [truncated_mod, truncated_div] op_name = "Concat" signed = False From 7459e1de2b607654809d00fedf18062f9d515da9 Mon Sep 17 00:00:00 2001 From: Fish Date: Wed, 11 Dec 2024 15:58:11 -0700 Subject: [PATCH 10/13] Fix UnaryOp size. --- ailment/expression.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/ailment/expression.py b/ailment/expression.py index 769aeb9..652b4bf 100644 --- a/ailment/expression.py +++ b/ailment/expression.py @@ -462,12 +462,19 @@ class UnaryOp(Op): ) def __init__( - self, idx: int | None, op: str, operand: Expression, variable=None, variable_offset: int | None = None, **kwargs + self, + idx: int | None, + op: str, + operand: Expression, + variable=None, + variable_offset: int | None = None, + bits=None, + **kwargs, ): super().__init__(idx, (operand.depth if isinstance(operand, Expression) else 0) + 1, op, **kwargs) self.operand = operand - self.bits = operand.bits + self.bits = operand.bits if bits is None else bits self.variable = variable self.variable_offset = variable_offset @@ -506,7 +513,7 @@ def replace(self, old_expr, new_expr): r, replaced_operand = self.operand.replace(old_expr, new_expr) if r: - return True, UnaryOp(self.idx, self.op, replaced_operand, **self.tags) + return True, UnaryOp(self.idx, self.op, replaced_operand, bits=self.bits, **self.tags) else: return False, self @@ -520,7 +527,13 @@ def size(self): def copy(self) -> UnaryOp: return UnaryOp( - self.idx, self.op, self.operand, variable=self.variable, variable_offset=self.variable_offset, **self.tags + self.idx, + self.op, + self.operand, + variable=self.variable, + variable_offset=self.variable_offset, + bits=self.bits, + **self.tags, ) def has_atom(self, atom, identity=True): From 60bf8c50c45762e6e50d871e8f26c8e37913c735 Mon Sep 17 00:00:00 2001 From: Fish Date: Thu, 12 Dec 2024 00:01:52 -0700 Subject: [PATCH 11/13] Lint code. --- ailment/converter_vex.py | 5 +++-- ailment/expression.py | 36 +++++++++--------------------------- ailment/statement.py | 1 - ailment/utils.py | 7 ++----- 4 files changed, 14 insertions(+), 35 deletions(-) diff --git a/ailment/converter_vex.py b/ailment/converter_vex.py index 04731c1..dd473be 100644 --- a/ailment/converter_vex.py +++ b/ailment/converter_vex.py @@ -297,7 +297,7 @@ def Binop(expr, manager): manager.next_atom(), op2_size, op1_size, - False if op._from_signed == "U" else True, + op._from_signed != "U", operands[1], ins_addr=manager.ins_addr, vex_block_addr=manager.block_addr, @@ -389,7 +389,8 @@ def Triop(expr, manager): ) raise TypeError( - "Please figure out what kind of operation this is (smart money says fused multiply) and convert it into multiple binops" + "Please figure out what kind of operation this is (smart money says fused multiply) and convert it into " + "multiple binops" ) @staticmethod diff --git a/ailment/expression.py b/ailment/expression.py index 652b4bf..1ecdded 100644 --- a/ailment/expression.py +++ b/ailment/expression.py @@ -2,9 +2,9 @@ from __future__ import annotations from typing import TYPE_CHECKING, cast from collections.abc import Sequence -from typing_extensions import Self from enum import Enum, IntEnum from abc import abstractmethod +from typing_extensions import Self try: @@ -26,7 +26,10 @@ class Expression(TaggedObject): bits: int - __slots__ = ("depth",) + __slots__ = ( + "bits", + "depth", + ) def __init__(self, idx, depth, **kwargs): super().__init__(idx, **kwargs) @@ -92,10 +95,7 @@ def copy(self) -> Self: # pylint:disable=no-self-use class Const(Atom): - __slots__ = ( - "value", - "bits", - ) + __slots__ = ("value",) def __init__(self, idx: int | None, variable, value: int | float, bits: int, **kwargs): super().__init__(idx, variable, **kwargs) @@ -141,10 +141,7 @@ def copy(self) -> Const: class Tmp(Atom): - __slots__ = ( - "tmp_idx", - "bits", - ) + __slots__ = ("tmp_idx",) def __init__(self, idx: int | None, variable, tmp_idx: int, bits, **kwargs): super().__init__(idx, variable, **kwargs) @@ -176,10 +173,7 @@ def copy(self) -> Tmp: class Register(Atom): - __slots__ = ( - "reg_offset", - "bits", - ) + __slots__ = ("reg_offset",) def __init__(self, idx: int | None, variable, reg_offset: int, bits: int, **kwargs): super().__init__(idx, variable, **kwargs) @@ -227,7 +221,6 @@ class VirtualVariableCategory(IntEnum): class VirtualVariable(Atom): __slots__ = ( - "bits", "varid", "category", "oident", @@ -331,10 +324,7 @@ def copy(self) -> VirtualVariable: class Phi(Atom): - __slots__ = ( - "bits", - "src_and_vvars", - ) + __slots__ = ("src_and_vvars",) def __init__( self, @@ -456,7 +446,6 @@ def verbose_op(self): class UnaryOp(Op): __slots__ = ( "operand", - "bits", "variable", "variable_offset", ) @@ -759,7 +748,6 @@ def copy(self) -> Reinterpret: class BinaryOp(Op): __slots__ = ( "operands", - "bits", "variable", "variable_offset", "floating_point", @@ -1012,7 +1000,6 @@ class Load(Expression): "variable_offset", "guard", "alt", - "bits", ) def __init__( @@ -1120,7 +1107,6 @@ class ITE(Expression): "cond", "iffalse", "iftrue", - "bits", "variable", "variable_offset", ) @@ -1233,7 +1219,6 @@ class DirtyExpression(Expression): "mfx", "maddr", "msize", - "bits", ) def __init__( @@ -1371,7 +1356,6 @@ class VEXCCallExpression(Expression): __slots__ = ( "callee", "operands", - "bits", ) def __init__(self, idx: int | None, callee: str, operands: tuple[Expression, ...], bits: int, **kwargs): @@ -1456,7 +1440,6 @@ class MultiStatementExpression(Expression): __slots__ = ( "stmts", "expr", - "bits", ) def __init__(self, idx: int | None, stmts: list[Statement], expr: Expression, **kwargs): @@ -1532,7 +1515,6 @@ def copy(self) -> MultiStatementExpression: class BasePointerOffset(Expression): __slots__ = ( - "bits", "base", "offset", "variable", diff --git a/ailment/statement.py b/ailment/statement.py index 0af5951..0e436c4 100644 --- a/ailment/statement.py +++ b/ailment/statement.py @@ -473,7 +473,6 @@ class Call(Expression, Statement): "args", "ret_expr", "fp_ret_expr", - "bits", ) def __init__( diff --git a/ailment/utils.py b/ailment/utils.py index 51bf290..833e4cc 100644 --- a/ailment/utils.py +++ b/ailment/utils.py @@ -1,3 +1,4 @@ +# pylint:disable=ungrouped-imports from __future__ import annotations from typing import TypeAlias import struct @@ -86,8 +87,6 @@ def is_none_or_likeable(arg1, arg2, is_list=False): """ Returns whether two things are both None or can like each other """ - from .expression import Expression # pylint:disable=import-outside-toplevel - if arg1 is None or arg2 is None: if arg1 == arg2: return True @@ -105,8 +104,6 @@ def is_none_or_matchable(arg1, arg2, is_list=False): """ Returns whether two things are both None or can match each other """ - from .expression import Expression # pylint:disable=import-outside-toplevel - if arg1 is None or arg2 is None: if arg1 == arg2: return True @@ -120,4 +117,4 @@ def is_none_or_matchable(arg1, arg2, is_list=False): return arg1 == arg2 -from .expression import Expression +from .expression import Expression # pylint:disable=wrong-import-position From c0fc33052bef09a743334090cc1fcd624f40d4bc Mon Sep 17 00:00:00 2001 From: Fish Date: Thu, 12 Dec 2024 00:36:09 -0700 Subject: [PATCH 12/13] typing.Never does not exist in Python 3.10. --- ailment/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ailment/utils.py b/ailment/utils.py index 833e4cc..84e5d13 100644 --- a/ailment/utils.py +++ b/ailment/utils.py @@ -6,7 +6,7 @@ try: from claripy.ast import Bits except ImportError: - from typing import Never as Bits + from typing_extensions import Never as Bits try: import _md5 as md5lib From b35cac6a9baa57881a4d92d02b4138d9ace3501f Mon Sep 17 00:00:00 2001 From: Fish Date: Thu, 12 Dec 2024 00:39:34 -0700 Subject: [PATCH 13/13] Lint code. --- ailment/expression.py | 1 - ailment/utils.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ailment/expression.py b/ailment/expression.py index 1ecdded..e49af80 100644 --- a/ailment/expression.py +++ b/ailment/expression.py @@ -1279,7 +1279,6 @@ def matches(self, other): and self.bits == other.bits ) - matches = likes __hash__ = TaggedObject.__hash__ def _hash_core(self): diff --git a/ailment/utils.py b/ailment/utils.py index 84e5d13..a441807 100644 --- a/ailment/utils.py +++ b/ailment/utils.py @@ -1,4 +1,4 @@ -# pylint:disable=ungrouped-imports +# pylint:disable=ungrouped-imports,wrong-import-position from __future__ import annotations from typing import TypeAlias import struct @@ -117,4 +117,4 @@ def is_none_or_matchable(arg1, arg2, is_list=False): return arg1 == arg2 -from .expression import Expression # pylint:disable=wrong-import-position +from .expression import Expression # noqa: E402