Skip to content

Commit

Permalink
C++ (WIP): Add field-flow through addresses of fields
Browse files Browse the repository at this point in the history
  • Loading branch information
MathiasVP committed Dec 6, 2023
1 parent 68c88f8 commit f0b671e
Show file tree
Hide file tree
Showing 12 changed files with 1,358 additions and 1,133 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ private module Input implements InputSig<CppDataFlow> {
// complex to model here.
any()
}

predicate reverseReadExclude(Public::Node n) {
// Suppress the "missing reverse read" consistency check for reads of addresses of fields.
// TODO: This should only be true when there's a store step.
exists(Public::Node n2 |
Private::readStep(n, _, n2) and
any(Public::PostFieldUpdateNode pun | pun.getIndirectionIndex() = 0).getPreUpdateNode() = n2
)
}
}

module Consistency = MakeConsistency<CppDataFlow, CppTaintTracking, Input>;
Original file line number Diff line number Diff line change
Expand Up @@ -786,21 +786,22 @@ predicate nodeHasInstruction(Node node, Instruction instr, int indirectionIndex)
* `node2`.
*/
predicate readStep(Node node1, ContentSet c, Node node2) {
exists(FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2 |
nodeHasOperand(node2, operand, indirectionIndex2) and
// The `1` here matches the `node2.getIndirectionIndex() = 1` conjunct
// in `storeStep`.
nodeHasOperand(node1, fa1.getObjectAddressOperand(), 1) and
exists(
FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2,
int indirectionIndex1
|
nodeHasOperand(node2, operand, (indirectionIndex1 - 1) + indirectionIndex2) and
nodeHasOperand(node1, fa1.getObjectAddressOperand(), indirectionIndex1) and
numberOfLoadsFromOperand(fa1, operand, numberOfLoads, _)
|
exists(FieldContent fc | fc = c |
fc.getField() = fa1.getField() and
fc.getIndirectionIndex() = indirectionIndex2 + numberOfLoads
fc.getIndirectionIndex() = (indirectionIndex1 - 1) + indirectionIndex2 + numberOfLoads
)
or
exists(UnionContent uc | uc = c |
uc.getAField() = fa1.getField() and
uc.getIndirectionIndex() = indirectionIndex2 + numberOfLoads
uc.getIndirectionIndex() = (indirectionIndex1 - 1) + indirectionIndex2 + numberOfLoads
)
)
}
Expand Down
61 changes: 45 additions & 16 deletions cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ private newtype TIRDataFlowNode =
} or
TPostFieldUpdateNode(FieldAddress operand, int indirectionIndex) {
indirectionIndex =
[1 .. Ssa::countIndirectionsForCppType(operand.getObjectAddress().getResultLanguageType())]
[0 .. Ssa::countIndirectionsForCppType(operand.getObjectAddress().getResultLanguageType())]
} or
TSsaPhiNode(Ssa::PhiNode phi) or
TIndirectArgumentOutNode(ArgumentOperand operand, int indirectionIndex) {
Expand Down Expand Up @@ -437,7 +437,12 @@ class Node extends TIRDataFlowNode {
* `x.set(taint())` is a partial definition of `x`, and `transfer(&x, taint())` is
* a partial definition of `&x`).
*/
Expr asPartialDefinition() { result = this.(PartialDefinitionNode).getDefinedExpr() }
Expr asPartialDefinition() {
exists(PartialDefinitionNode pdn | this = pdn |
pdn.getIndirectionIndex() > 0 and
result = pdn.getDefinedExpr()
)
}

/**
* Gets an upper bound on the type of this node.
Expand Down Expand Up @@ -569,11 +574,15 @@ class PostFieldUpdateNode extends TPostFieldUpdateNode, PartialDefinitionNode {

Field getUpdatedField() { result = fieldAddress.getField() }

int getIndirectionIndex() { result = indirectionIndex }
override int getIndirectionIndex() { result = indirectionIndex }

override Node getPreUpdateNode() {
indirectionIndex > 0 and
hasOperandAndIndex(result, pragma[only_bind_into](fieldAddress).getObjectAddressOperand(),
indirectionIndex)
or
indirectionIndex = 0 and
result.asOperand() = fieldAddress
}

override Expr getDefinedExpr() {
Expand Down Expand Up @@ -822,7 +831,7 @@ class IndirectArgumentOutNode extends Node, TIndirectArgumentOutNode, PartialDef

IndirectArgumentOutNode() { this = TIndirectArgumentOutNode(operand, indirectionIndex) }

int getIndirectionIndex() { result = indirectionIndex }
override int getIndirectionIndex() { result = indirectionIndex }

int getArgumentIndex() {
exists(CallInstruction call | call.getArgumentOperand(result) = operand)
Expand All @@ -838,15 +847,31 @@ class IndirectArgumentOutNode extends Node, TIndirectArgumentOutNode, PartialDef

override Declaration getFunction() { result = this.getCallInstruction().getEnclosingFunction() }

override Node getPreUpdateNode() { hasOperandAndIndex(result, operand, indirectionIndex) }
override Node getPreUpdateNode() {
hasOperandAndIndex(result, operand, indirectionIndex)
or
indirectionIndex = 0 and
result.asOperand() = operand
}

override string toStringImpl() {
// This string should be unique enough to be helpful but common enough to
// avoid storing too many different strings.
result = this.getStaticCallTarget().getName() + " output argument"
indirectionIndex > 0 and
(
// This string should be unique enough to be helpful but common enough to
// avoid storing too many different strings.
result = this.getStaticCallTarget().getName() + " output argument"
or
not exists(this.getStaticCallTarget()) and
result = "output argument"
)
or
not exists(this.getStaticCallTarget()) and
result = "output argument"
indirectionIndex = 0 and
(
result = this.getStaticCallTarget().getName() + " output argument pointer"
or
not exists(this.getStaticCallTarget()) and
result = "output argument pointer"
)
}

override Location getLocationImpl() { result = operand.getLocation() }
Expand Down Expand Up @@ -1708,6 +1733,8 @@ abstract class PostUpdateNode extends Node {
* ```
*/
abstract private class PartialDefinitionNode extends PostUpdateNode {
abstract int getIndirectionIndex();

Check warning on line 1736 in cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

View workflow job for this annotation

GitHub Actions / qldoc

Missing QLdoc for member-predicate DataFlowUtil::PartialDefinitionNode::getIndirectionIndex/0

abstract Expr getDefinedExpr();
}

Expand All @@ -1722,6 +1749,8 @@ abstract private class PartialDefinitionNode extends PostUpdateNode {
* `getVariableAccess()` equal to `x`.
*/
class DefinitionByReferenceNode extends IndirectArgumentOutNode {
DefinitionByReferenceNode() { this.getIndirectionIndex() > 0 }

/** Gets the unconverted argument corresponding to this node. */
Expr getArgument() { result = this.getAddressOperand().getDef().getUnconvertedResultExpression() }

Expand Down Expand Up @@ -2192,7 +2221,7 @@ private Field getAFieldWithSize(Union u, int bytes) {
cached
private newtype TContent =
TFieldContent(Field f, int indirectionIndex) {
indirectionIndex = [1 .. Ssa::getMaxIndirectionsForType(f.getUnspecifiedType())] and
indirectionIndex = [0 .. Ssa::getMaxIndirectionsForType(f.getUnspecifiedType())] and
// Reads and writes of union fields are tracked using `UnionContent`.
not f.getDeclaringType() instanceof Union
} or
Expand All @@ -2203,7 +2232,7 @@ private newtype TContent =
// We key `UnionContent` by the union instead of its fields since a write to one
// field can be read by any read of the union's fields.
indirectionIndex =
[1 .. max(Ssa::getMaxIndirectionsForType(getAFieldWithSize(u, bytes).getUnspecifiedType()))]
[0 .. max(Ssa::getMaxIndirectionsForType(getAFieldWithSize(u, bytes).getUnspecifiedType()))]
)
}

Expand Down Expand Up @@ -2243,9 +2272,9 @@ class FieldContent extends Content, TFieldContent {
FieldContent() { this = TFieldContent(f, indirectionIndex) }

override string toString() {
indirectionIndex = 1 and result = f.toString()
indirectionIndex = 0 and result = f.toString()
or
indirectionIndex > 1 and result = f.toString() + " indirection"
indirectionIndex > 0 and result = f.toString() + " indirection"
}

Field getField() { result = f }
Expand Down Expand Up @@ -2276,9 +2305,9 @@ class UnionContent extends Content, TUnionContent {
UnionContent() { this = TUnionContent(u, bytes, indirectionIndex) }

override string toString() {
indirectionIndex = 1 and result = u.toString()
indirectionIndex = 0 and result = u.toString()
or
indirectionIndex > 1 and result = u.toString() + " indirection"
indirectionIndex > 0 and result = u.toString() + " indirection"
}

/** Gets a field of the underlying union of this `UnionContent`, if any. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ class BaseCallVariable extends AbstractBaseSourceVariable, TBaseCallVariable {
*/
predicate isModifiableByCall(ArgumentOperand operand, int indirectionIndex) {
exists(CallInstruction call, int index, CppType type |
indirectionIndex = [1 .. countIndirectionsForCppType(type)] and
indirectionIndex = [0 .. countIndirectionsForCppType(type)] and
type = getLanguageType(operand) and
call.getArgumentOperand(index) = operand and
if index = -1
Expand Down Expand Up @@ -459,34 +459,49 @@ predicate isModifiableByCall(ArgumentOperand operand, int indirectionIndex) {
)
}

/**
* Holds if `t` is a pointer or reference type that supports at least `indirectionIndex` number
* of indirections, and the `indirectionIndex` indirection cannot be modfiied by passing a
* value of `t` to a function.
*/
private predicate isModifiableAtImpl(CppType cppType, int indirectionIndex) {
bindingset[indirectionIndex]
pragma[inline_late]
private predicate isModifiableAtImplImpl(CppType cppType, int indirectionIndex) {
exists(Type pointerType, Type base, Type t |
pointerType = t.getUnderlyingType() and
pointerType = any(Indirection ind).getUnderlyingType() and
cppType.hasType(t, _) and
base = getTypeImpl(pointerType, indirectionIndex)
|
// The value cannot be modified if it has a const specifier,
not base.isConst()
or
// but in the case of a class type, it may be the case that
// one of the members was modified.
exists(base.stripType().(Cpp::Class).getAField())
)
}

private predicate isModifiableAtImplAtLeast1(CppType cppType, int indirectionIndex) {
indirectionIndex = [1 .. countIndirectionsForCppType(cppType)] and
(
exists(Type pointerType, Type base, Type t |
pointerType = t.getUnderlyingType() and
pointerType = any(Indirection ind).getUnderlyingType() and
cppType.hasType(t, _) and
base = getTypeImpl(pointerType, indirectionIndex)
|
// The value cannot be modified if it has a const specifier,
not base.isConst()
or
// but in the case of a class type, it may be the case that
// one of the members was modified.
exists(base.stripType().(Cpp::Class).getAField())
)
isModifiableAtImplImpl(cppType, indirectionIndex)
or
// If the `indirectionIndex`'th dereference of a type can be modified
// then so can the `indirectionIndex + 1`'th dereference.
isModifiableAtImpl(cppType, indirectionIndex - 1)
isModifiableAtImplAtLeast1(cppType, indirectionIndex - 1)
)
}

private predicate isModifiableAtImplAt0(CppType cppType) { isModifiableAtImplImpl(cppType, 0) }

/**
* Holds if `t` is a pointer or reference type that supports at least `indirectionIndex` number
* of indirections, and the `indirectionIndex` indirection cannot be modfiied by passing a
* value of `t` to a function.
*/
private predicate isModifiableAtImpl(CppType cppType, int indirectionIndex) {
isModifiableAtImplAtLeast1(cppType, indirectionIndex)
or
indirectionIndex = 0 and
isModifiableAtImplAt0(cppType)
}

/**
* Holds if `t` is a type with at least `indirectionIndex` number of indirections,
* and the `indirectionIndex` indirection can be modified by passing a value of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ uniquePostUpdate
| example.c:24:13:24:18 | coords indirection | Node has multiple PostUpdateNodes. |
postIsInSameCallable
reverseRead
| self_argument_flow.cpp:10:13:10:16 | info | Origin of readStep is missing a PostUpdateNode. |
argHasPostUpdate
postWithInFlow
| test.cpp:384:10:384:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@ uniqueType
uniqueNodeLocation
missingLocation
uniqueNodeToString
| complex.cpp:22:11:22:17 | (no string representation) | Node should have one toString but has 0. |
| complex.cpp:25:7:25:7 | (no string representation) | Node should have one toString but has 0. |
| complex.cpp:42:16:42:16 | (no string representation) | Node should have one toString but has 0. |
| complex.cpp:42:16:42:16 | (no string representation) | Node should have one toString but has 0. |
| complex.cpp:43:16:43:16 | (no string representation) | Node should have one toString but has 0. |
| complex.cpp:43:16:43:16 | (no string representation) | Node should have one toString but has 0. |
| complex.cpp:53:12:53:12 | (no string representation) | Node should have one toString but has 0. |
| complex.cpp:53:12:53:12 | (no string representation) | Node should have one toString but has 0. |
| complex.cpp:54:12:54:12 | (no string representation) | Node should have one toString but has 0. |
| complex.cpp:54:12:54:12 | (no string representation) | Node should have one toString but has 0. |
| complex.cpp:55:12:55:12 | (no string representation) | Node should have one toString but has 0. |
| complex.cpp:55:12:55:12 | (no string representation) | Node should have one toString but has 0. |
| complex.cpp:56:12:56:12 | (no string representation) | Node should have one toString but has 0. |
| complex.cpp:56:12:56:12 | (no string representation) | Node should have one toString but has 0. |
parameterCallable
localFlowIsLocal
readStepIsLocal
Expand All @@ -13,6 +27,20 @@ unreachableNodeCCtx
localCallNodes
postIsNotPre
postHasUniquePre
| complex.cpp:22:11:22:17 | (no string representation) | PostUpdateNode should have one pre-update node but has 0. |
| complex.cpp:25:7:25:7 | (no string representation) | PostUpdateNode should have one pre-update node but has 0. |
| complex.cpp:42:16:42:16 | (no string representation) | PostUpdateNode should have one pre-update node but has 0. |
| complex.cpp:42:16:42:16 | (no string representation) | PostUpdateNode should have one pre-update node but has 0. |
| complex.cpp:43:16:43:16 | (no string representation) | PostUpdateNode should have one pre-update node but has 0. |
| complex.cpp:43:16:43:16 | (no string representation) | PostUpdateNode should have one pre-update node but has 0. |
| complex.cpp:53:12:53:12 | (no string representation) | PostUpdateNode should have one pre-update node but has 0. |
| complex.cpp:53:12:53:12 | (no string representation) | PostUpdateNode should have one pre-update node but has 0. |
| complex.cpp:54:12:54:12 | (no string representation) | PostUpdateNode should have one pre-update node but has 0. |
| complex.cpp:54:12:54:12 | (no string representation) | PostUpdateNode should have one pre-update node but has 0. |
| complex.cpp:55:12:55:12 | (no string representation) | PostUpdateNode should have one pre-update node but has 0. |
| complex.cpp:55:12:55:12 | (no string representation) | PostUpdateNode should have one pre-update node but has 0. |
| complex.cpp:56:12:56:12 | (no string representation) | PostUpdateNode should have one pre-update node but has 0. |
| complex.cpp:56:12:56:12 | (no string representation) | PostUpdateNode should have one pre-update node but has 0. |
uniquePostUpdate
| aliasing.cpp:70:11:70:11 | definition of w indirection | Node has multiple PostUpdateNodes. |
| aliasing.cpp:77:11:77:11 | definition of w indirection | Node has multiple PostUpdateNodes. |
Expand All @@ -30,17 +58,50 @@ uniquePostUpdate
| clearning.cpp:165:3:165:3 | s indirection | Node has multiple PostUpdateNodes. |
| clearning.cpp:172:3:172:3 | s indirection | Node has multiple PostUpdateNodes. |
| complex.cpp:22:3:22:5 | this indirection | Node has multiple PostUpdateNodes. |
| complex.cpp:22:11:22:17 | constructor init of field f | Node has multiple PostUpdateNodes. |
| complex.cpp:25:7:25:7 | constructor init of field inner | Node has multiple PostUpdateNodes. |
| complex.cpp:25:7:25:7 | this indirection | Node has multiple PostUpdateNodes. |
| complex.cpp:42:10:42:14 | inner indirection | Node has multiple PostUpdateNodes. |
| complex.cpp:42:16:42:16 | f | Node has multiple PostUpdateNodes. |
| complex.cpp:43:10:43:14 | inner indirection | Node has multiple PostUpdateNodes. |
| complex.cpp:43:16:43:16 | f | Node has multiple PostUpdateNodes. |
| complex.cpp:53:6:53:10 | inner indirection | Node has multiple PostUpdateNodes. |
| complex.cpp:53:12:53:12 | f | Node has multiple PostUpdateNodes. |
| complex.cpp:54:6:54:10 | inner indirection | Node has multiple PostUpdateNodes. |
| complex.cpp:54:12:54:12 | f | Node has multiple PostUpdateNodes. |
| complex.cpp:55:6:55:10 | inner indirection | Node has multiple PostUpdateNodes. |
| complex.cpp:55:12:55:12 | f | Node has multiple PostUpdateNodes. |
| complex.cpp:56:6:56:10 | inner indirection | Node has multiple PostUpdateNodes. |
| complex.cpp:56:12:56:12 | f | Node has multiple PostUpdateNodes. |
| struct_init.c:26:16:26:20 | definition of outer indirection | Node has multiple PostUpdateNodes. |
| struct_init.c:41:16:41:20 | definition of outer indirection | Node has multiple PostUpdateNodes. |
postIsInSameCallable
reverseRead
| aliasing.cpp:111:16:111:16 | s | Origin of readStep is missing a PostUpdateNode. |
| aliasing.cpp:147:16:147:19 | access to array | Origin of readStep is missing a PostUpdateNode. |
| aliasing.cpp:158:15:158:15 | s | Origin of readStep is missing a PostUpdateNode. |
| aliasing.cpp:164:15:164:15 | s | Origin of readStep is missing a PostUpdateNode. |
| arrays.cpp:42:5:42:12 | indirect | Origin of readStep is missing a PostUpdateNode. |
| arrays.cpp:43:10:43:17 | indirect | Origin of readStep is missing a PostUpdateNode. |
| arrays.cpp:44:10:44:17 | indirect | Origin of readStep is missing a PostUpdateNode. |
| by_reference.cpp:102:22:102:26 | outer | Origin of readStep is missing a PostUpdateNode. |
| by_reference.cpp:104:16:104:20 | outer | Origin of readStep is missing a PostUpdateNode. |
| by_reference.cpp:106:22:106:27 | pouter | Origin of readStep is missing a PostUpdateNode. |
| by_reference.cpp:108:16:108:21 | pouter | Origin of readStep is missing a PostUpdateNode. |
| by_reference.cpp:122:21:122:25 | outer | Origin of readStep is missing a PostUpdateNode. |
| by_reference.cpp:124:15:124:19 | outer | Origin of readStep is missing a PostUpdateNode. |
| by_reference.cpp:126:21:126:26 | pouter | Origin of readStep is missing a PostUpdateNode. |
| by_reference.cpp:128:15:128:20 | pouter | Origin of readStep is missing a PostUpdateNode. |
| realistic.cpp:49:9:49:11 | foo | Origin of readStep is missing a PostUpdateNode. |
| realistic.cpp:53:9:53:11 | foo | Origin of readStep is missing a PostUpdateNode. |
| realistic.cpp:54:16:54:18 | foo | Origin of readStep is missing a PostUpdateNode. |
| realistic.cpp:55:12:55:14 | foo | Origin of readStep is missing a PostUpdateNode. |
| realistic.cpp:57:88:57:90 | foo | Origin of readStep is missing a PostUpdateNode. |
| realistic.cpp:60:21:60:23 | foo | Origin of readStep is missing a PostUpdateNode. |
| realistic.cpp:60:55:60:57 | foo | Origin of readStep is missing a PostUpdateNode. |
| realistic.cpp:61:21:61:23 | foo | Origin of readStep is missing a PostUpdateNode. |
| realistic.cpp:65:21:65:23 | foo | Origin of readStep is missing a PostUpdateNode. |
| struct_init.c:36:11:36:15 | outer | Origin of readStep is missing a PostUpdateNode. |
argHasPostUpdate
postWithInFlow
| realistic.cpp:54:16:54:47 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
Expand Down
Loading

0 comments on commit f0b671e

Please sign in to comment.