Skip to content

Commit

Permalink
Resolve cast ambiguity in C++ frontend (#1551)
Browse files Browse the repository at this point in the history
  • Loading branch information
oxisto authored May 6, 2024
1 parent 11bccdc commit 928726a
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 63 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,6 @@ private constructor(
/** Enables or disables the inference system as a whole. */
val enabled: Boolean,

/** Enables smart guessing of cast vs. call expressions in the C/C++ language frontend. */
val guessCastExpressions: Boolean,

/** Enables the inference of record declarations. */
val inferRecords: Boolean,

Expand All @@ -57,14 +54,11 @@ private constructor(
) {
class Builder(
private var enabled: Boolean = true,
private var guessCastExpressions: Boolean = true,
private var inferRecords: Boolean = true,
private var inferFunctions: Boolean = true,
private var inferVariables: Boolean = true,
private var inferDfgForUnresolvedCalls: Boolean = true
) {
fun guessCastExpressions(guess: Boolean) = apply { this.guessCastExpressions = guess }

fun enabled(infer: Boolean) = apply { this.enabled = infer }

fun inferRecords(infer: Boolean) = apply { this.inferRecords = infer }
Expand All @@ -80,7 +74,6 @@ private constructor(
fun build() =
InferenceConfiguration(
enabled,
guessCastExpressions,
inferRecords,
inferFunctions,
inferVariables,
Expand All @@ -97,7 +90,6 @@ private constructor(

override fun toString(): String {
return ToStringBuilder(this, ToStringStyle.JSON_STYLE)
.append("guessCastExpressions", guessCastExpressions)
.append("inferRecords", inferRecords)
.append("inferDfgForUnresolvedCalls", inferDfgForUnresolvedSymbols)
.toString()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ class TypeManager {
}

fun typeExists(name: String): Boolean {
return firstOrderTypes.stream().anyMatch { type: Type -> type.root.name.toString() == name }
return firstOrderTypes.any { type: Type -> type.root.name.toString() == name }
}

fun resolvePossibleTypedef(alias: Type, scopeManager: ScopeManager): Type {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,7 @@ import de.fraunhofer.aisec.cpg.frontends.Language
import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend
import de.fraunhofer.aisec.cpg.graph.*
import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration
import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.CastExpression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.ConstructExpression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.UnaryOperator
import de.fraunhofer.aisec.cpg.graph.statements.expressions.*
import de.fraunhofer.aisec.cpg.graph.types.Type
import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker
import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn
Expand Down Expand Up @@ -133,22 +130,27 @@ fun SubgraphWalker.ScopedWalker.replaceCallWithCast(
cast.expression = call.arguments.single()
cast.name = cast.castType.name

replaceArgument(parent, call, cast)
}

context(ContextProvider)
fun SubgraphWalker.ScopedWalker.replaceArgument(parent: Node?, old: Expression, new: Expression) {
if (parent !is ArgumentHolder) {
Pass.log.error(
"Parent AST node of call expression is not an argument holder. Cannot convert to cast expression. Further analysis might not be entirely accurate."
)
return
}

val success = parent.replaceArgument(call, cast)
val success = parent.replaceArgument(old, new)
if (!success) {
Pass.log.error(
"Replacing call expression with cast expression was not successful. Further analysis might not be entirely accurate."
"Replacing expression $old was not successful. Further analysis might not be entirely accurate."
)
} else {
call.disconnectFromGraph()
old.disconnectFromGraph()

// Make sure to inform the walker about our change
this.registerReplacement(call, cast)
this.registerReplacement(old, new)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -383,24 +383,20 @@ class ExpressionHandler(lang: CXXLanguageFrontend) :
IASTUnaryExpression.op_not -> operatorCode = "!"
IASTUnaryExpression.op_sizeof -> operatorCode = "sizeof"
IASTUnaryExpression.op_bracketedPrimary -> {
if (
frontend.config.inferenceConfiguration.guessCastExpressions &&
ctx.operand is IASTIdExpression
) {
// this can either be just a meaningless bracket or it can be a cast expression
val typeName = (ctx.operand as IASTIdExpression).name.toString()
if (frontend.typeManager.typeExists(typeName)) {
val cast = newCastExpression(rawNode = ctx)
cast.setCastOperator(0)
cast.castType = frontend.typeOf((ctx.operand as IASTIdExpression).name)
// The expression member can only be filled by the parent call
// (handleFunctionCallExpression and handleBinaryExpression)
cast.location = frontend.locationOf(ctx)
return cast
// If this expression is NOT part of a call expression and contains a "name" or
// something similar, we want to keep the information that this is an expression
// wrapped in parentheses. The best way to do this is to create a unary expression
if (ctx.operand is IASTIdExpression && ctx.parent !is IASTFunctionCallExpression) {
val op = newUnaryOperator("()", postfix = true, prefix = true, rawNode = ctx)
if (input != null) {
op.input = input
}
return op
}

// otherwise, ignore this kind of expression and return the input directly
// In all other cases, e.g., if the parenthesis is nested or part of a function
// call, we just return the unwrapped expression, because in this case we do not
// need to information about the parenthesis.
return input as Expression
}
IASTUnaryExpression.op_throw -> operatorCode = "throw"
Expand Down Expand Up @@ -476,14 +472,6 @@ class ExpressionHandler(lang: CXXLanguageFrontend) :
)
.forEach { callExpression.addTemplateParameter(it) }
}
reference is CastExpression &&
frontend.config.inferenceConfiguration.guessCastExpressions -> {
// this really is a cast expression in disguise
reference.expression =
ctx.arguments.firstOrNull()?.let { handle(it) }
?: newProblemExpression("could not parse argument for cast")
return reference
}
else -> {
callExpression =
newCallExpression(reference, reference?.name, template = false, rawNode = ctx)
Expand Down Expand Up @@ -577,21 +565,6 @@ class ExpressionHandler(lang: CXXLanguageFrontend) :
handle(ctx.initOperand2)
} ?: newProblemExpression("could not parse rhs")

if (
lhs is CastExpression &&
(language as? CLanguage)
?.unaryOperators
?.contains((binaryOperator.operatorCode ?: "")) == true &&
frontend.config.inferenceConfiguration.guessCastExpressions
) {
// this really is a combination of a cast and a unary operator
val op = newUnaryOperator(operatorCode, postfix = true, prefix = false, rawNode = ctx)
op.input = rhs
op.location = frontend.locationOf(ctx.operand2)
lhs.expression = op
return lhs
}

binaryOperator.lhs = lhs
binaryOperator.rhs = rhs

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,13 @@
package de.fraunhofer.aisec.cpg.passes

import de.fraunhofer.aisec.cpg.TranslationContext
import de.fraunhofer.aisec.cpg.graph.Component
import de.fraunhofer.aisec.cpg.graph.Node
import de.fraunhofer.aisec.cpg.frontends.cxx.CLanguage
import de.fraunhofer.aisec.cpg.graph.*
import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration
import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration
import de.fraunhofer.aisec.cpg.graph.implicit
import de.fraunhofer.aisec.cpg.graph.newConstructExpression
import de.fraunhofer.aisec.cpg.graph.scopes.GlobalScope
import de.fraunhofer.aisec.cpg.graph.scopes.ValueDeclarationScope
import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.ConstructExpression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.*
import de.fraunhofer.aisec.cpg.graph.types.recordDeclaration
import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker
import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn
Expand All @@ -52,17 +49,104 @@ import de.fraunhofer.aisec.cpg.passes.configuration.ExecuteBefore
@DependsOn(TypeResolver::class)
class CXXExtraPass(ctx: TranslationContext) : ComponentPass(ctx) {

private lateinit var walker: SubgraphWalker.ScopedWalker

override fun accept(component: Component) {
val walker = SubgraphWalker.ScopedWalker(ctx.scopeManager)
walker = SubgraphWalker.ScopedWalker(ctx.scopeManager)

walker.registerHandler(::fixInitializers)
walker.registerHandler { _, parent, node ->
when (node) {
is UnaryOperator -> removeBracketOperators(node, parent)
is BinaryOperator -> convertOperators(node, parent)
}
}
walker.registerHandler(::connectDefinitions)

for (tu in component.translationUnits) {
walker.iterate(tu)
}
}

/**
* In the frontend, we keep parenthesis around some expressions, so we can decide whether they
* are [CastExpression] nodes or just simply brackets with no syntactic value. The
* [CastExpression] conversion is done in [convertOperators], but in this function we are trying
* to get rid of those ()-unary operators that are meaningless, in order to reduce clutter to
* the graph.
*/
private fun removeBracketOperators(node: UnaryOperator, parent: Node?) {
if (node.operatorCode == "()" && !typeManager.typeExists(node.input.name.toString())) {
// It was really just parenthesis around an identifier, but we can only make this
// distinction now.
//
// In theory, we could just keep this meaningless unary expression, but it in order
// to reduce nodes, we unwrap the reference and exchange it in the arguments of the
// binary op
walker.replaceArgument(parent, node, node.input)
}
}

/**
* In C++ there is an ambiguity between the combination of a cast + unary operator or a binary
* operator where some arguments are wrapped in parentheses. This function tries to resolve
* this.
*
* Note: This is done especially for the C++ frontend. [ReplaceCallCastPass.handleCall] handles
* the more general case (which also applies to C++), in which a cast and a call are
* indistinguishable and need to be resolved once all types are known.
*/
private fun convertOperators(binOp: BinaryOperator, parent: Node?) {
val fakeUnaryOp = binOp.lhs
val language = fakeUnaryOp.language as? CLanguage
// We need to check, if the expression in parentheses is really referring to a type or
// not. A common example is code like `(long) &addr`. We could end up parsing this as a
// binary operator with the left-hand side of `(long)`, an operator code `&` and a rhs
// of `addr`.
if (
language != null &&
fakeUnaryOp is UnaryOperator &&
fakeUnaryOp.operatorCode == "()" &&
typeManager.typeExists(fakeUnaryOp.input.name.toString())
) {
// If the name (`long` in the example) is a type, then the unary operator (`(long)`)
// is really a cast and our binary operator is really a unary operator `&addr`.
// We need to perform the following steps:
// * create a cast expression out of the ()-unary operator, with the type that is
// referred to in the op.
val cast = newCastExpression().codeAndLocationFrom(fakeUnaryOp)
cast.language = language
cast.castType = language.objectType(fakeUnaryOp.input.name)

// * create a unary operator with the rhs of the binary operator (and the same
// operator code).
// * in the unlikely case that the binary operator cannot actually be used as a
// unary operator, we abort this. This should not happen, but we might never know
val opCode = binOp.operatorCode ?: ""
if (opCode !in language.unaryOperators) {
log.error(
"We tried to convert a binary operator into a unary operator, but the list of " +
"operator codes does not allow that. This is suspicious and the translation " +
"probably was incorrect"
)
return
}

val unaryOp =
newUnaryOperator(opCode, postfix = false, prefix = true)
.codeAndLocationFrom(binOp.rhs)
unaryOp.language = language
unaryOp.input = binOp.rhs

// * set the unary operator as the "expression" of the cast
cast.expression = unaryOp

// * replace the binary operator with the cast expression in the parent argument
// holder
walker.replaceArgument(parent, binOp, cast)
}
}

protected fun fixInitializers(node: Node?) {
if (node is VariableDeclaration) {
// check if we have the corresponding class for this type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1266,7 +1266,6 @@ internal class CXXLanguageFrontendTest : BaseTest() {
val file = File("src/test/resources/cxx/parenthesis.cpp")
val tu =
analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) {
it.inferenceConfiguration(builder().guessCastExpressions(true).build())
it.registerLanguage<CPPLanguage>()
}
val main = tu.functions["main"]
Expand Down
10 changes: 10 additions & 0 deletions cpg-language-cxx/src/test/resources/cxx/parenthesis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ int main() {
// this cast could be mistaken for a binary operation
int64_t addr = (int64_t) &count;

// this is a binary operation, even though it has parenthesis.
// The added parenthesis are just for more confusion
addr = ((addr)) &count;

// this is the same as in line 11, just with a different type
addr = (long) &count;

// this is a complex combination of cast and binary operation
addr = (long) &addr + 1;

// finally, a more complex example of unary operators and casts
char* outptr, key;
*(int64_t *)outptr = *(int64_t *)&key;
Expand Down

0 comments on commit 928726a

Please sign in to comment.