Skip to content

Commit

Permalink
Do not resolve unqualified symbols in current record without `HasImpl…
Browse files Browse the repository at this point in the history
…icitReceiver` (#1984)

* Introduce `HasExplicitMemberAccess` trait

Fixes #1983

* Added trait to GoLanguage as well to be safe

* Added more documentation

* Spotless

* Addressed code review

* Using existing trait

* added more tests

* Fixed the tests as good as they can be fixed in this PR

* a -> globalA

* Fixed another bug where field declarations were declared twice

* Update cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt

Co-authored-by: Maximilian Kaul <[email protected]>

* Better documentation

---------

Co-authored-by: Maximilian Kaul <[email protected]>
  • Loading branch information
oxisto and maximiliankaul authored Jan 29, 2025
1 parent 167a54d commit c72becd
Show file tree
Hide file tree
Showing 10 changed files with 214 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,13 @@ class ScopeManager : ScopeProvider {
scope: Scope? = node.scope,
predicate: ((Declaration) -> Boolean)? = null,
): List<Declaration> {
return lookupSymbolByName(node.name, node.language, node.location, scope, predicate)
return lookupSymbolByName(
node.name,
node.language,
node.location,
scope,
predicate = predicate,
)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,14 @@ interface HasSuperClasses : LanguageTrait {

/**
* A language trait, that specifies that this language has support for implicit receiver, e.g., that
* one can omit references to a base such as `this`.
* one can omit references to a base such as `this`. Common examples are C++ and Java.
*
* This is contrast to languages such as Python and Go where the name of the receiver such as `self`
* is always required to access a field or method.
*
* We need this information to make a decision which symbols or scopes to consider when doing an
* unqualified lookup of a symbol in [Scope.lookupSymbol]. More specifically, we need to skip the
* symbols of a [RecordScope] if the language does NOT have this trait.
*/
interface HasImplicitReceiver : LanguageTrait {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import de.fraunhofer.aisec.cpg.TranslationResult
import de.fraunhofer.aisec.cpg.graph.declarations.*
import de.fraunhofer.aisec.cpg.graph.edges.Edge
import de.fraunhofer.aisec.cpg.graph.edges.flows.*
import de.fraunhofer.aisec.cpg.graph.scopes.Scope
import de.fraunhofer.aisec.cpg.graph.statements.*
import de.fraunhofer.aisec.cpg.graph.statements.expressions.*
import de.fraunhofer.aisec.cpg.graph.statements.expressions.Block
Expand Down Expand Up @@ -1062,10 +1063,8 @@ val Node?.assigns: List<AssignExpression>

/**
* This function tries to find the first parent node that satisfies the condition specified in
* [predicate]. It starts searching in the [searchNode], moving up-wards using the [Node.astParent]
* attribute.
* [predicate]. It starts searching in [this], moving upwards using the [Node.astParent] attribute.
*
* @param searchNode the child node that we start the search from
* @param predicate the search predicate
*/
fun Node.firstParentOrNull(predicate: (Node) -> Boolean): Node? {
Expand All @@ -1085,6 +1084,26 @@ fun Node.firstParentOrNull(predicate: (Node) -> Boolean): Node? {
return null
}

/**
* This function returns the first parent scope that matches the given [predicate]. If no parent
* scope matches the predicate, null is returned.
*
* @param predicate The predicate to match the parent scope against
*/
fun Scope.firstScopeParentOrNull(predicate: (Scope) -> Boolean): Scope? {
var scope = this.parent
while (scope != null) {
if (predicate(scope)) {
return scope
}

// go upwards in the scope tree
scope = scope.parent
}

return null
}

/**
* Return all [ProblemNode] children in this graph (either stored directly or in
* [Node.additionalProblems]), starting with this [Node].
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@
package de.fraunhofer.aisec.cpg.graph.scopes

import com.fasterxml.jackson.annotation.JsonBackReference
import de.fraunhofer.aisec.cpg.frontends.HasImplicitReceiver
import de.fraunhofer.aisec.cpg.frontends.Language
import de.fraunhofer.aisec.cpg.graph.Node
import de.fraunhofer.aisec.cpg.graph.Node.Companion.TO_STRING_STYLE
import de.fraunhofer.aisec.cpg.graph.declarations.Declaration
import de.fraunhofer.aisec.cpg.graph.declarations.ImportDeclaration
import de.fraunhofer.aisec.cpg.graph.firstScopeParentOrNull
import de.fraunhofer.aisec.cpg.graph.statements.LabelStatement
import de.fraunhofer.aisec.cpg.graph.statements.LookupScopeStatement
import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference
Expand Down Expand Up @@ -112,6 +113,10 @@ sealed class Scope(
* current scope. This behaviour can be turned off with [thisScopeOnly]. This is useful for
* qualified lookups, where we want to stay in our lookup-scope.
*
* We need to consider the language trait [HasImplicitReceiver] here as well. If the language
* requires explicit member access, we must not consider symbols from record scopes unless we
* are in a qualified lookup.
*
* @param symbol the symbol to lookup
* @param thisScopeOnly whether we should stay in the current scope for lookup or traverse to
* its parents if no match was found.
Expand Down Expand Up @@ -166,11 +171,19 @@ sealed class Scope(

// If we do not have a hit, we can go up one scope, unless thisScopeOnly is set to true
// (or we had a modified scope)
if (thisScopeOnly || modifiedScoped != null) {
break
} else {
scope = scope.parent
}
scope =
if (thisScopeOnly || modifiedScoped != null) {
break
} else {
// If our language needs explicit lookup for fields (and other class members),
// we need to skip record scopes unless we are in a qualified lookup
if (languageOnly !is HasImplicitReceiver && scope.parent is RecordScope) {
scope.firstScopeParentOrNull { it !is RecordScope }
} else {
// Otherwise, we can just go to the next parent
scope.parent
}
}
}

return list ?: listOf()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import kotlin.reflect.KClass
* a specific one.
*/
open class TestLanguage(final override var namespaceDelimiter: String = "::") :
Language<TestLanguageFrontend>() {
Language<TestLanguageFrontend>(), HasImplicitReceiver {
override val fileExtensions: List<String> = listOf()
override val frontend: KClass<out TestLanguageFrontend> = TestLanguageFrontend::class
override val compoundAssignmentOperators =
Expand All @@ -60,6 +60,8 @@ open class TestLanguage(final override var namespaceDelimiter: String = "::") :
"double" to FloatingPointType("double", 64, this, NumericType.Modifier.SIGNED),
"string" to StringType("string", this),
)
override val receiverName: String
get() = "this"
}

class StructTestLanguage(namespaceDelimiter: String = "::") :
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,13 @@ import de.fraunhofer.aisec.cpg.TranslationConfiguration
import de.fraunhofer.aisec.cpg.TranslationManager
import de.fraunhofer.aisec.cpg.TranslationResult
import de.fraunhofer.aisec.cpg.frontends.CompilationDatabase
import de.fraunhofer.aisec.cpg.graph.*
import de.fraunhofer.aisec.cpg.graph.ContextProvider
import de.fraunhofer.aisec.cpg.graph.LanguageProvider
import de.fraunhofer.aisec.cpg.graph.Node
import de.fraunhofer.aisec.cpg.graph.declarations.Declaration
import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration
import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration
import de.fraunhofer.aisec.cpg.graph.get
import de.fraunhofer.aisec.cpg.graph.statements.expressions.*
import de.fraunhofer.aisec.cpg.graph.types.Type
import de.fraunhofer.aisec.cpg.test.TestUtils.ENFORCE_MEMBER_EXPRESSION
Expand All @@ -43,16 +46,7 @@ import java.nio.file.Path
import java.util.function.Consumer
import java.util.function.Predicate
import java.util.stream.Collectors
import kotlin.collections.filter
import kotlin.collections.first
import kotlin.collections.firstOrNull
import kotlin.collections.flatMap
import kotlin.collections.isNotEmpty
import kotlin.collections.joinToString
import kotlin.jvm.Throws
import kotlin.test.*
import kotlin.text.endsWith
import kotlin.toString

object TestUtils {

Expand Down Expand Up @@ -237,6 +231,15 @@ fun assertRefersTo(expression: Expression?, b: Declaration?) {
}
}

/** Asserts, that the expression given in [expression] does not refer to the declaration [b]. */
fun assertNotRefersTo(expression: Expression?, b: Declaration?) {
if (expression is Reference) {
assertNotEquals(b, (expression as Reference?)?.refersTo)
} else {
fail("not a reference")
}
}

/**
* Asserts, that the call expression given in [call] refers to the expected function declaration
* [func].
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -807,7 +807,12 @@ class StatementHandler(frontend: PythonLanguageFrontend) :

for (s in stmt.body) {
when (s) {
is Python.AST.FunctionDef -> handleFunctionDef(s, cls)
is Python.AST.FunctionDef -> {
val stmt = handleFunctionDef(s, cls)
// We need to manually set the astParent because we are not assigning it to our
// statements and therefore are not triggering our automagic parent setter
stmt.astParent = cls
}
else -> cls.statements += handleNode(s)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,22 +100,33 @@ class PythonAddDeclarationsPass(ctx: TranslationContext) : ComponentPass(ctx), L
}

// Look for a potential scope modifier for this reference
// lookupScope
var targetScope =
scopeManager.currentScope?.predefinedLookupScopes[ref.name.toString()]?.targetScope

// There are a couple of things to consider now
// Try to see whether our symbol already exists. There are basically three rules to follow
// here.
var symbol =
// Since this is a WRITE access, we need
// - to look for a local symbol, unless
// - a global keyword is present for this symbol and scope
if (targetScope != null) {
scopeManager.lookupSymbolByNodeName(ref, targetScope)
} else {
scopeManager.lookupSymbolByNodeName(ref) { it.scope == scopeManager.currentScope }
when {
// When a target scope is set, then we have a `global` or `nonlocal` keyword for
// this symbol, and we need to start looking in this scope
targetScope != null -> scopeManager.lookupSymbolByNodeName(ref, targetScope)
// When we have a qualified reference (such as `self.a`), we do not have any
// specific restrictions, because the lookup will anyway be a qualified lookup,
// and it will consider only the scope of `self`.
ref.name.isQualified() -> scopeManager.lookupSymbolByNodeName(ref)
// In any other case, we need to restrict the lookup to the current scope. The
// main reason for this is that Python requires the `global` keyword in functions
// for assigning to global variables. See
// https://docs.python.org/3/reference/simple_stmts.html#the-global-statement. So
// basically we need to ignore all global variables at this point and only look
// for local ones.
else ->
scopeManager.lookupSymbolByNodeName(ref) {
it.scope == scopeManager.currentScope
}
}

// Nothing to create
// If the symbol is already defined in the designed scope, there is nothing to create
if (symbol.isNotEmpty()) return null

// First, check if we need to create a field
Expand All @@ -137,7 +148,7 @@ class PythonAddDeclarationsPass(ctx: TranslationContext) : ComponentPass(ctx), L
scopeManager.isInRecord && !scopeManager.isInFunction -> {
// We end up here for fields declared directly in the class body. These are
// class attributes; more or less static fields.
newFieldDeclaration(ref.name)
newFieldDeclaration(scopeManager.currentNamespace.fqn(ref.name.localName))
}
else -> {
null
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright (c) 2025, Fraunhofer AISEC. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* $$$$$$\ $$$$$$$\ $$$$$$\
* $$ __$$\ $$ __$$\ $$ __$$\
* $$ / \__|$$ | $$ |$$ / \__|
* $$ | $$$$$$$ |$$ |$$$$\
* $$ | $$ ____/ $$ |\_$$ |
* $$ | $$\ $$ | $$ | $$ |
* \$$$$$ |$$ | \$$$$$ |
* \______/ \__| \______/
*
*/
package de.fraunhofer.aisec.cpg.frontends.python

import de.fraunhofer.aisec.cpg.graph.*
import de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration
import de.fraunhofer.aisec.cpg.graph.declarations.MethodDeclaration
import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression
import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression
import de.fraunhofer.aisec.cpg.test.analyze
import de.fraunhofer.aisec.cpg.test.assertNotRefersTo
import de.fraunhofer.aisec.cpg.test.assertRefersTo
import java.io.File
import kotlin.test.*

class SymbolResolverTest {
@Test
fun testFields() {
val topLevel = File("src/test/resources/python/fields.py")
val result =
analyze(listOf(topLevel), topLevel.toPath(), true) {
it.registerLanguage<PythonLanguage>()
}

val globalA =
result.namespaces["fields"]
.variables[{ it.name.localName == "a" && it !is FieldDeclaration }]
assertNotNull(globalA)

// Make sure, we only have one (!) field a
val fieldsA = result.records["MyClass"]?.fields("a")
val fieldA = fieldsA?.singleOrNull()
assertNotNull(fieldA)

val aRefs = result.refs("a")
aRefs.filterIsInstance<MemberExpression>().forEach { assertRefersTo(it, fieldA) }
aRefs.filter { it !is MemberExpression }.forEach { assertRefersTo(it, globalA) }

// We should only have one reference to "os" -> the member expression "self.os"
val osRefs = result.refs("os")
assertEquals(1, osRefs.size)
assertIs<MemberExpression>(osRefs.singleOrNull())

// "os.name" is not a member expression but a reference to the field "name" of the "os"
// module, therefore it is a reference
val osNameRefs = result.refs("os.name")
assertEquals(1, osNameRefs.size)
assertIsNot<MemberExpression>(osNameRefs.singleOrNull())

// Same tests but for fields declared at the record level.
// A variable "declared" inside a class is considered a field in Python.
val fieldCopyA = result.records["MyClass"]?.fields["copyA"]
assertIs<FieldDeclaration>(fieldCopyA)
val baz = result.records["MyClass"]?.methods["baz"]
assertIs<MethodDeclaration>(baz)
val bazPrint = baz.calls("print").singleOrNull()
assertIs<CallExpression>(bazPrint)
val bazPrintArgument = bazPrint.arguments.firstOrNull()
assertRefersTo(bazPrintArgument, fieldCopyA)

// make sure, that this does not work without the receiver
val bazDoesNotWork = baz.calls("doesNotWork").singleOrNull()
assertIs<CallExpression>(bazDoesNotWork)
val bazDoesNotWorkArgument = bazDoesNotWork.arguments.firstOrNull()
assertNotNull(bazDoesNotWorkArgument)
assertNotRefersTo(bazDoesNotWorkArgument, fieldCopyA)
}
}
23 changes: 23 additions & 0 deletions cpg-language-python/src/test/resources/python/fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import os

a = "Hello"

class MyClass:
copyA = 1
def foo(self):
self.a = 1
print(a)

def bar(self):
self.os = 1
print(os.name)
self.a = 1

def baz(self):
print(self.copyA)
doesNotWork(copyA) # not defined

m = MyClass()
m.foo()
m.bar()
m.baz()

0 comments on commit c72becd

Please sign in to comment.