diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt index 50e105dfd8..ad4b11a93c 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt @@ -726,7 +726,13 @@ class ScopeManager : ScopeProvider { scope: Scope? = node.scope, predicate: ((Declaration) -> Boolean)? = null, ): List { - return lookupSymbolByName(node.name, node.language, node.location, scope, predicate) + return lookupSymbolByName( + node.name, + node.language, + node.location, + scope, + predicate = predicate, + ) } /** diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt index e319747073..c28bf7b2dd 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt @@ -125,7 +125,14 @@ interface HasSuperClasses : LanguageTrait { /** * A language trait, that specifies that this language has support for implicit receiver, e.g., that - * one can omit references to a base such as `this`. + * one can omit references to a base such as `this`. Common examples are C++ and Java. + * + * This is contrast to languages such as Python and Go where the name of the receiver such as `self` + * is always required to access a field or method. + * + * We need this information to make a decision which symbols or scopes to consider when doing an + * unqualified lookup of a symbol in [Scope.lookupSymbol]. More specifically, we need to skip the + * symbols of a [RecordScope] if the language does NOT have this trait. */ interface HasImplicitReceiver : LanguageTrait { diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt index 365d6bcac3..6028ce8999 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt @@ -29,6 +29,7 @@ import de.fraunhofer.aisec.cpg.TranslationResult import de.fraunhofer.aisec.cpg.graph.declarations.* import de.fraunhofer.aisec.cpg.graph.edges.Edge import de.fraunhofer.aisec.cpg.graph.edges.flows.* +import de.fraunhofer.aisec.cpg.graph.scopes.Scope import de.fraunhofer.aisec.cpg.graph.statements.* import de.fraunhofer.aisec.cpg.graph.statements.expressions.* import de.fraunhofer.aisec.cpg.graph.statements.expressions.Block @@ -1062,10 +1063,8 @@ val Node?.assigns: List /** * This function tries to find the first parent node that satisfies the condition specified in - * [predicate]. It starts searching in the [searchNode], moving up-wards using the [Node.astParent] - * attribute. + * [predicate]. It starts searching in [this], moving upwards using the [Node.astParent] attribute. * - * @param searchNode the child node that we start the search from * @param predicate the search predicate */ fun Node.firstParentOrNull(predicate: (Node) -> Boolean): Node? { @@ -1085,6 +1084,26 @@ fun Node.firstParentOrNull(predicate: (Node) -> Boolean): Node? { return null } +/** + * This function returns the first parent scope that matches the given [predicate]. If no parent + * scope matches the predicate, null is returned. + * + * @param predicate The predicate to match the parent scope against + */ +fun Scope.firstScopeParentOrNull(predicate: (Scope) -> Boolean): Scope? { + var scope = this.parent + while (scope != null) { + if (predicate(scope)) { + return scope + } + + // go upwards in the scope tree + scope = scope.parent + } + + return null +} + /** * Return all [ProblemNode] children in this graph (either stored directly or in * [Node.additionalProblems]), starting with this [Node]. diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/scopes/Scope.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/scopes/Scope.kt index 9af1b5a797..325e0bbb39 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/scopes/Scope.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/scopes/Scope.kt @@ -26,11 +26,12 @@ package de.fraunhofer.aisec.cpg.graph.scopes import com.fasterxml.jackson.annotation.JsonBackReference +import de.fraunhofer.aisec.cpg.frontends.HasImplicitReceiver import de.fraunhofer.aisec.cpg.frontends.Language import de.fraunhofer.aisec.cpg.graph.Node -import de.fraunhofer.aisec.cpg.graph.Node.Companion.TO_STRING_STYLE import de.fraunhofer.aisec.cpg.graph.declarations.Declaration import de.fraunhofer.aisec.cpg.graph.declarations.ImportDeclaration +import de.fraunhofer.aisec.cpg.graph.firstScopeParentOrNull import de.fraunhofer.aisec.cpg.graph.statements.LabelStatement import de.fraunhofer.aisec.cpg.graph.statements.LookupScopeStatement import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference @@ -112,6 +113,10 @@ sealed class Scope( * current scope. This behaviour can be turned off with [thisScopeOnly]. This is useful for * qualified lookups, where we want to stay in our lookup-scope. * + * We need to consider the language trait [HasImplicitReceiver] here as well. If the language + * requires explicit member access, we must not consider symbols from record scopes unless we + * are in a qualified lookup. + * * @param symbol the symbol to lookup * @param thisScopeOnly whether we should stay in the current scope for lookup or traverse to * its parents if no match was found. @@ -166,11 +171,19 @@ sealed class Scope( // If we do not have a hit, we can go up one scope, unless thisScopeOnly is set to true // (or we had a modified scope) - if (thisScopeOnly || modifiedScoped != null) { - break - } else { - scope = scope.parent - } + scope = + if (thisScopeOnly || modifiedScoped != null) { + break + } else { + // If our language needs explicit lookup for fields (and other class members), + // we need to skip record scopes unless we are in a qualified lookup + if (languageOnly !is HasImplicitReceiver && scope.parent is RecordScope) { + scope.firstScopeParentOrNull { it !is RecordScope } + } else { + // Otherwise, we can just go to the next parent + scope.parent + } + } } return list ?: listOf() diff --git a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt index 6d97a8decc..4c634c8c64 100644 --- a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt +++ b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt @@ -42,7 +42,7 @@ import kotlin.reflect.KClass * a specific one. */ open class TestLanguage(final override var namespaceDelimiter: String = "::") : - Language() { + Language(), HasImplicitReceiver { override val fileExtensions: List = listOf() override val frontend: KClass = TestLanguageFrontend::class override val compoundAssignmentOperators = @@ -60,6 +60,8 @@ open class TestLanguage(final override var namespaceDelimiter: String = "::") : "double" to FloatingPointType("double", 64, this, NumericType.Modifier.SIGNED), "string" to StringType("string", this), ) + override val receiverName: String + get() = "this" } class StructTestLanguage(namespaceDelimiter: String = "::") : diff --git a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt index 261615ec5b..f556615cb9 100644 --- a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt +++ b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt @@ -29,10 +29,13 @@ import de.fraunhofer.aisec.cpg.TranslationConfiguration import de.fraunhofer.aisec.cpg.TranslationManager import de.fraunhofer.aisec.cpg.TranslationResult import de.fraunhofer.aisec.cpg.frontends.CompilationDatabase -import de.fraunhofer.aisec.cpg.graph.* +import de.fraunhofer.aisec.cpg.graph.ContextProvider +import de.fraunhofer.aisec.cpg.graph.LanguageProvider +import de.fraunhofer.aisec.cpg.graph.Node import de.fraunhofer.aisec.cpg.graph.declarations.Declaration import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration +import de.fraunhofer.aisec.cpg.graph.get import de.fraunhofer.aisec.cpg.graph.statements.expressions.* import de.fraunhofer.aisec.cpg.graph.types.Type import de.fraunhofer.aisec.cpg.test.TestUtils.ENFORCE_MEMBER_EXPRESSION @@ -43,16 +46,7 @@ import java.nio.file.Path import java.util.function.Consumer import java.util.function.Predicate import java.util.stream.Collectors -import kotlin.collections.filter -import kotlin.collections.first -import kotlin.collections.firstOrNull -import kotlin.collections.flatMap -import kotlin.collections.isNotEmpty -import kotlin.collections.joinToString -import kotlin.jvm.Throws import kotlin.test.* -import kotlin.text.endsWith -import kotlin.toString object TestUtils { @@ -237,6 +231,15 @@ fun assertRefersTo(expression: Expression?, b: Declaration?) { } } +/** Asserts, that the expression given in [expression] does not refer to the declaration [b]. */ +fun assertNotRefersTo(expression: Expression?, b: Declaration?) { + if (expression is Reference) { + assertNotEquals(b, (expression as Reference?)?.refersTo) + } else { + fail("not a reference") + } +} + /** * Asserts, that the call expression given in [call] refers to the expected function declaration * [func]. diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt index a98e484491..1b4640926b 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt @@ -807,7 +807,12 @@ class StatementHandler(frontend: PythonLanguageFrontend) : for (s in stmt.body) { when (s) { - is Python.AST.FunctionDef -> handleFunctionDef(s, cls) + is Python.AST.FunctionDef -> { + val stmt = handleFunctionDef(s, cls) + // We need to manually set the astParent because we are not assigning it to our + // statements and therefore are not triggering our automagic parent setter + stmt.astParent = cls + } else -> cls.statements += handleNode(s) } } diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt index 593f79cacf..d6760829c4 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt @@ -100,22 +100,33 @@ class PythonAddDeclarationsPass(ctx: TranslationContext) : ComponentPass(ctx), L } // Look for a potential scope modifier for this reference - // lookupScope var targetScope = scopeManager.currentScope?.predefinedLookupScopes[ref.name.toString()]?.targetScope - // There are a couple of things to consider now + // Try to see whether our symbol already exists. There are basically three rules to follow + // here. var symbol = - // Since this is a WRITE access, we need - // - to look for a local symbol, unless - // - a global keyword is present for this symbol and scope - if (targetScope != null) { - scopeManager.lookupSymbolByNodeName(ref, targetScope) - } else { - scopeManager.lookupSymbolByNodeName(ref) { it.scope == scopeManager.currentScope } + when { + // When a target scope is set, then we have a `global` or `nonlocal` keyword for + // this symbol, and we need to start looking in this scope + targetScope != null -> scopeManager.lookupSymbolByNodeName(ref, targetScope) + // When we have a qualified reference (such as `self.a`), we do not have any + // specific restrictions, because the lookup will anyway be a qualified lookup, + // and it will consider only the scope of `self`. + ref.name.isQualified() -> scopeManager.lookupSymbolByNodeName(ref) + // In any other case, we need to restrict the lookup to the current scope. The + // main reason for this is that Python requires the `global` keyword in functions + // for assigning to global variables. See + // https://docs.python.org/3/reference/simple_stmts.html#the-global-statement. So + // basically we need to ignore all global variables at this point and only look + // for local ones. + else -> + scopeManager.lookupSymbolByNodeName(ref) { + it.scope == scopeManager.currentScope + } } - // Nothing to create + // If the symbol is already defined in the designed scope, there is nothing to create if (symbol.isNotEmpty()) return null // First, check if we need to create a field @@ -137,7 +148,7 @@ class PythonAddDeclarationsPass(ctx: TranslationContext) : ComponentPass(ctx), L scopeManager.isInRecord && !scopeManager.isInFunction -> { // We end up here for fields declared directly in the class body. These are // class attributes; more or less static fields. - newFieldDeclaration(ref.name) + newFieldDeclaration(scopeManager.currentNamespace.fqn(ref.name.localName)) } else -> { null diff --git a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/SymbolResolverTest.kt b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/SymbolResolverTest.kt new file mode 100644 index 0000000000..ec7d2c01b7 --- /dev/null +++ b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/SymbolResolverTest.kt @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2025, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.frontends.python + +import de.fraunhofer.aisec.cpg.graph.* +import de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.MethodDeclaration +import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression +import de.fraunhofer.aisec.cpg.test.analyze +import de.fraunhofer.aisec.cpg.test.assertNotRefersTo +import de.fraunhofer.aisec.cpg.test.assertRefersTo +import java.io.File +import kotlin.test.* + +class SymbolResolverTest { + @Test + fun testFields() { + val topLevel = File("src/test/resources/python/fields.py") + val result = + analyze(listOf(topLevel), topLevel.toPath(), true) { + it.registerLanguage() + } + + val globalA = + result.namespaces["fields"] + .variables[{ it.name.localName == "a" && it !is FieldDeclaration }] + assertNotNull(globalA) + + // Make sure, we only have one (!) field a + val fieldsA = result.records["MyClass"]?.fields("a") + val fieldA = fieldsA?.singleOrNull() + assertNotNull(fieldA) + + val aRefs = result.refs("a") + aRefs.filterIsInstance().forEach { assertRefersTo(it, fieldA) } + aRefs.filter { it !is MemberExpression }.forEach { assertRefersTo(it, globalA) } + + // We should only have one reference to "os" -> the member expression "self.os" + val osRefs = result.refs("os") + assertEquals(1, osRefs.size) + assertIs(osRefs.singleOrNull()) + + // "os.name" is not a member expression but a reference to the field "name" of the "os" + // module, therefore it is a reference + val osNameRefs = result.refs("os.name") + assertEquals(1, osNameRefs.size) + assertIsNot(osNameRefs.singleOrNull()) + + // Same tests but for fields declared at the record level. + // A variable "declared" inside a class is considered a field in Python. + val fieldCopyA = result.records["MyClass"]?.fields["copyA"] + assertIs(fieldCopyA) + val baz = result.records["MyClass"]?.methods["baz"] + assertIs(baz) + val bazPrint = baz.calls("print").singleOrNull() + assertIs(bazPrint) + val bazPrintArgument = bazPrint.arguments.firstOrNull() + assertRefersTo(bazPrintArgument, fieldCopyA) + + // make sure, that this does not work without the receiver + val bazDoesNotWork = baz.calls("doesNotWork").singleOrNull() + assertIs(bazDoesNotWork) + val bazDoesNotWorkArgument = bazDoesNotWork.arguments.firstOrNull() + assertNotNull(bazDoesNotWorkArgument) + assertNotRefersTo(bazDoesNotWorkArgument, fieldCopyA) + } +} diff --git a/cpg-language-python/src/test/resources/python/fields.py b/cpg-language-python/src/test/resources/python/fields.py new file mode 100644 index 0000000000..1ff4a16817 --- /dev/null +++ b/cpg-language-python/src/test/resources/python/fields.py @@ -0,0 +1,23 @@ +import os + +a = "Hello" + +class MyClass: + copyA = 1 + def foo(self): + self.a = 1 + print(a) + + def bar(self): + self.os = 1 + print(os.name) + self.a = 1 + + def baz(self): + print(self.copyA) + doesNotWork(copyA) # not defined + +m = MyClass() +m.foo() +m.bar() +m.baz()