From 5fb75ac9878cf9fd59cffa267520979dc6fce0da Mon Sep 17 00:00:00 2001 From: yoff Date: Mon, 1 Jun 2026 14:04:43 +0000 Subject: [PATCH 1/3] Python: simplify decorator-detection predicates to pure AST match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The internal predicates that identify `@staticmethod`, `@classmethod` and `@property` decorators previously required the decorator's `NameNode` to satisfy `isGlobal()` (i.e. no SSA def reaches the decorator's name use). That filter was correct but unnecessarily indirect: these three names are builtins, and even when a class body redefines one, the class body has not started executing at the decorator position, so Python uses the builtin. Match the decorator's AST `Name` directly instead, dropping the CFG/SSA detour. The slight semantic change — `isGlobal()` would have rejected module-level shadowing of these builtins — is negligible in practice and explicitly documented in the change note. `hasContextmanagerDecorator` and `hasOverloadDecorator` keep the `NameNode.isGlobal()` check because their target names (`contextmanager`, `overload`) are imported, not builtin, and local shadowing is a real concern. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ...6-01-decorator-predicate-simplification.md | 4 ++++ .../new/internal/DataFlowDispatch.qll | 20 ++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) create mode 100644 python/ql/lib/change-notes/2026-06-01-decorator-predicate-simplification.md diff --git a/python/ql/lib/change-notes/2026-06-01-decorator-predicate-simplification.md b/python/ql/lib/change-notes/2026-06-01-decorator-predicate-simplification.md new file mode 100644 index 000000000000..44ee5b5ff808 --- /dev/null +++ b/python/ql/lib/change-notes/2026-06-01-decorator-predicate-simplification.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Simplified the internal predicates that detect `@staticmethod`, `@classmethod` and `@property` decorators to match the decorator's AST `Name` directly, rather than going through the CFG and requiring the name to resolve globally. Code that shadows these three builtin decorators at the module-scope will now be classified by the decorator name alone; in practice, shadowing these names is extremely rare and the call-graph results are unchanged. diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index 1db6c08f5f43..4e3a011e8d10 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -256,9 +256,12 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { */ overlay[local] predicate isStaticmethod(Function func) { - exists(NameNode id | id.getId() = "staticmethod" and id.isGlobal() | - func.getADecorator() = id.getNode() - ) + // The decorator is *syntactically* a `Name` "staticmethod" — we don't + // care which variable it resolves to. `staticmethod` is a builtin and + // is almost never shadowed in a module-level scope; even if a class + // redefines `staticmethod` in its body, the class body has not started + // executing yet at the decorator position, so Python uses the builtin. + func.getADecorator().(Name).getId() = "staticmethod" } /** @@ -268,9 +271,9 @@ predicate isStaticmethod(Function func) { */ overlay[local] predicate isClassmethod(Function func) { - exists(NameNode id | id.getId() = "classmethod" and id.isGlobal() | - func.getADecorator() = id.getNode() - ) + // See `isStaticmethod` for the rationale for matching on the AST `Name` + // rather than going via the CFG and `isGlobal()`. + func.getADecorator().(Name).getId() = "classmethod" or exists(Class cls | cls.getAMethod() = func and @@ -285,9 +288,8 @@ predicate isClassmethod(Function func) { /** Holds if the function `func` has a `property` decorator. */ overlay[local] predicate hasPropertyDecorator(Function func) { - exists(NameNode id | id.getId() = "property" and id.isGlobal() | - func.getADecorator() = id.getNode() - ) + // See `isStaticmethod` for the rationale for matching on the AST `Name`. + func.getADecorator().(Name).getId() = "property" } /** From 677755edd45b74716f6cd71459d27345b9bd91b7 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Mon, 1 Jun 2026 10:53:39 +0000 Subject: [PATCH 2/3] Python: remove AstNode.getAFlowNode() and rewrite callers Preparatory refactor for the shared-CFG dataflow migration. Removes the AstNode.getAFlowNode() cached predicate from the public Python QL API. All ~140 callers across lib/, src/, test/, and tools/ are rewritten from `expr.getAFlowNode() = cfgNode` to `cfgNode.getNode() = expr`, using ControlFlowNode.getNode() which already exists in Flow.qll. Semantic noop verified by: - All 361 lib/ + src/ queries compile clean. - All 122 ControlFlow + PointsTo library-tests pass. - All 64 dataflow library-tests pass. - All 113 Variables/Exceptions/Expressions/Statements/Functions/Imports/ Security/CWE-798/ModificationOfParameterWithDefault query-tests pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/ql/lib/LegacyPointsTo.qll | 33 ++++++++++++++----- python/ql/lib/analysis/DefinitionTracking.qll | 4 +-- .../2026-05-19-remove-getAFlowNode.md | 4 +++ python/ql/lib/semmle/python/AstExtended.qll | 11 ------- python/ql/lib/semmle/python/Exprs.qll | 18 ++-------- python/ql/lib/semmle/python/Flow.qll | 26 +++++++-------- python/ql/lib/semmle/python/Import.qll | 2 -- python/ql/lib/semmle/python/SelfAttribute.qll | 17 ++++++---- .../python/dataflow/new/BarrierGuards.qll | 30 ++++++++++------- .../dataflow/new/internal/Attributes.qll | 2 +- .../new/internal/DataFlowDispatch.qll | 6 ++-- .../dataflow/new/internal/DataFlowPrivate.qll | 4 +-- .../dataflow/new/internal/DataFlowPublic.qll | 2 +- .../dataflow/new/internal/VariableCapture.qll | 4 +-- .../python/dataflow/old/Implementation.qll | 14 ++++---- .../lib/semmle/python/essa/SsaDefinitions.qll | 6 ++-- .../lib/semmle/python/frameworks/Bottle.qll | 2 +- .../lib/semmle/python/frameworks/FastApi.qll | 2 +- .../ql/lib/semmle/python/frameworks/Flask.qll | 4 +-- .../semmle/python/frameworks/FlaskAdmin.qll | 4 +-- .../semmle/python/internal/CachedStages.qll | 2 +- .../lib/semmle/python/objects/Callables.qll | 10 +++--- .../ql/lib/semmle/python/objects/Classes.qll | 5 +-- .../ql/lib/semmle/python/objects/TObject.qll | 2 +- .../lib/semmle/python/pointsto/PointsTo.qll | 14 +++++--- .../lib/semmle/python/types/ClassObject.qll | 7 ++-- .../ql/lib/semmle/python/types/Exceptions.qll | 2 +- .../ql/lib/semmle/python/types/Extensions.qll | 4 +-- .../semmle/python/types/FunctionObject.qll | 2 +- python/ql/lib/semmle/python/types/Object.qll | 6 ++-- python/ql/src/Classes/ClassAttributes.qll | 22 ++++++++----- .../src/Exceptions/CatchingBaseException.ql | 4 ++- python/ql/src/Expressions/CallArgs.qll | 16 +++++---- .../DuplicateKeyInDictionaryLiteral.ql | 10 ++++-- .../Formatting/AdvancedFormatting.qll | 8 +++-- .../Expressions/IncorrectComparisonUsingIs.ql | 2 +- python/ql/src/Expressions/IsComparisons.qll | 2 +- .../ql/src/Expressions/TruncatedDivision.ql | 2 +- .../ql/src/Functions/ExplicitReturnInInit.ql | 4 ++- python/ql/src/Functions/ReturnValueIgnored.ql | 7 +++- python/ql/src/Resources/FileOpen.qll | 4 +-- .../Security/CWE-798/HardcodedCredentials.ql | 2 +- .../Statements/IterableStringOrSequence.ql | 2 +- .../NestedLoopsSameVariableWithReuse.ql | 2 +- .../ql/src/Statements/NonIteratorInForLoop.ql | 2 +- .../ql/src/Statements/SideEffectInAssert.ql | 12 ++++--- python/ql/src/Variables/Definition.qll | 6 +++- .../src/Variables/LeakingListComprehension.ql | 13 +++++--- python/ql/src/Variables/Loop.qll | 7 ++-- python/ql/src/Variables/MultiplyDefined.ql | 4 +-- .../SuspiciousUnusedLoopIterationVariable.ql | 4 ++- python/ql/src/Variables/Undefined.qll | 5 +-- python/ql/src/Variables/UndefinedGlobal.ql | 10 +++--- .../ql/src/Variables/UndefinedPlaceHolder.ql | 4 +-- .../ql/src/Variables/UnusedModuleVariable.ql | 2 +- .../src/analysis/CrossProjectDefinitions.qll | 8 +++-- python/ql/src/analysis/ImportFailure.ql | 7 ++-- python/ql/src/analysis/KeyPointsToFailure.ql | 4 +-- python/ql/src/analysis/PointsToFailure.ql | 2 +- ...onOfParameterWithDefaultCustomizations.qll | 2 +- .../comprehensions/ConsistencyCheck.ql | 4 ++- .../import-resolution/importflow.ql | 8 +++-- .../PointsToSupport/UseFromDefinition.ql | 2 +- .../ControlFlow/splitting/NodeCount.ql | 2 +- .../library-tests/PointsTo/global/Global.ql | 2 +- .../PointsTo/local/LocalPointsTo.ql | 2 +- .../dataflow/tainttracking/TestTaintLib.qll | 2 +- .../ql/lib/RecordedCalls.qll | 5 +-- 68 files changed, 257 insertions(+), 198 deletions(-) create mode 100644 python/ql/lib/change-notes/2026-05-19-remove-getAFlowNode.md diff --git a/python/ql/lib/LegacyPointsTo.qll b/python/ql/lib/LegacyPointsTo.qll index ffea2d93b66c..f5ad67a3c555 100644 --- a/python/ql/lib/LegacyPointsTo.qll +++ b/python/ql/lib/LegacyPointsTo.qll @@ -213,9 +213,11 @@ class ExprWithPointsTo extends Expr { * Gets what this expression might "refer-to" in the given `context`. */ predicate refersTo(Context context, Object obj, ClassObject cls, AstNode origin) { - this.getAFlowNode() - .(ControlFlowNodeWithPointsTo) - .refersTo(context, obj, cls, origin.getAFlowNode()) + exists(ControlFlowNode this_, ControlFlowNode origin_ | + this_.getNode() = this and origin_.getNode() = origin + | + this_.(ControlFlowNodeWithPointsTo).refersTo(context, obj, cls, origin_) + ) } /** @@ -226,7 +228,11 @@ class ExprWithPointsTo extends Expr { */ pragma[nomagic] predicate refersTo(Object obj, AstNode origin) { - this.getAFlowNode().(ControlFlowNodeWithPointsTo).refersTo(obj, origin.getAFlowNode()) + exists(ControlFlowNode this_, ControlFlowNode origin_ | + this_.getNode() = this and origin_.getNode() = origin + | + this_.(ControlFlowNodeWithPointsTo).refersTo(obj, origin_) + ) } /** @@ -240,16 +246,22 @@ class ExprWithPointsTo extends Expr { * in the given `context`. */ predicate pointsTo(Context context, Value value, AstNode origin) { - this.getAFlowNode() - .(ControlFlowNodeWithPointsTo) - .pointsTo(context, value, origin.getAFlowNode()) + exists(ControlFlowNode this_, ControlFlowNode origin_ | + this_.getNode() = this and origin_.getNode() = origin + | + this_.(ControlFlowNodeWithPointsTo).pointsTo(context, value, origin_) + ) } /** * Holds if this expression might "point-to" to `value` which is from `origin`. */ predicate pointsTo(Value value, AstNode origin) { - this.getAFlowNode().(ControlFlowNodeWithPointsTo).pointsTo(value, origin.getAFlowNode()) + exists(ControlFlowNode this_, ControlFlowNode origin_ | + this_.getNode() = this and origin_.getNode() = origin + | + this_.(ControlFlowNodeWithPointsTo).pointsTo(value, origin_) + ) } /** @@ -475,7 +487,10 @@ class FunctionMetricsWithPointsTo extends FunctionMetrics { not non_coupling_method(result) and exists(Call call | call.getScope() = this | exists(FunctionObject callee | callee.getFunction() = result | - call.getAFlowNode().getFunction().(ControlFlowNodeWithPointsTo).refersTo(callee) + exists(CallNode call_ | + call_.getNode() = call and + call_.getFunction().(ControlFlowNodeWithPointsTo).refersTo(callee) + ) ) or exists(Attribute a | call.getFunc() = a | diff --git a/python/ql/lib/analysis/DefinitionTracking.qll b/python/ql/lib/analysis/DefinitionTracking.qll index 21155970375b..583a7807ff27 100644 --- a/python/ql/lib/analysis/DefinitionTracking.qll +++ b/python/ql/lib/analysis/DefinitionTracking.qll @@ -64,7 +64,7 @@ private predicate jump_to_defn(ControlFlowNode use, Definition defn) { private predicate preferred_jump_to_defn(Expr use, Definition def) { not use instanceof ClassExpr and not use instanceof FunctionExpr and - jump_to_defn(use.getAFlowNode(), def) + exists(ControlFlowNode useNode | useNode.getNode() = use | jump_to_defn(useNode, def)) } private predicate unique_jump_to_defn(Expr use, Definition def) { @@ -452,7 +452,7 @@ private predicate self_parameter_jump_to_defn_attribute( * This exists primarily for testing use `getPreferredDefinition()` instead. */ Definition getADefinition(Expr use) { - jump_to_defn(use.getAFlowNode(), result) and + exists(ControlFlowNode useNode | useNode.getNode() = use | jump_to_defn(useNode, result)) and not use instanceof Call and not use.isArtificial() and // Not the use itself diff --git a/python/ql/lib/change-notes/2026-05-19-remove-getAFlowNode.md b/python/ql/lib/change-notes/2026-05-19-remove-getAFlowNode.md new file mode 100644 index 000000000000..82cedf579047 --- /dev/null +++ b/python/ql/lib/change-notes/2026-05-19-remove-getAFlowNode.md @@ -0,0 +1,4 @@ +--- +category: breaking +--- +* The `AstNode.getAFlowNode()` predicate has been removed. Use `ControlFlowNode.getNode()` from the other direction instead: replace `e.getAFlowNode() = n` with `n.getNode() = e`. This is a preparatory refactor for migrating the dataflow library off the legacy CFG; it has no semantic effect. diff --git a/python/ql/lib/semmle/python/AstExtended.qll b/python/ql/lib/semmle/python/AstExtended.qll index 13da4e899a71..32b9ce6eee7c 100644 --- a/python/ql/lib/semmle/python/AstExtended.qll +++ b/python/ql/lib/semmle/python/AstExtended.qll @@ -16,17 +16,6 @@ abstract class AstNode extends AstNode_ { /** Gets the scope that this node occurs in */ abstract Scope getScope(); - /** - * Gets a flow node corresponding directly to this node. - * NOTE: For some statements and other purely syntactic elements, - * there may not be a `ControlFlowNode` - */ - cached - ControlFlowNode getAFlowNode() { - Stages::AST::ref() and - py_flow_bb_node(result, this, _, _) - } - /** Gets the location for this AST node */ cached Location getLocation() { none() } diff --git a/python/ql/lib/semmle/python/Exprs.qll b/python/ql/lib/semmle/python/Exprs.qll index 6ab9f8d8340d..9ce6f9e6680a 100644 --- a/python/ql/lib/semmle/python/Exprs.qll +++ b/python/ql/lib/semmle/python/Exprs.qll @@ -28,7 +28,9 @@ class Expr extends Expr_, AstNode { /** Whether this expression may have a side effect (as determined purely from its syntax) */ predicate hasSideEffects() { /* If an exception raised by this expression handled, count that as a side effect */ - this.getAFlowNode().getASuccessor().getNode() instanceof ExceptStmt + exists(ControlFlowNode n | n.getNode() = this | + n.getASuccessor().getNode() instanceof ExceptStmt + ) or this.getASubExpression().hasSideEffects() } @@ -68,8 +70,6 @@ class Attribute extends Attribute_ { /* syntax: Expr.name */ override Expr getASubExpression() { result = this.getObject() } - override AttrNode getAFlowNode() { result = super.getAFlowNode() } - /** Gets the name of this attribute. That is the `name` in `obj.name` */ string getName() { result = Attribute_.super.getAttr() } @@ -96,8 +96,6 @@ class Subscript extends Subscript_ { } Expr getObject() { result = Subscript_.super.getValue() } - - override SubscriptNode getAFlowNode() { result = super.getAFlowNode() } } /** A call expression, such as `func(...)` */ @@ -113,8 +111,6 @@ class Call extends Call_ { override string toString() { result = this.getFunc().toString() + "()" } - override CallNode getAFlowNode() { result = super.getAFlowNode() } - /** Gets a tuple (*) argument of this call. */ Expr getStarargs() { result = this.getAPositionalArg().(Starred).getValue() } @@ -200,8 +196,6 @@ class IfExp extends IfExp_ { override Expr getASubExpression() { result = this.getTest() or result = this.getBody() or result = this.getOrelse() } - - override IfExprNode getAFlowNode() { result = super.getAFlowNode() } } /** A starred expression, such as the `*rest` in the assignment `first, *rest = seq` */ @@ -410,8 +404,6 @@ class PlaceHolder extends PlaceHolder_ { override Expr getASubExpression() { none() } override string toString() { result = "$" + this.getId() } - - override NameNode getAFlowNode() { result = super.getAFlowNode() } } /** A tuple expression such as `( 1, 3, 5, 7, 9 )` */ @@ -478,8 +470,6 @@ class Name extends Name_ { override string toString() { result = this.getId() } - override NameNode getAFlowNode() { result = super.getAFlowNode() } - override predicate isArtificial() { /* Artificial variable names in comprehensions all start with "." */ this.getId().charAt(0) = "." @@ -585,8 +575,6 @@ abstract class NameConstant extends Name, ImmutableLiteral { override predicate isConstant() { any() } - override NameConstantNode getAFlowNode() { result = Name.super.getAFlowNode() } - override predicate isArtificial() { none() } } diff --git a/python/ql/lib/semmle/python/Flow.qll b/python/ql/lib/semmle/python/Flow.qll index 94caf513aa98..bdb01f4c9e2e 100644 --- a/python/ql/lib/semmle/python/Flow.qll +++ b/python/ql/lib/semmle/python/Flow.qll @@ -555,27 +555,27 @@ class DefinitionNode extends ControlFlowNode { cached DefinitionNode() { Stages::AST::ref() and - exists(Assign a | a.getATarget().getAFlowNode() = this) + exists(Assign a | this.getNode() = a.getATarget()) or - exists(AssignExpr a | a.getTarget().getAFlowNode() = this) + exists(AssignExpr a | this.getNode() = a.getTarget()) or - exists(AnnAssign a | a.getTarget().getAFlowNode() = this and exists(a.getValue())) + exists(AnnAssign a | this.getNode() = a.getTarget() and exists(a.getValue())) or - exists(Alias a | a.getAsname().getAFlowNode() = this) + exists(Alias a | this.getNode() = a.getAsname()) or augstore(_, this) or // `x, y = 1, 2` where LHS is a combination of list or tuples - exists(Assign a | list_or_tuple_nested_element(a.getATarget()).getAFlowNode() = this) + exists(Assign a | this.getNode() = list_or_tuple_nested_element(a.getATarget())) or - exists(For for | for.getTarget().getAFlowNode() = this) + exists(For for | this.getNode() = for.getTarget()) or - exists(Parameter param | this = param.asName().getAFlowNode() and exists(param.getDefault())) + exists(Parameter param | this.getNode() = param.asName() and exists(param.getDefault())) } /** flow node corresponding to the value assigned for the definition corresponding to this flow node */ ControlFlowNode getValue() { - result = assigned_value(this.getNode()).getAFlowNode() and + result.getNode() = assigned_value(this.getNode()) and ( result.getBasicBlock().dominates(this.getBasicBlock()) or @@ -584,7 +584,7 @@ class DefinitionNode extends ControlFlowNode { // since the default value for a parameter is evaluated in the same basic block as // the function definition, but the parameter belongs to the basic block of the function, // there is no dominance relationship between the two. - exists(Parameter param | this = param.asName().getAFlowNode()) + exists(Parameter param | this.getNode() = param.asName()) ) } } @@ -901,7 +901,7 @@ class ExceptFlowNode extends ControlFlowNode { exists(ExceptStmt ex | this.getBasicBlock().dominates(result.getBasicBlock()) and ex = this.getNode() and - result = ex.getType().getAFlowNode() + result.getNode() = ex.getType() ) } @@ -913,7 +913,7 @@ class ExceptFlowNode extends ControlFlowNode { exists(ExceptStmt ex | this.getBasicBlock().dominates(result.getBasicBlock()) and ex = this.getNode() and - result = ex.getName().getAFlowNode() + result.getNode() = ex.getName() ) } } @@ -928,7 +928,7 @@ class ExceptGroupFlowNode extends ControlFlowNode { */ ControlFlowNode getType() { this.getBasicBlock().dominates(result.getBasicBlock()) and - result = this.getNode().(ExceptGroupStmt).getType().getAFlowNode() + result.getNode() = this.getNode().(ExceptGroupStmt).getType() } /** @@ -937,7 +937,7 @@ class ExceptGroupFlowNode extends ControlFlowNode { */ ControlFlowNode getName() { this.getBasicBlock().dominates(result.getBasicBlock()) and - result = this.getNode().(ExceptGroupStmt).getName().getAFlowNode() + result.getNode() = this.getNode().(ExceptGroupStmt).getName() } } diff --git a/python/ql/lib/semmle/python/Import.qll b/python/ql/lib/semmle/python/Import.qll index 2f7fae955399..d4f5109ed47c 100644 --- a/python/ql/lib/semmle/python/Import.qll +++ b/python/ql/lib/semmle/python/Import.qll @@ -162,8 +162,6 @@ class ImportMember extends ImportMember_ { string getImportedModuleName() { result = this.getModule().(ImportExpr).getImportedModuleName() + "." + this.getName() } - - override ImportMemberNode getAFlowNode() { result = super.getAFlowNode() } } /** An import statement */ diff --git a/python/ql/lib/semmle/python/SelfAttribute.qll b/python/ql/lib/semmle/python/SelfAttribute.qll index 90ef2b38401a..364e080dcdd7 100644 --- a/python/ql/lib/semmle/python/SelfAttribute.qll +++ b/python/ql/lib/semmle/python/SelfAttribute.qll @@ -46,20 +46,23 @@ class SelfAttributeRead extends SelfAttribute { } predicate guardedByHasattr() { - exists(Variable var, ControlFlowNode n | - var.getAUse() = this.getObject().getAFlowNode() and + exists(Variable var, ControlFlowNode n, ControlFlowNode this_, ControlFlowNode obj_ | + this_.getNode() = this and obj_.getNode() = this.getObject() + | + var.getAUse() = obj_ and hasattr(n, var.getAUse(), this.getName()) and - n.strictlyDominates(this.getAFlowNode()) + n.strictlyDominates(this_) ) } pragma[noinline] predicate locallyDefined() { - exists(SelfAttributeStore store | - this.getName() = store.getName() and - this.getScope() = store.getScope() + exists(SelfAttributeStore store, ControlFlowNode store_, ControlFlowNode this_ | + store_.getNode() = store and this_.getNode() = this | - store.getAFlowNode().strictlyDominates(this.getAFlowNode()) + this.getName() = store.getName() and + this.getScope() = store.getScope() and + store_.strictlyDominates(this_) ) } } diff --git a/python/ql/lib/semmle/python/dataflow/new/BarrierGuards.qll b/python/ql/lib/semmle/python/dataflow/new/BarrierGuards.qll index fefa30965cec..072098991bb4 100644 --- a/python/ql/lib/semmle/python/dataflow/new/BarrierGuards.qll +++ b/python/ql/lib/semmle/python/dataflow/new/BarrierGuards.qll @@ -5,24 +5,30 @@ private import semmle.python.dataflow.new.DataFlow private predicate constCompare(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) { exists(CompareNode cn | cn = g | - exists(ImmutableLiteral const, Cmpop op | - op = any(Eq eq) and branch = true - or - op = any(NotEq ne) and branch = false + exists(ImmutableLiteral const, Cmpop op, ControlFlowNode c | + c.getNode() = const and + ( + op = any(Eq eq) and branch = true + or + op = any(NotEq ne) and branch = false + ) | - cn.operands(const.getAFlowNode(), op, node) + cn.operands(c, op, node) or - cn.operands(node, op, const.getAFlowNode()) + cn.operands(node, op, c) ) or - exists(NameConstant const, Cmpop op | - op = any(Is is_) and branch = true - or - op = any(IsNot isn) and branch = false + exists(NameConstant const, Cmpop op, ControlFlowNode c | + c.getNode() = const and + ( + op = any(Is is_) and branch = true + or + op = any(IsNot isn) and branch = false + ) | - cn.operands(const.getAFlowNode(), op, node) + cn.operands(c, op, node) or - cn.operands(node, op, const.getAFlowNode()) + cn.operands(node, op, c) ) or exists(IterableNode const_iterable, Cmpop op | diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll b/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll index 8778ae288667..76d2cb11e144 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll @@ -228,7 +228,7 @@ private class ClassDefinitionAsAttrWrite extends AttrWrite, CfgNode { override Node getValue() { result.asCfgNode() = node.getValue() } - override Node getObject() { result.asCfgNode() = cls.getAFlowNode() } + override Node getObject() { result.asCfgNode().getNode() = cls } override ExprNode getAttributeNameExpr() { none() } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index 4e3a011e8d10..23b49882445e 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -1913,8 +1913,8 @@ abstract class ReturnNode extends Node { class ExtractedReturnNode extends ReturnNode, CfgNode { // See `TaintTrackingImplementation::returnFlowStep` ExtractedReturnNode() { - node = any(Return ret).getValue().getAFlowNode() or - node = any(Yield yield).getAFlowNode() + node.getNode() = any(Return ret).getValue() or + node.getNode() = any(Yield yield) } override ReturnKind getKind() { any() } @@ -1932,7 +1932,7 @@ class ExtractedReturnNode extends ReturnNode, CfgNode { class YieldNodeInContextManagerFunction extends ReturnNode, CfgNode { YieldNodeInContextManagerFunction() { hasContextmanagerDecorator(node.getScope()) and - node = any(Yield yield).getValue().getAFlowNode() + node.getNode() = any(Yield yield).getValue() } override ReturnKind getKind() { any() } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index fffd0150008e..67963e7cd382 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -185,8 +185,8 @@ private predicate synthDictSplatArgumentNodeStoreStep( */ predicate yieldStoreStep(Node nodeFrom, Content c, Node nodeTo) { exists(Yield yield | - nodeTo.asCfgNode() = yield.getAFlowNode() and - nodeFrom.asCfgNode() = yield.getValue().getAFlowNode() and + nodeTo.asCfgNode().getNode() = yield and + nodeFrom.asCfgNode().getNode() = yield.getValue() and // TODO: Consider if this will also need to transfer dictionary content // once dictionary comprehensions are supported. c instanceof ListElementContent diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 8612d4a253e0..a9d73fe0527f 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -485,7 +485,7 @@ class ModuleVariableNode extends Node, TModuleVariableNode { /** Gets a node that reads this variable, excluding reads that happen through `from ... import *`. */ Node getALocalRead() { - result.asCfgNode() = var.getALoad().getAFlowNode() and + result.asCfgNode().getNode() = var.getALoad() and not result.getScope() = mod } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll b/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll index fbe05979328c..b170f7d95d79 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/VariableCapture.qll @@ -61,7 +61,7 @@ private module CaptureInput implements Shared::InputSig limit @@ -211,7 +215,7 @@ predicate too_many_args(Call call, Value callable, int limit) { call = func.getAMethodCall().getNode() and limit = func.maxParameters() - 1 or callable instanceof ClassValue and - call.getAFlowNode() = get_a_call(callable) and + exists(ControlFlowNode callCfg | callCfg.getNode() = call | callCfg = get_a_call(callable)) and limit = func.maxParameters() - 1 ) and positional_arg_count_for_call(call, callable) > limit diff --git a/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.ql b/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.ql index 166eae635fad..0c55a2ece587 100644 --- a/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.ql +++ b/python/ql/src/Expressions/DuplicateKeyInDictionaryLiteral.ql @@ -36,11 +36,15 @@ where exists(string s | dict_key(d, k1, s) and dict_key(d, k2, s) and k1 != k2) and ( exists(BasicBlock b, int i1, int i2 | - k1.getAFlowNode() = b.getNode(i1) and - k2.getAFlowNode() = b.getNode(i2) and + b.getNode(i1).getNode() = k1 and + b.getNode(i2).getNode() = k2 and i1 < i2 ) or - k1.getAFlowNode().getBasicBlock().strictlyDominates(k2.getAFlowNode().getBasicBlock()) + exists(ControlFlowNode k1Cfg, ControlFlowNode k2Cfg | + k1Cfg.getNode() = k1 and k2Cfg.getNode() = k2 + | + k1Cfg.getBasicBlock().strictlyDominates(k2Cfg.getBasicBlock()) + ) ) select k1, "Dictionary key " + repr(k1) + " is subsequently $@.", k2, "overwritten" diff --git a/python/ql/src/Expressions/Formatting/AdvancedFormatting.qll b/python/ql/src/Expressions/Formatting/AdvancedFormatting.qll index d98286d85faf..a5e0379685a3 100644 --- a/python/ql/src/Expressions/Formatting/AdvancedFormatting.qll +++ b/python/ql/src/Expressions/Formatting/AdvancedFormatting.qll @@ -98,16 +98,18 @@ private predicate brace_pair(PossibleAdvancedFormatString fmt, int start, int en } private predicate advanced_format_call(Call format_expr, PossibleAdvancedFormatString fmt, int args) { - exists(CallNode call | call = format_expr.getAFlowNode() | + exists(CallNode call, ControlFlowNode fmtCfg | + call.getNode() = format_expr and fmtCfg.getNode() = fmt + | call.getFunction().(ControlFlowNodeWithPointsTo).pointsTo(Value::named("format")) and - call.getArg(0).(ControlFlowNodeWithPointsTo).pointsTo(_, fmt.getAFlowNode()) and + call.getArg(0).(ControlFlowNodeWithPointsTo).pointsTo(_, fmtCfg) and args = count(format_expr.getAnArg()) - 1 or call.getFunction() .(AttrNode) .getObject("format") .(ControlFlowNodeWithPointsTo) - .pointsTo(_, fmt.getAFlowNode()) and + .pointsTo(_, fmtCfg) and args = count(format_expr.getAnArg()) ) } diff --git a/python/ql/src/Expressions/IncorrectComparisonUsingIs.ql b/python/ql/src/Expressions/IncorrectComparisonUsingIs.ql index fa0ca14669f6..a7336c625472 100644 --- a/python/ql/src/Expressions/IncorrectComparisonUsingIs.ql +++ b/python/ql/src/Expressions/IncorrectComparisonUsingIs.ql @@ -15,7 +15,7 @@ import python /** Holds if the comparison `comp` uses `is` or `is not` (represented as `op`) to compare its `left` and `right` arguments. */ predicate comparison_using_is(Compare comp, ControlFlowNode left, Cmpop op, ControlFlowNode right) { - exists(CompareNode fcomp | fcomp = comp.getAFlowNode() | + exists(CompareNode fcomp | fcomp.getNode() = comp | fcomp.operands(left, op, right) and (op instanceof Is or op instanceof IsNot) ) diff --git a/python/ql/src/Expressions/IsComparisons.qll b/python/ql/src/Expressions/IsComparisons.qll index cb052ceca765..ee49f6c3337a 100644 --- a/python/ql/src/Expressions/IsComparisons.qll +++ b/python/ql/src/Expressions/IsComparisons.qll @@ -5,7 +5,7 @@ private import LegacyPointsTo /** Holds if the comparison `comp` uses `is` or `is not` (represented as `op`) to compare its `left` and `right` arguments. */ predicate comparison_using_is(Compare comp, ControlFlowNode left, Cmpop op, ControlFlowNode right) { - exists(CompareNode fcomp | fcomp = comp.getAFlowNode() | + exists(CompareNode fcomp | fcomp.getNode() = comp | fcomp.operands(left, op, right) and (op instanceof Is or op instanceof IsNot) ) diff --git a/python/ql/src/Expressions/TruncatedDivision.ql b/python/ql/src/Expressions/TruncatedDivision.ql index c731a21f7d26..d63ac056d3c2 100644 --- a/python/ql/src/Expressions/TruncatedDivision.ql +++ b/python/ql/src/Expressions/TruncatedDivision.ql @@ -19,7 +19,7 @@ where // Only relevant for Python 2, as all later versions implement true division major_version() = 2 and exists(BinaryExprNode bin, Value lval, Value rval | - bin = div.getAFlowNode() and + bin.getNode() = div and bin.getNode().getOp() instanceof Div and bin.getLeft().(ControlFlowNodeWithPointsTo).pointsTo(lval, left) and lval.getClass() = ClassValue::int_() and diff --git a/python/ql/src/Functions/ExplicitReturnInInit.ql b/python/ql/src/Functions/ExplicitReturnInInit.ql index f1300afbfd0a..25fc799fafae 100644 --- a/python/ql/src/Functions/ExplicitReturnInInit.ql +++ b/python/ql/src/Functions/ExplicitReturnInInit.ql @@ -19,7 +19,9 @@ where exists(Function init | init.isInitMethod() and r.getScope() = init) and r.getValue() = rv and not rv.pointsTo(Value::none_()) and - not exists(FunctionValue f | f.getACall() = rv.getAFlowNode() | f.neverReturns()) and + not exists(FunctionValue f, ControlFlowNode rvCfg | rvCfg.getNode() = rv | + f.getACall() = rvCfg and f.neverReturns() + ) and // to avoid double reporting, don't trigger if returning result from other __init__ function not exists(Attribute meth | meth = rv.(Call).getFunc() | meth.getName() = "__init__") select r, "Explicit return in __init__ method." diff --git a/python/ql/src/Functions/ReturnValueIgnored.ql b/python/ql/src/Functions/ReturnValueIgnored.ql index 3716b989d891..83af6304cb30 100644 --- a/python/ql/src/Functions/ReturnValueIgnored.ql +++ b/python/ql/src/Functions/ReturnValueIgnored.ql @@ -69,7 +69,12 @@ where returns_meaningful_value(callee) and not wrapped_in_try_except(call) and exists(int unused | - unused = count(ExprStmt e | e.getValue().getAFlowNode() = callee.getACall()) and + unused = + count(ExprStmt e | + exists(ControlFlowNode eValCfg | eValCfg.getNode() = e.getValue() | + eValCfg = callee.getACall() + ) + ) and total = count(callee.getACall()) | percentage_used = (100.0 * (total - unused) / total).floor() diff --git a/python/ql/src/Resources/FileOpen.qll b/python/ql/src/Resources/FileOpen.qll index dd952e732d42..1daecb6d0334 100644 --- a/python/ql/src/Resources/FileOpen.qll +++ b/python/ql/src/Resources/FileOpen.qll @@ -138,12 +138,12 @@ predicate function_opens_file(FunctionValue f) { f = Value::named("open") or exists(EssaVariable v, Return ret | ret.getScope() = f.getScope() | - ret.getValue().getAFlowNode() = v.getAUse() and + v.getNode() = ret.getValue().getAUse() and var_is_open(v, _) ) or exists(Return ret, FunctionValue callee | ret.getScope() = f.getScope() | - ret.getValue().getAFlowNode() = callee.getACall() and + callee.getNode() = ret.getValue().getACall() and function_opens_file(callee) ) } diff --git a/python/ql/src/Security/CWE-798/HardcodedCredentials.ql b/python/ql/src/Security/CWE-798/HardcodedCredentials.ql index 1e7b4452a9a6..baeaf26514c4 100644 --- a/python/ql/src/Security/CWE-798/HardcodedCredentials.ql +++ b/python/ql/src/Security/CWE-798/HardcodedCredentials.ql @@ -94,7 +94,7 @@ class CredentialSink extends DataFlow::Node { this.(DataFlow::ArgumentNode).argumentOf(_, pos) ) or - exists(Keyword k | k.getArg() = name and k.getValue().getAFlowNode() = this.asCfgNode()) + exists(Keyword k | k.getArg() = name and this.asCfgNode().getNode() = k.getValue()) or exists(CompareNode cmp, NameNode n | n.getId() = name | cmp.operands(this.asCfgNode(), any(Eq eq), n) diff --git a/python/ql/src/Statements/IterableStringOrSequence.ql b/python/ql/src/Statements/IterableStringOrSequence.ql index d1c4a507f0d1..ad8b6beab290 100644 --- a/python/ql/src/Statements/IterableStringOrSequence.ql +++ b/python/ql/src/Statements/IterableStringOrSequence.ql @@ -25,7 +25,7 @@ from For loop, ControlFlowNodeWithPointsTo iter, Value str, Value seq, ControlFlowNode seq_origin, ControlFlowNode str_origin where - loop.getIter().getAFlowNode() = iter and + iter.getNode() = loop.getIter() and iter.pointsTo(str, str_origin) and iter.pointsTo(seq, seq_origin) and has_string_type(str) and diff --git a/python/ql/src/Statements/NestedLoopsSameVariableWithReuse.ql b/python/ql/src/Statements/NestedLoopsSameVariableWithReuse.ql index c4deb4e64277..a9c5a5fbbd98 100644 --- a/python/ql/src/Statements/NestedLoopsSameVariableWithReuse.ql +++ b/python/ql/src/Statements/NestedLoopsSameVariableWithReuse.ql @@ -15,7 +15,7 @@ import python predicate loop_variable_ssa(For f, Variable v, SsaVariable s) { - f.getTarget().getAFlowNode() = s.getDefinition() and v = s.getVariable() + s.getDefinition().getNode() = f.getTarget() and v = s.getVariable() } predicate variableUsedInNestedLoops(For inner, For outer, Variable v, Name n) { diff --git a/python/ql/src/Statements/NonIteratorInForLoop.ql b/python/ql/src/Statements/NonIteratorInForLoop.ql index f8e6e51b55ff..b0cbc71130d0 100644 --- a/python/ql/src/Statements/NonIteratorInForLoop.ql +++ b/python/ql/src/Statements/NonIteratorInForLoop.ql @@ -16,7 +16,7 @@ private import LegacyPointsTo from For loop, ControlFlowNodeWithPointsTo iter, Value v, ClassValue t, ControlFlowNode origin where - loop.getIter().getAFlowNode() = iter and + iter.getNode() = loop.getIter() and iter.pointsTo(_, v, origin) and v.getClass() = t and not t.isIterable() and diff --git a/python/ql/src/Statements/SideEffectInAssert.ql b/python/ql/src/Statements/SideEffectInAssert.ql index 7ac96030c04e..55c34144dced 100644 --- a/python/ql/src/Statements/SideEffectInAssert.ql +++ b/python/ql/src/Statements/SideEffectInAssert.ql @@ -24,11 +24,13 @@ predicate func_with_side_effects(Expr e) { } predicate call_with_side_effect(Call e) { - e.getAFlowNode() = - API::moduleImport("subprocess") - .getMember(["call", "check_call", "check_output"]) - .getACall() - .asCfgNode() + exists(ControlFlowNode eCfg | eCfg.getNode() = e | + eCfg = + API::moduleImport("subprocess") + .getMember(["call", "check_call", "check_output"]) + .getACall() + .asCfgNode() + ) } predicate probable_side_effect(Expr e) { diff --git a/python/ql/src/Variables/Definition.qll b/python/ql/src/Variables/Definition.qll index be8c9490788c..9bd7130957b6 100644 --- a/python/ql/src/Variables/Definition.qll +++ b/python/ql/src/Variables/Definition.qll @@ -133,7 +133,11 @@ class ListComprehensionDeclaration extends ListComp { major_version() = 2 and this.getIterationVariable(_).getId() = result.getId() and result.getScope() = this.getScope() and - this.getAFlowNode().strictlyReaches(result.getAFlowNode()) and + exists(ControlFlowNode thisCfg, ControlFlowNode resultCfg | + thisCfg.getNode() = this and resultCfg.getNode() = result + | + thisCfg.strictlyReaches(resultCfg) + ) and result.isUse() } diff --git a/python/ql/src/Variables/LeakingListComprehension.ql b/python/ql/src/Variables/LeakingListComprehension.ql index 9b98fb43a313..a9baa21661da 100644 --- a/python/ql/src/Variables/LeakingListComprehension.ql +++ b/python/ql/src/Variables/LeakingListComprehension.ql @@ -13,18 +13,21 @@ import python import Definition -from ListComprehensionDeclaration l, Name use, Name defn +from + ListComprehensionDeclaration l, Name use, Name defn, ControlFlowNode lCfg, ControlFlowNode useCfg where use = l.getALeakedVariableUse() and defn = l.getDefinition() and - l.getAFlowNode().strictlyReaches(use.getAFlowNode()) and + lCfg.getNode() = l and + useCfg.getNode() = use and + lCfg.strictlyReaches(useCfg) and /* Make sure we aren't in a loop, as the variable may be redefined */ - not use.getAFlowNode().strictlyReaches(l.getAFlowNode()) and + not useCfg.strictlyReaches(lCfg) and not l.contains(use) and not use.deletes(_) and not exists(SsaVariable v | - v.getAUse() = use.getAFlowNode() and - not v.getDefinition().strictlyDominates(l.getAFlowNode()) + v.getAUse() = useCfg and + not v.getDefinition().strictlyDominates(lCfg) ) select use, use.getId() + " may have a different value in Python 3, as the $@ will not be in scope.", defn, diff --git a/python/ql/src/Variables/Loop.qll b/python/ql/src/Variables/Loop.qll index c7749fe476bf..e7c189cac354 100644 --- a/python/ql/src/Variables/Loop.qll +++ b/python/ql/src/Variables/Loop.qll @@ -26,8 +26,11 @@ private Stmt loop_probably_defines(Variable v) { /** Holds if the variable used by `use` is probably defined in a loop */ predicate probably_defined_in_loop(Name use) { - exists(Stmt loop | loop = loop_probably_defines(use.getVariable()) | - loop.getAFlowNode().strictlyReaches(use.getAFlowNode()) + exists(Stmt loop, ControlFlowNode loopCfg, ControlFlowNode useCfg | + loop = loop_probably_defines(use.getVariable()) and + loopCfg.getNode() = loop and + useCfg.getNode() = use and + loopCfg.strictlyReaches(useCfg) ) } diff --git a/python/ql/src/Variables/MultiplyDefined.ql b/python/ql/src/Variables/MultiplyDefined.ql index 3c26ff0b1eb1..ce8b5b316c21 100644 --- a/python/ql/src/Variables/MultiplyDefined.ql +++ b/python/ql/src/Variables/MultiplyDefined.ql @@ -24,8 +24,8 @@ predicate multiply_defined(AstNode asgn1, AstNode asgn2, Variable v) { forex(Definition def, Definition redef | def.getVariable() = v and - def = asgn1.getAFlowNode() and - redef = asgn2.getAFlowNode() + def.getNode() = asgn1 and + redef.getNode() = asgn2 | def.isUnused() and def.getARedef() = redef and diff --git a/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql index d252742d67c2..f74fd4970ee4 100644 --- a/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql +++ b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql @@ -88,7 +88,9 @@ predicate implicit_repeat(For f) { * E.g. gets `x` from `{ y for y in x }`. */ ControlFlowNode get_comp_iterable(For f) { - exists(Comp c | c.getFunction().getStmt(0) = f | c.getAFlowNode().getAPredecessor() = result) + exists(Comp c, ControlFlowNode cCfg | + c.getFunction().getStmt(0) = f and cCfg.getNode() = c and cCfg.getAPredecessor() = result + ) } from For f, Variable v, string msg diff --git a/python/ql/src/Variables/Undefined.qll b/python/ql/src/Variables/Undefined.qll index 42437a81340b..b320c2040b2d 100644 --- a/python/ql/src/Variables/Undefined.qll +++ b/python/ql/src/Variables/Undefined.qll @@ -19,9 +19,10 @@ private predicate loop_entry_variables(EssaVariable pred, EssaVariable succ) { private predicate loop_entry_edge(BasicBlock pred, BasicBlock loop) { pred = loop.getAPredecessor() and pred = loop.getImmediateDominator() and - exists(Stmt s | + exists(Stmt s, ControlFlowNode sCfg | loop_probably_executes_at_least_once(s) and - s.getAFlowNode().getBasicBlock() = loop + sCfg.getNode() = s and + sCfg.getBasicBlock() = loop ) } diff --git a/python/ql/src/Variables/UndefinedGlobal.ql b/python/ql/src/Variables/UndefinedGlobal.ql index 404ac64aa5a0..0c54b444ce30 100644 --- a/python/ql/src/Variables/UndefinedGlobal.ql +++ b/python/ql/src/Variables/UndefinedGlobal.ql @@ -27,7 +27,7 @@ predicate guarded_against_name_error(Name u) { | globals.getFunc().(Name).getId() = "globals" and guard.controls(controlled, _) and - controlled.contains(u.getAFlowNode()) + exists(ControlFlowNode uCfg | uCfg.getNode() = u | controlled.contains(uCfg)) ) } @@ -101,18 +101,18 @@ predicate undefined_use(Name u) { } private predicate first_use_in_a_block(Name use) { - exists(GlobalVariable v, BasicBlock b, int i | - i = min(int j | b.getNode(j).getNode() = v.getALoad()) and b.getNode(i) = use.getAFlowNode() + exists(GlobalVariable v, BasicBlock b, int i, ControlFlowNode useCfg | useCfg.getNode() = use | + i = min(int j | b.getNode(j).getNode() = v.getALoad()) and b.getNode(i) = useCfg ) } predicate first_undefined_use(Name use) { undefined_use(use) and - exists(GlobalVariable v | v.getALoad() = use | + exists(GlobalVariable v, ControlFlowNode useCfg | v.getALoad() = use and useCfg.getNode() = use | first_use_in_a_block(use) and not exists(ControlFlowNode other | other.getNode() = v.getALoad() and - other.getBasicBlock().strictlyDominates(use.getAFlowNode().getBasicBlock()) + other.getBasicBlock().strictlyDominates(useCfg.getBasicBlock()) ) ) } diff --git a/python/ql/src/Variables/UndefinedPlaceHolder.ql b/python/ql/src/Variables/UndefinedPlaceHolder.ql index 29f9b3a1a510..9fa0cc7eaaae 100644 --- a/python/ql/src/Variables/UndefinedPlaceHolder.ql +++ b/python/ql/src/Variables/UndefinedPlaceHolder.ql @@ -18,8 +18,8 @@ private import semmle.python.types.ImportTime /* Local variable part */ predicate initialized_as_local(PlaceHolder use) { - exists(SsaVariableWithPointsTo l, Function f | - f = use.getScope() and l.getAUse() = use.getAFlowNode() + exists(SsaVariableWithPointsTo l, Function f, ControlFlowNode useCfg | + f = use.getScope() and useCfg.getNode() = use and l.getAUse() = useCfg | l.getVariable() instanceof LocalVariable and not l.maybeUndefined() diff --git a/python/ql/src/Variables/UnusedModuleVariable.ql b/python/ql/src/Variables/UnusedModuleVariable.ql index 24d6559d6fea..0443c3388c85 100644 --- a/python/ql/src/Variables/UnusedModuleVariable.ql +++ b/python/ql/src/Variables/UnusedModuleVariable.ql @@ -54,7 +54,7 @@ predicate unused_global(Name unused, GlobalVariable v) { u.uses(v) | // That is reachable from this definition, directly - defn.strictlyReaches(u.getAFlowNode()) + exists(ControlFlowNode uCfg | uCfg.getNode() = u | defn.strictlyReaches(uCfg)) or // indirectly defn.getBasicBlock().reachesExit() and u.getScope() != unused.getScope() diff --git a/python/ql/src/analysis/CrossProjectDefinitions.qll b/python/ql/src/analysis/CrossProjectDefinitions.qll index 64b30f566f15..61e12a09ec6b 100644 --- a/python/ql/src/analysis/CrossProjectDefinitions.qll +++ b/python/ql/src/analysis/CrossProjectDefinitions.qll @@ -48,15 +48,17 @@ class Symbol extends TSymbol { AstNode find() { this = TModule(result) or - exists(Symbol s, string name | this = TMember(s, name) | + exists(Symbol s, string name, ControlFlowNode resultCfg | + this = TMember(s, name) and resultCfg.getNode() = result + | exists(ClassObject cls | s.resolvesTo() = cls and - cls.attributeRefersTo(name, _, result.getAFlowNode()) + cls.attributeRefersTo(name, _, resultCfg) ) or exists(ModuleObject m | s.resolvesTo() = m and - m.attributeRefersTo(name, _, result.getAFlowNode()) + m.attributeRefersTo(name, _, resultCfg) ) ) } diff --git a/python/ql/src/analysis/ImportFailure.ql b/python/ql/src/analysis/ImportFailure.ql index 71967e6e04f7..760a3693d6ea 100644 --- a/python/ql/src/analysis/ImportFailure.ql +++ b/python/ql/src/analysis/ImportFailure.ql @@ -80,10 +80,11 @@ class VersionGuard extends ConditionBlock { VersionGuard() { this.getLastNode() instanceof VersionTest } } -from ImportExpr ie +from ImportExpr ie, ControlFlowNode ieCfg where + ieCfg.getNode() = ie and not ie.(ExprWithPointsTo).refersTo(_) and - exists(Context c | c.appliesTo(ie.getAFlowNode())) and + exists(Context c | c.appliesTo(ieCfg)) and not ok_to_fail(ie) and - not exists(VersionGuard guard | guard.controls(ie.getAFlowNode().getBasicBlock(), _)) + not exists(VersionGuard guard | guard.controls(ieCfg.getBasicBlock(), _)) select ie, "Unable to resolve import of '" + ie.getImportedModuleName() + "'." diff --git a/python/ql/src/analysis/KeyPointsToFailure.ql b/python/ql/src/analysis/KeyPointsToFailure.ql index f07e8638f385..e42e5ac0bdd4 100644 --- a/python/ql/src/analysis/KeyPointsToFailure.ql +++ b/python/ql/src/analysis/KeyPointsToFailure.ql @@ -11,13 +11,13 @@ import python import semmle.python.pointsto.PointsTo predicate points_to_failure(Expr e) { - exists(ControlFlowNode f | f = e.getAFlowNode() | not PointsTo::pointsTo(f, _, _, _)) + exists(ControlFlowNode f | f.getNode() = e | not PointsTo::pointsTo(f, _, _, _)) } predicate key_points_to_failure(Expr e) { points_to_failure(e) and not points_to_failure(e.getASubExpression()) and - not exists(SsaVariable ssa | ssa.getAUse() = e.getAFlowNode() | + not exists(SsaVariable ssa, ControlFlowNode eCfg | eCfg.getNode() = e and ssa.getAUse() = eCfg | points_to_failure(ssa.getAnUltimateDefinition().getDefinition().getNode()) ) and not exists(Assign a | a.getATarget() = e) diff --git a/python/ql/src/analysis/PointsToFailure.ql b/python/ql/src/analysis/PointsToFailure.ql index fee1e80d2f77..8d46cbd90952 100644 --- a/python/ql/src/analysis/PointsToFailure.ql +++ b/python/ql/src/analysis/PointsToFailure.ql @@ -12,5 +12,5 @@ import python private import LegacyPointsTo from Expr e -where exists(ControlFlowNodeWithPointsTo f | f = e.getAFlowNode() | not f.refersTo(_)) +where exists(ControlFlowNodeWithPointsTo f | f.getNode() = e | not f.refersTo(_)) select e, "Expression does not 'point-to' any object." diff --git a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll index c7aef20c09dd..83ba4df4e298 100644 --- a/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll +++ b/python/ql/src/semmle/python/functions/ModificationOfParameterWithDefaultCustomizations.qll @@ -131,7 +131,7 @@ module ModificationOfParameterWithDefault { exists(DeletionNode d | d.getTarget().(SubscriptNode).getObject() = this.asCfgNode()) or // augmented assignment to the value - exists(AugAssign a | a.getTarget().getAFlowNode() = this.asCfgNode()) + exists(AugAssign a | this.asCfgNode().getNode() = a.getTarget()) or // modifying function call exists(DataFlow::CallCfgNode c, DataFlow::AttrRead a | c.getFunction() = a | diff --git a/python/ql/test/2/library-tests/comprehensions/ConsistencyCheck.ql b/python/ql/test/2/library-tests/comprehensions/ConsistencyCheck.ql index 2f5191fb5475..c1a214e40c75 100644 --- a/python/ql/test/2/library-tests/comprehensions/ConsistencyCheck.ql +++ b/python/ql/test/2/library-tests/comprehensions/ConsistencyCheck.ql @@ -5,5 +5,7 @@ import python select count(Comprehension c | - count(c.toString()) != 1 or count(c.getLocation()) != 1 or not exists(c.getAFlowNode()) + count(c.toString()) != 1 or + count(c.getLocation()) != 1 or + not exists(ControlFlowNode n | n.getNode() = c) ) diff --git a/python/ql/test/experimental/import-resolution/importflow.ql b/python/ql/test/experimental/import-resolution/importflow.ql index a3e20123fc7c..15a4498aa113 100644 --- a/python/ql/test/experimental/import-resolution/importflow.ql +++ b/python/ql/test/experimental/import-resolution/importflow.ql @@ -45,13 +45,15 @@ private class VersionGuardedNode extends DataFlow::Node { VersionGuardedNode() { version in [2, 3] and - exists(If parent, CompareNode c | parent.getBody().contains(this.asExpr()) | + exists(If parent, CompareNode c, ControlFlowNode litCfg | + parent.getBody().contains(this.asExpr()) and + litCfg.getNode() = any(IntegerLiteral lit | lit.getValue() = version) + | c.operands(API::moduleImport("sys") .getMember("version_info") .getASubscript() .asSource() - .asCfgNode(), any(Eq eq), - any(IntegerLiteral lit | lit.getValue() = version).getAFlowNode()) + .asCfgNode(), any(Eq eq), litCfg) ) } diff --git a/python/ql/test/library-tests/ControlFlow/PointsToSupport/UseFromDefinition.ql b/python/ql/test/library-tests/ControlFlow/PointsToSupport/UseFromDefinition.ql index 8b52244478f1..6173331a2dd1 100644 --- a/python/ql/test/library-tests/ControlFlow/PointsToSupport/UseFromDefinition.ql +++ b/python/ql/test/library-tests/ControlFlow/PointsToSupport/UseFromDefinition.ql @@ -9,7 +9,7 @@ Expr assignedValue(Name n) { from Name def, DefinitionNode d where - d = def.getAFlowNode() and + d.getNode() = def and exists(assignedValue(def)) and not d.getValue().getNode() = assignedValue(def) select def.toString(), assignedValue(def) diff --git a/python/ql/test/library-tests/ControlFlow/splitting/NodeCount.ql b/python/ql/test/library-tests/ControlFlow/splitting/NodeCount.ql index c51707a65ffd..e41f73f5b884 100644 --- a/python/ql/test/library-tests/ControlFlow/splitting/NodeCount.ql +++ b/python/ql/test/library-tests/ControlFlow/splitting/NodeCount.ql @@ -8,4 +8,4 @@ where not a instanceof ExprStmt and a.getScope() = s and s instanceof Function -select a.getLocation().getStartLine(), s.getName(), a, count(a.getAFlowNode()) +select a.getLocation().getStartLine(), s.getName(), a, count(ControlFlowNode n | n.getNode() = a) diff --git a/python/ql/test/library-tests/PointsTo/global/Global.ql b/python/ql/test/library-tests/PointsTo/global/Global.ql index 4dc6d16d3797..9887b79fbfc7 100644 --- a/python/ql/test/library-tests/PointsTo/global/Global.ql +++ b/python/ql/test/library-tests/PointsTo/global/Global.ql @@ -3,6 +3,6 @@ private import LegacyPointsTo from ControlFlowNode f, PointsToContext ctx, Value obj, ControlFlowNode orig where - exists(ExprStmt s | s.getValue().getAFlowNode() = f) and + exists(ExprStmt s | f.getNode() = s.getValue()) and PointsTo::pointsTo(f, ctx, obj, orig) select ctx, f, obj.toString(), orig diff --git a/python/ql/test/library-tests/PointsTo/local/LocalPointsTo.ql b/python/ql/test/library-tests/PointsTo/local/LocalPointsTo.ql index c81bd0ed3deb..ecf67aa7b33e 100644 --- a/python/ql/test/library-tests/PointsTo/local/LocalPointsTo.ql +++ b/python/ql/test/library-tests/PointsTo/local/LocalPointsTo.ql @@ -4,6 +4,6 @@ import semmle.python.objects.ObjectInternal from ControlFlowNode f, ObjectInternal obj, ControlFlowNode orig where - exists(ExprStmt s | s.getValue().getAFlowNode() = f) and + exists(ExprStmt s | f.getNode() = s.getValue()) and PointsTo::pointsTo(f, _, obj, orig) select f, obj.toString(), orig diff --git a/python/ql/test/library-tests/dataflow/tainttracking/TestTaintLib.qll b/python/ql/test/library-tests/dataflow/tainttracking/TestTaintLib.qll index 67a9f576cc75..4d7d9201e3ec 100644 --- a/python/ql/test/library-tests/dataflow/tainttracking/TestTaintLib.qll +++ b/python/ql/test/library-tests/dataflow/tainttracking/TestTaintLib.qll @@ -43,7 +43,7 @@ query predicate test_taint(string arg_location, string test_res, string scope_na // TODO: Replace with `hasFlowToExpr` once that is working if TestTaintTrackingFlow::flowTo(any(DataFlow::Node n | - n.(DataFlow::CfgNode).getNode() = arg.getAFlowNode() + n.(DataFlow::CfgNode).getNode().getNode() = arg )) then has_taint = true else has_taint = false diff --git a/python/tools/recorded-call-graph-metrics/ql/lib/RecordedCalls.qll b/python/tools/recorded-call-graph-metrics/ql/lib/RecordedCalls.qll index 55015ea5998e..79d01776728b 100644 --- a/python/tools/recorded-call-graph-metrics/ql/lib/RecordedCalls.qll +++ b/python/tools/recorded-call-graph-metrics/ql/lib/RecordedCalls.qll @@ -238,9 +238,10 @@ module PointsToBasedCallGraph { Value calleeValue; ResolvableRecordedCall() { - exists(Call call, XmlCallee xmlCallee | + exists(Call call, XmlCallee xmlCallee, ControlFlowNode callCfg | call = this.getACall() and - calleeValue.getACall() = call.getAFlowNode() and + callCfg.getNode() = call and + calleeValue.getACall() = callCfg and xmlCallee = this.getXmlCallee() and ( xmlCallee instanceof XmlPythonCallee and From 8e4f1279e3c12dcaefd0c8152eaafd2da35c7766 Mon Sep 17 00:00:00 2001 From: yoff Date: Mon, 1 Jun 2026 13:27:08 +0000 Subject: [PATCH 3/3] Python: remove Function.getAReturnValueFlowNode() and rewrite callers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the getAFlowNode removal in the same PR: same AST→legacy-CFG bridge pattern. Rewrite the 11 call sites (across objects/, types/, frameworks/, and TypeTrackingImpl) to bind a `Return ret` explicitly, then constrain via `ret.getScope() = f and n.getNode() = ret.getValue()`. Semantic noop. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/ql/lib/semmle/python/Function.qll | 8 -------- .../python/dataflow/new/internal/TypeTrackingImpl.qll | 6 ++++-- python/ql/lib/semmle/python/frameworks/Bottle.qll | 5 ++++- python/ql/lib/semmle/python/frameworks/Django.qll | 5 ++++- python/ql/lib/semmle/python/frameworks/FastApi.qll | 5 ++++- python/ql/lib/semmle/python/frameworks/Pyramid.qll | 5 ++++- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 5 +++-- python/ql/lib/semmle/python/frameworks/Twisted.qll | 5 ++++- python/ql/lib/semmle/python/objects/Callables.qll | 5 +++-- python/ql/lib/semmle/python/objects/ObjectAPI.qll | 7 ++++++- python/ql/lib/semmle/python/types/FunctionObject.qll | 5 ++++- 11 files changed, 40 insertions(+), 21 deletions(-) diff --git a/python/ql/lib/semmle/python/Function.qll b/python/ql/lib/semmle/python/Function.qll index c133275b8b78..54f6cec2e56b 100644 --- a/python/ql/lib/semmle/python/Function.qll +++ b/python/ql/lib/semmle/python/Function.qll @@ -153,14 +153,6 @@ class Function extends Function_, Scope, AstNode { override predicate contains(AstNode inner) { Scope.super.contains(inner) } - /** Gets a control flow node for a return value of this function */ - ControlFlowNode getAReturnValueFlowNode() { - exists(Return ret | - ret.getScope() = this and - ret.getValue() = result.getNode() - ) - } - /** Gets the minimum number of positional arguments that can be correctly passed to this function. */ int getMinPositionalArguments() { result = count(this.getAnArg()) - count(this.getDefinition().getArgs().getADefault()) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index 95434b05451d..b2a5f127ea32 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -94,8 +94,10 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { Node returnOf(Node callable, SummaryComponent return) { return = FlowSummaryImpl::Private::SummaryComponent::return() and // `result` should be the return value of a callable expression (lambda or function) referenced by `callable` - result.asCfgNode() = - callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getAReturnValueFlowNode() + exists(Return ret | + ret.getScope() = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope() and + result.asCfgNode().getNode() = ret.getValue() + ) } // Relating callables to nodes diff --git a/python/ql/lib/semmle/python/frameworks/Bottle.qll b/python/ql/lib/semmle/python/frameworks/Bottle.qll index aa2c906948dc..d4fc54f95687 100644 --- a/python/ql/lib/semmle/python/frameworks/Bottle.qll +++ b/python/ql/lib/semmle/python/frameworks/Bottle.qll @@ -73,7 +73,10 @@ module Bottle { /** A response returned by a view callable. */ class BottleReturnResponse extends Http::Server::HttpResponse::Range { BottleReturnResponse() { - this.asCfgNode() = any(View::ViewCallable vc).getAReturnValueFlowNode() + exists(Return ret | + ret.getScope() = any(View::ViewCallable vc) and + this.asCfgNode().getNode() = ret.getValue() + ) } override DataFlow::Node getBody() { result = this } diff --git a/python/ql/lib/semmle/python/frameworks/Django.qll b/python/ql/lib/semmle/python/frameworks/Django.qll index ee0ed4a84dd0..4f3fdbff01ae 100644 --- a/python/ql/lib/semmle/python/frameworks/Django.qll +++ b/python/ql/lib/semmle/python/frameworks/Django.qll @@ -2872,7 +2872,10 @@ module PrivateDjango { DataFlow::CfgNode { DjangoRedirectViewGetRedirectUrlReturn() { - node = any(GetRedirectUrlFunction f).getAReturnValueFlowNode() + exists(Return ret | + ret.getScope() = any(GetRedirectUrlFunction f) and + node.getNode() = ret.getValue() + ) } override DataFlow::Node getRedirectLocation() { result = this } diff --git a/python/ql/lib/semmle/python/frameworks/FastApi.qll b/python/ql/lib/semmle/python/frameworks/FastApi.qll index 9d52e9b4f57b..db8cd8e78961 100644 --- a/python/ql/lib/semmle/python/frameworks/FastApi.qll +++ b/python/ql/lib/semmle/python/frameworks/FastApi.qll @@ -309,7 +309,10 @@ module FastApi { FastApiRouteSetup routeSetup; FastApiRequestHandlerReturn() { - node = routeSetup.getARequestHandler().getAReturnValueFlowNode() + exists(Return ret | + ret.getScope() = routeSetup.getARequestHandler() and + node.getNode() = ret.getValue() + ) } override DataFlow::Node getBody() { result = this } diff --git a/python/ql/lib/semmle/python/frameworks/Pyramid.qll b/python/ql/lib/semmle/python/frameworks/Pyramid.qll index 63e19363fe86..dbfd684f02ce 100644 --- a/python/ql/lib/semmle/python/frameworks/Pyramid.qll +++ b/python/ql/lib/semmle/python/frameworks/Pyramid.qll @@ -166,7 +166,10 @@ module Pyramid { /** A response returned by a view callable. */ private class PyramidReturnResponse extends Http::Server::HttpResponse::Range { PyramidReturnResponse() { - this.asCfgNode() = any(View::ViewCallable vc).getAReturnValueFlowNode() and + exists(Return ret | + ret.getScope() = any(View::ViewCallable vc) and + this.asCfgNode().getNode() = ret.getValue() + ) and not this = instance() } diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 5d3b994880a1..41857457dd16 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -2254,8 +2254,9 @@ module StdlibPrivate { DataFlow::CfgNode { WsgirefSimpleServerApplicationReturn() { - exists(WsgirefSimpleServerApplication requestHandler | - node = requestHandler.getAReturnValueFlowNode() + exists(WsgirefSimpleServerApplication requestHandler, Return ret | + ret.getScope() = requestHandler and + node.getNode() = ret.getValue() ) } diff --git a/python/ql/lib/semmle/python/frameworks/Twisted.qll b/python/ql/lib/semmle/python/frameworks/Twisted.qll index 60aedd8fb582..510c7738a9df 100644 --- a/python/ql/lib/semmle/python/frameworks/Twisted.qll +++ b/python/ql/lib/semmle/python/frameworks/Twisted.qll @@ -182,7 +182,10 @@ private module Twisted { DataFlow::CfgNode { TwistedResourceRenderMethodReturn() { - this.asCfgNode() = any(TwistedResourceRenderMethod meth).getAReturnValueFlowNode() + exists(Return ret | + ret.getScope() = any(TwistedResourceRenderMethod meth) and + this.asCfgNode().getNode() = ret.getValue() + ) } override DataFlow::Node getBody() { result = this } diff --git a/python/ql/lib/semmle/python/objects/Callables.qll b/python/ql/lib/semmle/python/objects/Callables.qll index c42393cc3c62..4aba4298c941 100644 --- a/python/ql/lib/semmle/python/objects/Callables.qll +++ b/python/ql/lib/semmle/python/objects/Callables.qll @@ -81,11 +81,12 @@ class PythonFunctionObjectInternal extends CallableObjectInternal, TPythonFuncti pragma[nomagic] override predicate callResult(PointsToContext callee, ObjectInternal obj, CfgOrigin origin) { - exists(Function func, ControlFlowNode rval, ControlFlowNode forigin | + exists(Function func, Return ret, ControlFlowNode rval, ControlFlowNode forigin | func = this.getScope() and callee.appliesToScope(func) | - rval = func.getAReturnValueFlowNode() and + ret.getScope() = func and + rval.getNode() = ret.getValue() and PointsToInternal::pointsTo(rval, callee, obj, forigin) and origin = CfgOrigin::fromCfgNode(forigin) ) diff --git a/python/ql/lib/semmle/python/objects/ObjectAPI.qll b/python/ql/lib/semmle/python/objects/ObjectAPI.qll index a5d4d91cc7ab..d6f3b5f1fcac 100644 --- a/python/ql/lib/semmle/python/objects/ObjectAPI.qll +++ b/python/ql/lib/semmle/python/objects/ObjectAPI.qll @@ -745,7 +745,12 @@ class PythonFunctionValue extends FunctionValue { override int maxParameters() { result = this.getScope().getMaxPositionalArguments() } /** Gets a control flow node corresponding to a return statement in this function */ - ControlFlowNode getAReturnedNode() { result = this.getScope().getAReturnValueFlowNode() } + ControlFlowNode getAReturnedNode() { + exists(Return ret | + ret.getScope() = this.getScope() and + result.getNode() = ret.getValue() + ) + } override ClassValue getARaisedType() { scope_raises(result, this.getScope()) } diff --git a/python/ql/lib/semmle/python/types/FunctionObject.qll b/python/ql/lib/semmle/python/types/FunctionObject.qll index d03f08623009..55ac1b25538d 100644 --- a/python/ql/lib/semmle/python/types/FunctionObject.qll +++ b/python/ql/lib/semmle/python/types/FunctionObject.qll @@ -137,7 +137,10 @@ class PyFunctionObject extends FunctionObject { /** Gets a control flow node corresponding to the value of a return statement */ ControlFlowNodeWithPointsTo getAReturnedNode() { - result = this.getFunction().getAReturnValueFlowNode() + exists(Return ret | + ret.getScope() = this.getFunction() and + result.getNode() = ret.getValue() + ) } override string descriptiveString() {