diff --git a/src/library_analyzer/cli/_run_api.py b/src/library_analyzer/cli/_run_api.py index 2689eb9e..ca259cc3 100644 --- a/src/library_analyzer/cli/_run_api.py +++ b/src/library_analyzer/cli/_run_api.py @@ -36,4 +36,6 @@ def _run_api_command( api_purity = get_purity_results(src_dir_path) out_file_api_purity = out_dir_path.joinpath(f"{package}__api_purity.json") - api_purity.to_json_file(out_file_api_purity) + api_purity.to_json_file( + out_file_api_purity, + ) # Shorten is set to True by default, therefore the results will only contain the count of each reason. diff --git a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py index 32cf34b1..458e8db6 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py +++ b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py @@ -11,6 +11,7 @@ Parameter, Reasons, Symbol, + UnknownProto, ) @@ -21,21 +22,21 @@ class CallGraphBuilder: Attributes ---------- - classes : dict[str, ClassScope] + classes : Classnames in the module as key and their corresponding ClassScope instance as value. - raw_reasons : dict[NodeID, Reasons] + raw_reasons : The raw reasons for impurity for all functions. Keys are the ids of the functions. - call_graph_forest : CallGraphForest + call_graph_forest : The call graph forest for the given functions. - visited : set[NodeID] + visited : A set of all visited nodes. Parameters ---------- - classes : dict[str, ClassScope] + classes : Classnames in the module as key and their corresponding ClassScope instance as value. - raw_reasons : dict[NodeID, Reasons] + raw_reasons : The raw reasons for impurity for all functions. Keys are the ids of the functions. """ @@ -59,7 +60,7 @@ def _build_call_graph_forest(self) -> CallGraphForest: Returns ------- - call_graph_forest : CallGraphForest + call_graph_forest : The call graph forest for the given functions. """ # Prepare the classes for the call graph. @@ -131,7 +132,7 @@ def _built_call_graph(self, reason: Reasons) -> None: Parameters ---------- - reason : Reasons + reason : The raw reasons of the function. """ # If the node has already been visited, return @@ -144,13 +145,16 @@ def _built_call_graph(self, reason: Reasons) -> None: # If the node is already inside the forest and does not have any calls left, it is considered to be finished. if self.call_graph_forest.has_graph(reason.id) and not reason.calls: return - + # If the node is already inside the forest but still has calls left, it needs to be updated. + if self.call_graph_forest.has_graph(reason.id): + cgn = self.call_graph_forest.get_graph(reason.id) # Create a new node and add it to the forest. - cgn = CallGraphNode( - symbol=reason.function_scope.symbol, # type: ignore[union-attr] # function_scope is never None here - reasons=reason, - ) - self.call_graph_forest.add_graph(reason.id, cgn) + else: + cgn = CallGraphNode( + symbol=reason.function_scope.symbol, # type: ignore[union-attr] # function_scope is never None here + reasons=reason, + ) + self.call_graph_forest.add_graph(reason.id, cgn) # The node has calls, which need to be added to the forest and to the children of the current node. # They are sorted to ensure a deterministic order of the children (especially but not only for testing). @@ -172,14 +176,14 @@ def _built_call_graph(self, reason: Reasons) -> None: # Check if the node was declared inside the current module. elif call.id not in self.raw_reasons: - self._handle_unknown_call(call, reason.id) + self._handle_unknown_call(call, reason) # Build the call graph for the child function and add it to the children of the current node. else: self._built_call_graph(self.raw_reasons[call.id]) self.call_graph_forest.get_graph(reason.id).add_child(self.call_graph_forest.get_graph(call.id)) - def _handle_unknown_call(self, call: Symbol, reason_id: NodeID) -> None: + def _handle_unknown_call(self, call: Symbol, reason: Reasons) -> None: """Handle unknown calls. Deal with unknown calls and add them to the forest. @@ -188,10 +192,10 @@ def _handle_unknown_call(self, call: Symbol, reason_id: NodeID) -> None: Parameters ---------- - call : Symbol + call : The call that is unknown. - reason_id : NodeID - The id of the function that the call is in. + reason : + The reason of the function that contains the unknown call. """ # Deal with the case that the call calls an imported function. if isinstance(call, Import): @@ -200,26 +204,32 @@ def _handle_unknown_call(self, call: Symbol, reason_id: NodeID) -> None: reasons=Reasons(id=call.id), ) self.call_graph_forest.add_graph(call.id, imported_cgn) - self.call_graph_forest.get_graph(reason_id).add_child(self.call_graph_forest.get_graph(call.id)) + self.call_graph_forest.get_graph(reason.id).add_child(self.call_graph_forest.get_graph(call.id)) # If the call was used as a member of an MemberAccessValue, it needs to be removed from the unknown_calls. # This is due to the improved analysis that can determine the module through the receiver of that call. # Hence, the call is handled as a call of an imported function and not as an unknown_call # when inferring the purity later. - for unknown_call in self.call_graph_forest.get_graph(reason_id).reasons.unknown_calls: - if unknown_call.node == call.call: + for unknown_call in self.call_graph_forest.get_graph(reason.id).reasons.unknown_calls.copy().values(): + if unknown_call.symbol.node == call.call: ( - self.call_graph_forest.get_graph(reason_id).reasons.remove_unknown_call( + self.call_graph_forest.get_graph(reason.id).reasons.remove_unknown_call( NodeID.calc_node_id(call.call), ) ) # Deal with the case that the call calls a function parameter. elif isinstance(call, Parameter): - self.call_graph_forest.get_graph(reason_id).reasons.unknown_calls.add(call) + self.call_graph_forest.get_graph(reason.id).reasons.unknown_calls[call.id] = UnknownProto( + symbol=call, + origin=reason.function_scope.symbol if reason.function_scope else None, + ) else: - self.call_graph_forest.get_graph(reason_id).reasons.unknown_calls.add(call) + self.call_graph_forest.get_graph(reason.id).reasons.unknown_calls[call.id] = UnknownProto( + symbol=call, + origin=reason.function_scope.symbol if reason.function_scope else None, + ) def _handle_cycles(self, removed_nodes: set[NodeID] | None = None) -> None: """Handle cycles in the call graph. @@ -231,7 +241,7 @@ def _handle_cycles(self, removed_nodes: set[NodeID] | None = None) -> None: Parameters ---------- - removed_nodes : set[NodeID] | None + removed_nodes : A set of all removed nodes. If not given, a new set is created. """ @@ -262,16 +272,16 @@ def _test_cgn_for_cycles( Parameters ---------- - cgn : CallGraphNode + cgn : The current node in the graph that is visited. - visited_nodes : set[NewCallGraphNode] | None + visited_nodes : A set of all visited nodes. - path : list[NodeID] | None + path : A list of all nodes in the current path. Returns ------- - cycle : dict[NodeID, NewCallGraphNode] + cycle : Dict of all nodes in the cycle. Keys are the NodeIDs of the nodes. Returns an empty dict if no cycle is found. @@ -316,7 +326,7 @@ def _contract_cycle(self, cycle: dict[NodeID, CallGraphNode]) -> None: Parameters ---------- - cycle : dict[NodeID, CallGraphNode] + cycle : A dict of all nodes in the cycle. Keys are the NodeIDs of the CallGraphNodes. """ @@ -368,10 +378,10 @@ def _update_pointers(self, cycle: dict[NodeID, CallGraphNode], combined_node: Co Parameters ---------- - cycle : dict[NodeID, CallGraphNode] + cycle : A dict of all nodes in the cycle. Keys are the NodeIDs of the nodes. - combined_node : CombinedCallGraphNode + combined_node : The combined node that replaces all nodes in the cycle. """ for graph in self.call_graph_forest.graphs.values(): @@ -386,15 +396,15 @@ def build_call_graph(classes: dict[str, ClassScope], raw_reasons: dict[NodeID, R Parameters ---------- - classes : dict[str, ClassScope] + classes : Classnames in the module as key and their corresponding ClassScope instance as value. - raw_reasons : dict[NodeID, Reasons] + raw_reasons : The raw reasons for impurity for all functions. Keys are the ids of the functions. Returns ------- - call_graph_forest : CallGraphForest + call_graph_forest : The call graph forest for the given functions. """ return CallGraphBuilder(classes, raw_reasons).call_graph_forest diff --git a/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py b/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py index 4bdc742a..fbab8d5d 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py +++ b/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py @@ -40,33 +40,33 @@ class ModuleDataBuilder: Attributes ---------- - current_node_stack : list[Scope] + current_node_stack : Stack of nodes that are currently visited by the ASTWalker. The last node in the stack is the current node. It Is only used while walking the AST. - current_function_def : list[FunctionScope] + current_function_def : Stack of FunctionScopes that are currently visited by the ASTWalker. The top of the stack is the current function definition. It is only used while walking the AST. - children : list[Scope] + children : All found children nodes are stored in children until their scope is determined. After the AST is completely walked, the resulting "Module"- Scope is stored in children. (children[0]) - targets : list[Symbol] + targets : All found targets are stored in targets until their scope is determined. - values : list[Reference] + values : All found names are stored in names until their scope is determined. It Is only used while walking the AST. - calls : list[Reference] + calls : All calls found on function level are stored in calls until their scope is determined. It Is only used while walking the AST. - classes : dict[str, ClassScope] + classes : Classnames in the module as key and their corresponding ClassScope instance as value. - functions : dict[str, list[FunctionScope]] + functions : Function names in the module as key and a list of their corresponding FunctionScope instances as value. - global_variables : dict[str, Scope] + global_variables : All global variables and their corresponding Scope instance. - imports : dict[str, Import] + imports : All imports and their corresponding Import instance. """ @@ -92,7 +92,7 @@ def has_assignattr_parent(node: astroid.Attribute) -> bool: Parameters ---------- - node : astroid.Attribute + node : The node whose parents are to be checked. Returns @@ -116,9 +116,9 @@ def get_symbol(self, node: astroid.NodeNG, current_scope: astroid.NodeNG | None) Parameters ---------- - node : astroid.NodeNG + node : The node whose symbol is to be determined. - current_scope : astroid.NodeNG | None + current_scope : The current scope of the node (is None if the node is the module node). """ match current_scope: @@ -220,7 +220,7 @@ def _detect_scope(self, current_node: astroid.NodeNG) -> None: Parameters ---------- - current_node : astroid.NodeNG + current_node : The node whose scope is to be determined. """ outer_scope_children: list[Scope] = [] @@ -245,7 +245,8 @@ def _detect_scope(self, current_node: astroid.NodeNG) -> None: # add all children of the try-finally node and remove the try-except node. if isinstance(current_node, astroid.TryFinally) and isinstance(child.symbol.node, astroid.TryExcept): inner_scope_children.extend(child.children) - inner_scope_children.remove(child) + if child in inner_scope_children: + inner_scope_children.remove(child) self.current_node_stack[-1].children = inner_scope_children # Set the children of the current node. self.children = outer_scope_children # Keep the children that are not in the scope of the current node. @@ -275,7 +276,7 @@ def _analyze_class(self, current_node: astroid.ClassDef) -> None: Parameters ---------- - current_node : astroid.ClassDef + current_node : The node to analyze. """ if not isinstance(current_node, astroid.ClassDef): @@ -303,7 +304,7 @@ def _analyze_function(self, current_node: astroid.FunctionDef) -> None: Parameters ---------- - current_node : astroid.FunctionDef + current_node : The node to analyze. """ if not isinstance(current_node, astroid.FunctionDef): @@ -518,7 +519,7 @@ def find_first_parent_function(self, node: astroid.NodeNG | MemberAccess) -> ast Parameters ---------- - node : astroid.NodeNG + node : The node to start the search from. Returns @@ -542,9 +543,9 @@ def handle_arg(self, node: astroid.AssignName, kind: ParameterKind) -> None: Parameters ---------- - node : astroid.AssignName + node : The node that is to be handled. - kind : ParameterKind + kind : The kind of the parameter. """ scope_node = Scope( @@ -561,9 +562,9 @@ def add_arg_to_function_scope_parameters(self, argument: astroid.AssignName, kin Parameters ---------- - argument : astroid.AssignName + argument : The argument node to add to the parameter dict. - kind : ParameterKind + kind : The kind of the parameter. """ if isinstance(self.current_node_stack[-1], FunctionScope): @@ -579,9 +580,9 @@ def is_annotated(self, node: astroid.NodeNG | MemberAccess, found_annotation_nod Parameters ---------- - node : astroid.Name + node : The node to check. - found_annotation_node : bool + found_annotation_node : A bool that indicates if an annotation node is found. Returns @@ -620,9 +621,9 @@ def check_if_global(self, name: str, node: astroid.NodeNG) -> list[astroid.Assig Parameters ---------- - name : str + name : The variable name to check. - node : astroid.NodeNG + node : The node whose root is to be checked. Returns @@ -648,7 +649,7 @@ def find_base_classes(self, node: astroid.ClassDef) -> list[ClassScope]: Parameters ---------- - node : astroid.ClassDef + node : The class whose base classes are to be found. Returns @@ -675,7 +676,7 @@ def enter_module(self, node: astroid.Module) -> None: Parameters ---------- - node : astroid.Module + node : The module node to enter. """ self.current_node_stack.append( @@ -1217,11 +1218,11 @@ def get_module_data(code: str, module_name: str = "", path: str | None = None) - Parameters ---------- - code : str + code : The source code of the module whose module data is to be found. - module_name : str, optional + module_name : The name of the module, by default "". - path : str, optional + path : The path of the module, by default None. Returns diff --git a/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py b/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py index 3346c10e..4d00f535 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py @@ -26,8 +26,6 @@ ImpurityReason, NativeCall, NodeID, - NonLocalVariableRead, - NonLocalVariableWrite, OpenMode, PackageData, Parameter, @@ -52,20 +50,20 @@ class PurityAnalyzer: Attributes ---------- - module_id : NodeID + module_id : The ID of the module to analyze. - visited_nodes : set[NodeID] + visited_nodes : A set of all nodes that have been visited during the analysis. - call_graph_forest : CallGraphForest + call_graph_forest : The call graph forest of the module. - current_purity_results : dict[NodeID, dict[NodeID, PurityResult]] + current_purity_results : The purity results of the functions in the module. - separated_nodes : dict[NodeID, CallGraphNode] + separated_nodes : If the module has cycles, they will be found by the CallGraphBuilder and combined to a single node. Since these combined nodes are not part of the module but needed for the analysis, their purity results will be propagated to the original nodes during the analysis. This attribute stores the original nodes inside after the combined node was analyzed. - cached_module_results : dict[NodeID, dict[NodeID, PurityResult]] + cached_module_results : The results of all previously analyzed modules. The key is the NodeID of the module, the value is a dictionary of the purity results of the functions in the module. @@ -74,18 +72,18 @@ class PurityAnalyzer: Parameters ---------- - code : str | None + code : The source code of the module. If None is provided, the package data must be provided (or else an exception is raised). - module_name : str + module_name : The name of the module. - path : str | None + path : The path of the module. - results : dict[NodeID, dict[NodeID, PurityResult]] | None + results : The results of all previously analyzed modules. The key is the NodeID of the module, the value is a dictionary of the purity results of the functions in the module. - package_data : PackageData | None + package_data : The module data of all modules the package. If provided, the references are resolved with the package data, else the module data is collected first. It is used for the inference of the purity between modules in the package. @@ -127,7 +125,7 @@ def _handle_open_like_functions(call: astroid.Call) -> PurityResult: Parameters ---------- - call: astrid.Call + call : The call to check. Returns @@ -212,7 +210,7 @@ def _get_impurity_result(reasons: Reasons) -> PurityResult: Parameters ---------- - reasons : Reasons + reasons : The node to process containing the raw reasons for impurity collected. Returns @@ -228,101 +226,66 @@ def _get_impurity_result(reasons: Reasons) -> PurityResult: # Check if the function has any non-local variable writes. if reasons.writes_to: - for write in reasons.writes_to: - impurity_reasons.add( - NonLocalVariableWrite( - symbol=write, - origin=( - reasons.id - if reasons.function_scope is None and reasons.id is not None - else (reasons.function_scope.symbol if reasons.function_scope is not None else None) - ), - ), - ) + for write in reasons.writes_to.values(): + impurity_reasons.add(write) # Check if the function has any non-local variable reads. if reasons.reads_from: - for read in reasons.reads_from: + for read in reasons.reads_from.values(): # Check if the read reads from an imported module. - if isinstance(read, Import): - if read.inferred_node: + if isinstance(read.symbol, Import): + if read.symbol.inferred_node: # If the inferred node is a function, it must be analyzed to determine its purity. - if isinstance(read.inferred_node, astroid.FunctionDef): + if isinstance(read.symbol.inferred_node, astroid.FunctionDef): impurity_reasons.add( - UnknownCall(UnknownFunctionCall(call=read.call, inferred_def=read.inferred_node)), + UnknownCall( + UnknownFunctionCall(call=read.symbol.call, inferred_def=read.symbol.inferred_node), + ), ) - elif isinstance(read.inferred_node, astroid.ClassDef): + elif isinstance(read.symbol.inferred_node, astroid.ClassDef): impurity_reasons.add( - UnknownCall(UnknownClassInit(call=read.call, inferred_def=read.inferred_node)), + UnknownCall( + UnknownClassInit(call=read.symbol.call, inferred_def=read.symbol.inferred_node), + ), ) # If the inferred node is a module, it will not count towards the impurity of the function. # If this was added, nearly anything would be impure. # Also, since the imported symbols are analyzed in much more detail, this can be omitted. - elif isinstance(read.inferred_node, astroid.Module): + elif isinstance(read.symbol.inferred_node, astroid.Module): pass # Default case for symbols that could not be inferred. else: # TODO: what type of nodes are allowed here? - impurity_reasons.add( - NonLocalVariableRead( - symbol=read, - origin=( - reasons.id - if reasons.function_scope is None and reasons.id is not None - else ( - reasons.function_scope.symbol - if reasons.function_scope is not None - else None - ) - ), - ), - ) + impurity_reasons.add(read) else: - raise ValueError(f"Imported node {read.name} has no inferred node.") from None + raise ValueError(f"Imported node {read.symbol.name} has no inferred node.") from None else: - impurity_reasons.add( - NonLocalVariableRead( - symbol=read, - origin=( - reasons.id - if reasons.function_scope is None and reasons.id is not None - else (reasons.function_scope.symbol if reasons.function_scope is not None else None) - ), - ), - ) + impurity_reasons.add(read) # Check if the function has any unknown calls. if reasons.unknown_calls: - for unknown_call in reasons.unknown_calls: + for unknown_call in reasons.unknown_calls.values(): # Handle calls of code where no definition was found. - if isinstance(unknown_call, Reference): + if isinstance(unknown_call.symbol, Reference): # This checks special cases of unknown calls. # These are cases where a function is not a true builtin, but also not a user-defined function. # Cases like dict.pop(), list.remove(), set.union(), etc. - if unknown_call.name in BUILTIN_SPECIALS: + if unknown_call.symbol.name in BUILTIN_SPECIALS: pass else: impurity_reasons.add( UnknownCall( - expression=UnknownFunctionCall(call=unknown_call.node), - origin=( - reasons.id - if reasons.function_scope is None and reasons.id is not None - else (reasons.function_scope.symbol if reasons.function_scope is not None else None) - ), + expression=UnknownFunctionCall(call=unknown_call.symbol.node), + origin=unknown_call.origin, ), ) # Handle parameter calls - elif isinstance(unknown_call, Parameter): + elif isinstance(unknown_call.symbol, Parameter): impurity_reasons.add( CallOfParameter( - expression=ParameterAccess(unknown_call), - origin=( - reasons.id - if reasons.function_scope is None and reasons.id is not None - else (reasons.function_scope.symbol if reasons.function_scope is not None else None) - ), + expression=ParameterAccess(unknown_call.symbol), + origin=unknown_call.origin, ), ) # Do not handle imported calls here since they are handled separately. @@ -346,7 +309,7 @@ def _process_imported_node(self, imported_node: ImportedCallGraphNode) -> Purity Parameters ---------- - imported_node : ImportedCallGraphNode + imported_node : The imported node to process. Returns @@ -493,7 +456,7 @@ def _process_node(self, node: CallGraphNode) -> PurityResult: Parameters ---------- - node : CallGraphNode + node : The node to process. Returns @@ -620,27 +583,27 @@ def infer_purity( Parameters ---------- - code : str | None + code : The source code of the module. If None is provided, the package data must be provided (or else an exception is raised). - module_name : str, optional + module_name : The name of the module, by default "". - path : str, optional + path : The path of the module, by default None. - results : dict[NodeID, dict[NodeID, PurityResult]] | None + results : The results of all previously analyzed modules. The key is the NodeID of the module, the value is a dictionary of the purity results of the functions in the module. After the analysis of the module, the results are saved in this dictionary. All imported modules are saved in this dictionary too for further runtime reduction. Is None if no results are available. - package_data : PackageData | None + package_data : The module data of all modules the package. If provided, the references are resolved with the package data, else the module data is collected first. It is used for the inference of the purity between modules in the package. Returns ------- - purity_results : dict[NodeID, dict[NodeID, PurityResult]] + purity_results : The purity results of the functions in the module. The key is the NodeID of the module, the value is a dictionary of the purity results of the functions in the module. """ @@ -658,7 +621,7 @@ def get_purity_results( Parameters ---------- - src_dir_path : Path + src_dir_path : The path of the source directory of the package. Returns diff --git a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py index 73e33530..baab8923 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py +++ b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py @@ -21,6 +21,8 @@ MemberAccessValue, ModuleAnalysisResult, NodeID, + NonLocalVariableRead, + NonLocalVariableWrite, PackageData, ParameterKind, Reasons, @@ -28,6 +30,7 @@ ReferenceNode, Symbol, TargetReference, + UnknownProto, ValueReference, ) @@ -39,26 +42,26 @@ class ReferenceResolver: Attributes ---------- - functions : dict[str, list[FunctionScope]] + functions : The functions of the module. - classes : dict[str, ClassScope] + classes : The classes of the module. - imports : dict[str, Import] + imports : The imports of the module. - module_analysis_result : ModuleAnalysisResult + module_analysis_result : The result of the reference resolving. - package_data_is_provided : bool + package_data_is_provided : True if package data is given, False otherwise. Parameters ---------- - code : str + code : The code of the module. - module_name : str + module_name : The name of the module if any. - path : str | None + path : The path of the module if any. - package_data : PackageData | None + package_data : The module data of all modules the package. If provided, the references are resolved with the package data, else the module data is collected first. It is used for the inference of the purity between modules in the package. @@ -109,9 +112,9 @@ def is_function_of_class(function: astroid.FunctionDef, klass: ClassScope) -> bo Parameters ---------- - function : astroid.FunctionDef + function : The function to check. - klass : ClassScope + klass : The class to check. Returns @@ -137,14 +140,14 @@ def merge_dicts( Parameters ---------- - d1 : dict[str, list[ReferenceNode]] + d1 : The first dict. - d2 : dict[str, list[ReferenceNode]] + d2 : The second dict. Returns ------- - d3 : dict[str, list[ReferenceNode]] + d3 : The merged dict. """ d3 = d1.copy() @@ -166,9 +169,9 @@ def compare_parameters(function: FunctionScope, call: astroid.Call) -> bool: Parameters ---------- - function : FunctionScope + function : The function to compare. - call : astroid.Call + call : The call to compare. Returns @@ -250,9 +253,9 @@ def _find_call_references( Parameters ---------- - call_reference : Reference + call_reference : The call reference which should be analyzed. - function : FunctionScope + function : The function in which the call is made. Returns @@ -278,6 +281,8 @@ def _find_call_references( if isinstance(class_iterator, astroid.ClassDef): klass = self.classes.get(class_iterator.name) break + if isinstance(class_iterator, astroid.Module): + break class_iterator = class_iterator.parent if klass and klass.super_classes: @@ -381,9 +386,9 @@ def _find_value_references( Parameters ---------- - value_reference : Reference + value_reference : The value reference which should be analyzed. - function : FunctionScope + function : The function in which the value is used. Returns @@ -471,6 +476,8 @@ def _find_value_references( import_def, inferred_node=inferred_node_def, # type: ignore[type-var] # import def is not None. ) + specified_import_def.id.name = specified_import_def.id.name + "." + specified_import_def.name # type: ignore[union-attr] # specified_import_def is not None. + if specified_import_def: result_value_reference.referenced_symbols.append(specified_import_def) @@ -502,17 +509,20 @@ def _find_value_references( # are resolved is much more effort and would require to change the data structure. # Therefore, all calls of imported functions are handled as MemberAccessValue. # Because of this, a check at the point where the referenced_symbols are added to the raw_reasons is needed. - if value_reference.node.receiver is None: + try: + if value_reference.node.receiver is None: + receiver_name = "UNKNOWN" + elif isinstance(value_reference.node.receiver, astroid.Attribute): + receiver_name = value_reference.node.receiver.attrname + elif isinstance(value_reference.node.receiver, astroid.Call) and hasattr( + value_reference.node.receiver.func, + "name", + ): + receiver_name = value_reference.node.receiver.func.name + else: + receiver_name = value_reference.node.receiver.name + except AttributeError: receiver_name = "UNKNOWN" - elif isinstance(value_reference.node.receiver, astroid.Attribute): - receiver_name = value_reference.node.receiver.attrname - elif isinstance(value_reference.node.receiver, astroid.Call) and isinstance( - value_reference.node.receiver.func, - astroid.Name, - ): - receiver_name = value_reference.node.receiver.func.name - else: - receiver_name = value_reference.node.receiver.name # In references imported via "import" statements, the symbols of the imported module are not known yet. # The symbol is accessed via its name, which is of type MemberAccessValue. @@ -554,6 +564,7 @@ def _find_value_references( name=value_reference.node.member, inferred_node=inferred_node_def, ) + specified_import_def.id.name = specified_import_def.id.name + "." + specified_import_def.name # If the member is a call, add the call node to the specified_import_def as fallback for the case # that the purity of the called function cannot be inferred. @@ -578,9 +589,9 @@ def _find_target_references( Parameters ---------- - target_reference : Symbol + target_reference : The target reference which should be analyzed. - function : FunctionScope + function : The function in which the value is used. Returns @@ -743,7 +754,9 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node # If no referenced symbols are found, add the call to the list of unknown_calls # of the raw_reasons dict for this function elif call_references_result.node not in raw_reasons[function.symbol.id].unknown_calls: - raw_reasons[function.symbol.id].unknown_calls.add(call_references_result.node) + raw_reasons[function.symbol.id].unknown_calls[call_references_result.node.id] = ( + UnknownProto(symbol=call_references_result.node, origin=function.symbol) + ) # Check if the function has value_references (References from a value node to a target node). if function.value_references: @@ -781,8 +794,10 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node else: continue # Add the referenced symbol to the list of symbols whom are read from. - if referenced_symbol not in raw_reasons[function.symbol.id].reads_from: - raw_reasons[function.symbol.id].reads_from.add(referenced_symbol) + if referenced_symbol.id not in raw_reasons[function.symbol.id].reads_from: + raw_reasons[function.symbol.id].reads_from[referenced_symbol.id] = ( + NonLocalVariableRead(symbol=referenced_symbol, origin=function.symbol) + ) elif isinstance(referenced_symbol, Import): # Since calls of imported functions are treated within _find_value_references # as MemberAccessValue, they need to be added to the calls of the raw_reasons dict @@ -794,14 +809,21 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node if referenced_symbol not in raw_reasons[function.symbol.id].calls: raw_reasons[function.symbol.id].calls.add(referenced_symbol) else: # noqa: PLR5501 - if referenced_symbol not in raw_reasons[function.symbol.id].reads_from: - raw_reasons[function.symbol.id].reads_from.add(referenced_symbol) + if referenced_symbol.id not in raw_reasons[function.symbol.id].reads_from: + raw_reasons[function.symbol.id].reads_from[referenced_symbol.id] = ( + NonLocalVariableRead( + symbol=referenced_symbol, + origin=function.symbol, + ) + ) # If no referenced symbols are found, add the call to the list of unknown_calls # of the raw_reasons dict for this function elif value_reference_result.node not in raw_reasons[ function.symbol.id ].unknown_calls and isinstance(value_reference_result.node.node, astroid.Call): - raw_reasons[function.symbol.id].unknown_calls.add(value_reference_result.node) + raw_reasons[function.symbol.id].unknown_calls[value_reference_result.node.id] = ( + UnknownProto(symbol=value_reference_result.node, origin=function.symbol) + ) # Check if the function has target_references (References from a target node to another target node). if function.target_symbols: @@ -834,7 +856,9 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node continue # Add the referenced symbol to the list of symbols whom are written to. if referenced_symbol not in raw_reasons[function.symbol.id].writes_to: - raw_reasons[function.symbol.id].writes_to.add(referenced_symbol) + raw_reasons[function.symbol.id].writes_to[referenced_symbol.id] = ( + NonLocalVariableWrite(symbol=referenced_symbol, origin=function.symbol) + ) name_references: dict[str, list[ReferenceNode]] = self.merge_dicts(value_references, target_references) resolved_references: dict[str, list[ReferenceNode]] = self.merge_dicts(call_references, name_references) @@ -852,13 +876,13 @@ def resolve_references( Parameters ---------- - code : str + code : The code of the module. - module_name : str + module_name : The name of the module if any. - path : str | None + path : The path of the module if any. - package_data : PackageData | None + package_data : The module data of all modules the package. If provided, the references are resolved with the package data, else the module data is collected first. It is used for the inference of the purity between modules in the package. diff --git a/src/library_analyzer/processing/api/purity_analysis/model/__init__.py b/src/library_analyzer/processing/api/purity_analysis/model/__init__.py index c48d5da5..14c8f93e 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/__init__.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/__init__.py @@ -49,6 +49,7 @@ UnknownCall, UnknownClassInit, UnknownFunctionCall, + UnknownProto, ) from library_analyzer.processing.api.purity_analysis.model._purity_builtins import ( BUILTIN_CLASSSCOPES, @@ -118,4 +119,5 @@ "BUILTIN_SPECIALS", "PackageData", "ParameterKind", + "UnknownProto", ] diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py index da75e4c2..43e13ebc 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py @@ -3,8 +3,6 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING -from library_analyzer.processing.api.purity_analysis.model._purity import Impure, UnknownCall, UnknownFunctionCall - if TYPE_CHECKING: from library_analyzer.processing.api.purity_analysis.model._module_data import ( Import, @@ -22,7 +20,7 @@ class CallGraphForest: Attributes ---------- - graphs : dict[str, CallGraphNode] + graphs : The dictionary of call graph trees. The key is the name of the tree, the value is the root CallGraphNode of the tree. """ @@ -34,9 +32,9 @@ def add_graph(self, graph_id: NodeID, graph: CallGraphNode) -> None: Parameters ---------- - graph_id : NodeID + graph_id : The NodeID of the tree node. - graph : CallGraphNode + graph : The root of the tree. """ # if graph_id in self.forest: @@ -48,7 +46,7 @@ def get_graph(self, graph_id: NodeID) -> CallGraphNode: Parameters ---------- - graph_id : NodeID + graph_id : The NodeID of the tree node to get. Raises @@ -66,7 +64,7 @@ def has_graph(self, graph_id: NodeID) -> bool: Parameters ---------- - graph_id : NodeID + graph_id : The NodeID of the tree to check for. Returns @@ -81,7 +79,7 @@ def delete_graph(self, graph_id: NodeID) -> None: Parameters ---------- - graph_id : NodeID + graph_id : The NodeID of the tree to delete. """ del self.graphs[graph_id] @@ -95,12 +93,12 @@ class CallGraphNode: Attributes ---------- - symbol : Symbol + symbol : The symbol of the function that the node represents. - reasons : Reasons + reasons : The raw Reasons for the node. After the call graph is built, this only contains reads_from and writes_to as well as unknown_calls. - children : dict[NodeID, CallGraphNode] + children : The set of children of the node, (i.e., the set of nodes that this node calls) """ @@ -122,7 +120,7 @@ def add_child(self, child: CallGraphNode) -> None: Parameters ---------- - child : CallGraphNode + child : The child to add. """ self.children[child.symbol.id] = child @@ -132,7 +130,7 @@ def get_child(self, child_id: NodeID) -> CallGraphNode: Parameters ---------- - child_id : NodeID + child_id : The NodeID of the child to get. Raises @@ -150,7 +148,7 @@ def has_child(self, child_id: NodeID) -> bool: Parameters ---------- - child_id : NodeID + child_id : The NodeID of the child to check for. Returns @@ -165,7 +163,7 @@ def delete_child(self, child_id: NodeID) -> None: Parameters ---------- - child_id : NodeID + child_id : The NodeID of the child to delete. """ del self.children[child_id] @@ -193,7 +191,7 @@ class CombinedCallGraphNode(CallGraphNode): Attributes ---------- - combines : dict[NodeID, CallGraphNode] + combines : A dictionary of all nodes that are combined into this node. This is later used for transferring the reasons of the combined node to the original nodes. """ @@ -225,27 +223,6 @@ def separate(self) -> dict[NodeID, CallGraphNode]: original_nodes[node_id] = node original_nodes[node_id].reasons.result = self.reasons.result - # The results need to be assigned an origin to be able to trace back the result. - if ( - original_nodes[node_id].reasons is not None - and isinstance(original_nodes[node_id].reasons.result, Impure) - and hasattr(original_nodes[node_id].reasons.result, "reasons") - ): - for reason in original_nodes[node_id].reasons.result.reasons: # type: ignore[union-attr] # it is cheked above - if ( - isinstance(reason, UnknownCall) - and isinstance(reason.expression, UnknownFunctionCall) - and reason.origin is None - ): - for nod in self.combines.values(): - for unknown_call in nod.reasons.unknown_calls: - if ( - unknown_call.node == reason.expression.call - and nod.reasons.function_scope is not None - ): - reason.origin = nod.reasons.function_scope.symbol - break - return original_nodes diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_module_data.py b/src/library_analyzer/processing/api/purity_analysis/model/_module_data.py index 59bd4062..e7c74fb5 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_module_data.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_module_data.py @@ -18,14 +18,14 @@ class ModuleData: Attributes ---------- - scope : Scope + scope : The module's scope, this contains all child scopes. - classes : dict[str, ClassScope] + classes : All classes and their ClassScope. - functions : dict[str, list[FunctionScope]] + functions : All functions and a list of their FunctionScopes. The value is a list since there can be multiple functions with the same name. - imports : dict[str, Import] + imports : All imported symbols. """ @@ -42,9 +42,9 @@ class PackageData: Attributes ---------- - package_name : str + package_name : The name of the package. - modules : dict[str, tuple[str, ModuleData]] + modules : All modules and their ModuleData. The key is the name of the module. The value is a tuple of the path to the module and the ModuleData. @@ -94,18 +94,18 @@ class MemberAccess(astroid.NodeNG): Attributes ---------- - node : astroid.Attribute | astroid.AssignAttr + node : The original node that represents the member access. Needed as fallback when determining the parent node if the receiver is None. - receiver : MemberAccess | astroid.NodeNG | None + receiver : The receiver is the node that is accessed, it can be nested, e.g. `a` in `a.b` or `a.b` in `a.b.c`. The receiver can be nested. Is None if the receiver is not of type Name, Call or Attribute - member : str + member : The member is the name of the node that accesses the receiver, e.g. `b` in `a.b`. parent : astroid.NodeNG | None The parent node of the member access. - name : str + name : The name of the member access, e.g. `a.b`. Is set in __post_init__, after the member access has been created. If the MemberAccess is nested, the name of the receiver will be set to "UNKNOWN" since it is hard to determine @@ -152,7 +152,7 @@ def construct_member_access_target(cls, node: astroid.Attribute | astroid.Assign Parameters ---------- - node : astroid.Attribute | astroid.AssignAttr + node : The node to construct the MemberAccessTarget node from. Returns @@ -203,7 +203,7 @@ def construct_member_access_value(cls, node: astroid.Attribute) -> MemberAccessV Parameters ---------- - node : astrid.Attribute + node : The node to construct the MemberAccessValue node from. Returns @@ -234,15 +234,15 @@ class NodeID: Attributes ---------- - module : str | None + module : The module of the node. Is None for combined nodes. - name : str + name : The name of the node. - line : int + line : The line of the node in the source code. Is None for combined nodes, builtins or any other node that do not have a line. - col : int | None + col : The column of the node in the source code. Is None for combined nodes, builtins or any other node that do not have a line. """ @@ -326,7 +326,7 @@ def calc_node_id( Parameters ---------- - node : astroid.NodeNG | astroid.Module | astroid.ClassDef | astroid.FunctionDef | astroid.AssignName | astroid.Name | astroid.AssignAttr | astroid.Import | astroid.ImportFrom | astroid.Call | astroid.Lambda | astroid.ListComp | MemberAccess + node : Returns ------- @@ -386,11 +386,11 @@ class Symbol(ABC): Attributes ---------- - node : astroid.NodeNG | MemberAccess + node : The node that defines the symbol. - id : NodeID + id : The id of that node. - name : str + name : The name of the symbol (for easier access). """ @@ -414,7 +414,7 @@ class UnknownSymbol(Symbol): Attributes ---------- - node : None + node : """ node: None = None @@ -470,7 +470,7 @@ class ClassVariable(Symbol): Attributes ---------- - klass : astroid.ClassDef | None + klass : The class that defines the class variable. """ @@ -491,7 +491,7 @@ class InstanceVariable(Symbol): Attributes ---------- - klass : astroid.ClassDef | None + klass : The class that defines the instance variable. """ @@ -512,22 +512,22 @@ class Import(Symbol): Attributes ---------- - node : astroid.ImportFrom | astroid.Import + node : The node that defines the import. - name : str + name : The name of the symbol that is imported if any is given. Else it is equal to the module name. - module : str + module : The name of the module that is imported. - alias : str | None + alias : If the node is of type Import alias is the alias name for the module name if any is given. If the node is of type ImportFrom alias is the alias name for the name of the symbol if any is given. - inferred_node : astroid.NodeNG | None + inferred_node : When the import is used as a reference (or a symbol) the inferred_node is the node of the used reference (or symbol) in the original module. It was inferred by the reference analysis by using astroids safe_infer method. If the method could not infer the node, the inferred_node is None. - call: astroid.Call | None + call : The original call node as fallback for the case, that the purity of the inferred_node cannot be inferred. Only is set if the symbol represents a call. """ @@ -558,7 +558,7 @@ class Builtin(Symbol): Attributes ---------- - call : astroid.Call + call : The call node of the function. """ @@ -579,7 +579,7 @@ class BuiltinOpen(Builtin): Attributes ---------- - call : astroid.Call + call : The call node of the open-like function. """ @@ -602,7 +602,7 @@ class CombinedSymbol(Symbol): Attributes ---------- - node : None + node : """ @@ -625,11 +625,11 @@ class Reference: Attributes ---------- - node : astroid.Call | astroid.Name | MemberAccessValue + node : The node that defines the symbol. - id : NodeID + id : The id of that node. - name : str + name : The name of the symbol (for easier access). """ @@ -656,12 +656,12 @@ class Scope: Attributes ---------- - _symbol : Symbol + _symbol : The symbol that defines the scope. - _children : list[Scope | ClassScope] + _children : The list of Scope or ClassScope instances that are defined in the scope of the Symbol node. Is None if the node is a leaf node. - _parent : Scope | ClassScope | None + _parent : The parent node in the scope tree, there is None if the node is the root node. """ @@ -744,16 +744,16 @@ class ClassScope(Scope): Attributes ---------- - class_variables : dict[str, list[Symbol]] + class_variables : The name of the class variable and a list of its Symbols (which represent a declaration). There can be multiple declarations of the same class variable, e.g. `a = 1` and `a = 2` since we cannot determine which one is used since we do not analyze the control flow. Also, it is impossible to distinguish between a declaration and a reassignment. - instance_variables : dict[str, list[Symbol]] + instance_variables : The name of the instance variable and a list of its Symbols (which represent a declaration). - init_function : FunctionScope | None + init_function : The init function of the class if it exists else None. - super_classes : list[ClassScope] + super_classes : The list of superclasses of the class if any. """ @@ -771,18 +771,18 @@ class FunctionScope(Scope): Attributes ---------- - target_symbols : dict[str, list[Symbol]] + target_symbols : The dict of all target nodes used inside the corresponding function. Target nodes are specified as all nodes that can be written to and which can be represented as a Symbol. This includes assignments, parameters, - value_references : dict[str, list[Reference]] + value_references : The dict of all value nodes used inside the corresponding function. - call_references : dict[str, list[Reference]] + call_references : The dict of all function calls inside the corresponding function. The key is the name of the call node, the value is a list of all References of call nodes with that name. - parameters : dict[str, Parameter] + parameters : The parameters of the function. - globals_used : dict[str, list[GlobalVariable]] + globals_used : The global variables used inside the function. It stores the globally assigned nodes (Assignment of the used variable). """ @@ -801,7 +801,7 @@ def remove_call_reference_by_id(self, call_id: str) -> None: Parameters ---------- - call_id : str + call_id : The name of the call node to remove. """ self.call_references.pop(call_id, None) diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py index 82da8ae9..80649a3a 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py @@ -12,6 +12,7 @@ from library_analyzer.processing.api.purity_analysis.model._module_data import ( MemberAccessValue, NodeID, + Reference, Symbol, UnknownSymbol, ) @@ -34,7 +35,7 @@ class PurityResult(ABC): Purity results are either pure, impure or unknown. - is_class : bool + is_class : Whether the result is for a class or not. """ @@ -44,7 +45,7 @@ def __hash__(self) -> int: return hash(str(self)) @abstractmethod - def to_dict(self) -> dict[str, Any]: + def to_dict(self, shorten: bool = False) -> dict[str, Any]: pass @abstractmethod @@ -61,7 +62,7 @@ class Pure(PurityResult): Attributes ---------- - is_class : bool + is_class : Whether the result is for a class or not. """ @@ -72,7 +73,7 @@ def update(self, other: PurityResult | None) -> PurityResult: Parameters ---------- - other : PurityResult | None + other : The result to update with. Returns @@ -104,7 +105,7 @@ def update(self, other: PurityResult | None) -> PurityResult: def clone() -> Pure: return Pure() - def to_dict(self) -> dict[str, Any]: + def to_dict(self, shorten: bool = False) -> dict[str, Any]: # noqa: ARG002 return {"purity": self.__class__.__name__} def __hash__(self) -> int: @@ -128,9 +129,9 @@ class Impure(PurityResult): Attributes ---------- - reasons : set[ImpurityReason] + reasons : The reasons why the function is impure. - is_class : bool + is_class : Whether the result is for a class or not. """ @@ -142,7 +143,7 @@ def update(self, other: PurityResult | None) -> PurityResult: Parameters ---------- - other : PurityResult | None + other : The result to update with. Returns @@ -172,17 +173,58 @@ def update(self, other: PurityResult | None) -> PurityResult: def clone(self) -> Impure: return Impure(reasons=self.reasons.copy()) - def to_dict(self) -> dict[str, Any]: - reasons = [] + def to_dict(self, shorten: bool = False) -> dict[str, Any]: seen = set() + non_local_variable_reads = [] + non_local_variable_writes = [] + file_reads = [] + file_writes = [] + unknown_calls = [] + native_calls = [] + parameter_calls = [] for reason in self.reasons: if str(reason) not in seen: - reasons.append(reason.to_dict()) seen.add(str(reason)) - + match reason: + case NonLocalVariableRead(): + non_local_variable_reads.append(reason.to_dict()) + case NonLocalVariableWrite(): + non_local_variable_writes.append(reason.to_dict()) + case FileRead(): + file_reads.append(reason.to_dict()) + case FileWrite(): + file_writes.append(reason.to_dict()) + case UnknownCall(): + unknown_calls.append(reason.to_dict()) + case NativeCall(): + native_calls.append(reason.to_dict()) + case CallOfParameter(): + parameter_calls.append(reason.to_dict()) + case _: + raise TypeError(f"Unknown reason type: {reason}") + if not shorten: + combined_reasons: dict[str, Any] = { + "NonLocalVariableRead": non_local_variable_reads, + "NonLocalVariableWrite": non_local_variable_writes, + "FileRead": file_reads, + "FileWrite": file_writes, + "UnknownCall": unknown_calls, + "NativeCall": native_calls, + "CallOfParameter": parameter_calls, + } + else: + combined_reasons = { + "NonLocalVariableRead": len(non_local_variable_reads), + "NonLocalVariableWrite": len(non_local_variable_writes), + "FileRead": len(file_reads), + "FileWrite": len(file_writes), + "UnknownCall": len(unknown_calls), + "NativeCall": len(native_calls), + "CallOfParameter": len(parameter_calls), + } return { "purity": self.__class__.__name__, - "reasons": reasons, + "reasons": {reason: value for reason, value in combined_reasons.items() if value}, } def __hash__(self) -> int: @@ -220,9 +262,9 @@ class NonLocalVariableRead(Read): Attributes ---------- - symbol : GlobalVariable | ClassVariable | InstanceVariable | Import + symbol : The symbol that is read. - origin : Symbol | NodeID | None + origin : The origin of the read. """ @@ -240,7 +282,6 @@ def to_dict(self) -> dict[str, Any]: self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) ) return { - "result": f"{self.__class__.__name__}", "origin": f"{origin}", "reason": f"{self.symbol.__class__.__name__}.{self.symbol.name}", } @@ -252,14 +293,13 @@ class FileRead(Read): Attributes ---------- - source : Expression | None + source : The source of the read. - This is None if the source is unknown. - origin : Symbol | NodeID | None + origin : The origin of the read. """ - source: Expression | None = None # TODO: this should never be None + source: Expression origin: Symbol | NodeID | None = field(default=None) def __hash__(self) -> int: @@ -275,7 +315,6 @@ def to_dict(self) -> dict[str, Any]: self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) ) return { - "result": f"{self.__class__.__name__}", "origin": f"{origin}", "reason": f"{self.source.__str__()}", } @@ -291,9 +330,9 @@ class NonLocalVariableWrite(Write): Attributes ---------- - symbol : GlobalVariable | ClassVariable | InstanceVariable | Import + symbol : The symbol that is written to. - origin : Symbol | NodeID | None + origin : The origin of the write. """ @@ -311,7 +350,6 @@ def to_dict(self) -> dict[str, Any]: self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) ) return { - "result": f"{self.__class__.__name__}", "origin": f"{origin}", "reason": f"{self.symbol.__class__.__name__}.{self.symbol.name}", } @@ -323,14 +361,13 @@ class FileWrite(Write): Attributes ---------- - source : Expression | None + source : The source of the write. - This is None if the source is unknown. # TODO: see above LARS - origin : Symbol | NodeID | None + origin : The origin of the write. """ - source: Expression | None = None + source: Expression origin: Symbol | NodeID | None = field(default=None) def __hash__(self) -> int: @@ -346,7 +383,6 @@ def to_dict(self) -> dict[str, Any]: self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) ) return { - "result": f"{self.__class__.__name__}", "origin": f"{origin}", "reason": f"{self.source.__str__()}", } @@ -356,6 +392,37 @@ class Unknown(ImpurityReason, ABC): """Superclass for unknown type impurity reasons.""" +@dataclass +class UnknownProto(Unknown): + """Class for UnknownCalls which are not fully determined. + + Attributes + ---------- + symbol : + The symbol or reference object which is not fully determined. + origin : + The origin of the unknown call. + """ + + symbol: Symbol | Reference + origin: Symbol | NodeID | None = field(default=None) # TODO: remove NodeID + + def __hash__(self) -> int: + return hash(str(self)) + + def __str__(self) -> str: + return f"{self.__class__.__name__}: {self.symbol.__class__.__name__}.{self.symbol.name}" + + def to_dict(self) -> dict[str, Any]: + origin = ( + self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) + ) + return { + "origin": f"{origin}", + "reason": f"{self.symbol.name}", + } + + @dataclass class UnknownCall(Unknown): """Class for calling unknown code. @@ -364,9 +431,9 @@ class UnknownCall(Unknown): Attributes ---------- - expression : Expression + expression : The expression that is called. - origin : Symbol | NodeID | None + origin : The origin of the call. """ @@ -384,7 +451,6 @@ def to_dict(self) -> dict[str, Any]: self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) ) return { - "result": f"{self.__class__.__name__}", "origin": f"{origin}", "reason": f"{self.expression.__str__()}", } @@ -398,9 +464,9 @@ class NativeCall(Unknown): # ExternalCall Attributes ---------- - expression : Expression + expression : The expression that is called. - origin : Symbol | NodeID | None + origin : The origin of the call. """ @@ -418,7 +484,6 @@ def to_dict(self) -> dict[str, Any]: self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) ) return { - "result": f"{self.__class__.__name__}", "origin": f"{origin}", "reason": f"{self.expression.__str__()}", } @@ -436,9 +501,9 @@ class CallOfParameter(Unknown): # ParameterCall Attributes ---------- - expression : Expression + expression : The expression that is called. - origin : Symbol | NodeID | None + origin : The origin of the call. """ @@ -456,7 +521,6 @@ def to_dict(self) -> dict[str, Any]: self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) ) return { - "result": f"{self.__class__.__name__}", "origin": f"{origin}", "reason": f"{self.expression.__str__()}", } @@ -479,7 +543,7 @@ class ParameterAccess(Expression): Attributes ---------- - parameter : Parameter + parameter : The parameter that is accessed. """ @@ -497,7 +561,7 @@ class StringLiteral(Expression): Attributes ---------- - value : str + value : The name of the string literal. """ @@ -513,11 +577,11 @@ class UnknownFunctionCall(Expression): Attributes ---------- - call : astroid.Call + call : The call node. - inferred_def : astroid.FunctionDef | None + inferred_def : The inferred function definition for the call if it is known. - name : str + name : The name of the call. """ @@ -549,11 +613,11 @@ class UnknownClassInit(Expression): Attributes ---------- - call : astroid.Call + call : The call node. - inferred_def : astroid.ClassDef | None + inferred_def : The inferred class definition for the call if it is known. - name : str + name : The name of the call. """ @@ -588,15 +652,15 @@ class APIPurity: purity_results: typing.ClassVar[dict[NodeID, dict[NodeID, PurityResult]]] = {} - def to_json_file(self, path: Path) -> None: + def to_json_file(self, path: Path, shorten: bool = True) -> None: ensure_file_exists(path) with path.open("w") as f: - json.dump(self.to_dict(), f, indent=2) + json.dump(self.to_dict(shorten), f, indent=2) - def to_dict(self) -> dict[str, Any]: + def to_dict(self, shorten: bool = False) -> dict[str, Any]: return { module_name.__str__(): { - function_id.__str__(): purity.to_dict() + function_id.__str__(): purity.to_dict(shorten) for function_id, purity in purity_result.items() if not purity.is_class } diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_reference.py b/src/library_analyzer/processing/api/purity_analysis/model/_reference.py index 157bdc5b..18d819e6 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_reference.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_reference.py @@ -8,11 +8,7 @@ from library_analyzer.processing.api.purity_analysis.model._module_data import ( ClassScope, - ClassVariable, FunctionScope, - GlobalVariable, - Import, - InstanceVariable, MemberAccessTarget, MemberAccessValue, NodeID, @@ -22,9 +18,14 @@ ) if TYPE_CHECKING: - from collections.abc import Iterator - from library_analyzer.processing.api.purity_analysis.model import CallGraphForest, PurityResult + from library_analyzer.processing.api.purity_analysis.model import ( + CallGraphForest, + NonLocalVariableRead, + NonLocalVariableWrite, + PurityResult, + UnknownProto, + ) @dataclass @@ -36,11 +37,11 @@ class ReferenceNode(ABC): Attributes ---------- - node : astroid.Name | astroid.AssignName | astroid.Call | MemberAccessTarget | MemberAccessValue + node : The node that references the symbols. - scope : Scope + scope : The scope of the node. - referenced_symbols : list[Symbol] + referenced_symbols : The list of referenced symbols. These are the symbols of the nodes that node references. """ @@ -94,17 +95,17 @@ class ModuleAnalysisResult: Attributes ---------- - resolved_references : dict[str, list[ValueReference | TargetReference]] + resolved_references : The dictionary of references. The key is the name of the reference node, the value is the list of ReferenceNodes. - raw_reasons : dict[NodeID, Reasons] + raw_reasons : The dictionary of function references. The key is the NodeID of the function, the value is the Reasons for the function. - classes : dict[str, ClassScope] + classes : All classes and their ClassScope. call_graph_forest : CallGraphForest The call graph forest of the module. - module_id : NodeID | None + module_id : The NodeID of the module which the analysis result belongs to. """ @@ -124,33 +125,33 @@ class Reasons: Attributes ---------- - function_scope : FunctionScope | None + function_scope : The scope of the function which the reasons belong to. Is None if the reasons are not for a FunctionDef node. This is the case when either a builtin or a combined node is created, or a ClassScope is used to propagate reasons. - writes_to : set[GlobalVariable | ClassVariable | InstanceVariable | Import] - A set of all nodes that are written to. - reads_from : set[GlobalVariable | ClassVariable | InstanceVariable | Import] - A set of all nodes that are read from. - calls : set[Symbol] + writes_to : + A dict of all nodes that are written to. + reads_from : + A dict of all nodes that are read from. + calls : A set of all nodes that are called. - result : PurityResult | None + result : The result of the purity analysis This also works as a flag to determine if the purity analysis has already been performed: If it is None, the purity analysis has not been performed - unknown_calls : set[Symbol | Reference] - A list of all unknown calls. + unknown_calls : + A dict of all unknown calls. Unknown calls are calls to functions that are not defined in the module or are parameters. """ id: NodeID function_scope: FunctionScope | None = field(default=None) - writes_to: set[GlobalVariable | ClassVariable | InstanceVariable | Import] = field(default_factory=set) - reads_from: set[GlobalVariable | ClassVariable | InstanceVariable | Import] = field(default_factory=set) - calls: set[Symbol] = field(default_factory=set) + writes_to: dict[NodeID, NonLocalVariableWrite] = field(default_factory=dict) + reads_from: dict[NodeID, NonLocalVariableRead] = field(default_factory=dict) + calls: set[Symbol] = field(default_factory=set) # TODO: SORTED SET oder LIST result: PurityResult | None = field(default=None) - unknown_calls: set[Symbol | Reference] = field(default_factory=set) + unknown_calls: dict[NodeID, UnknownProto] = field(default_factory=dict) def join_reasons_list(self, reasons_list: list[Reasons]) -> Reasons: """Join a list of Reasons objects. @@ -159,7 +160,7 @@ def join_reasons_list(self, reasons_list: list[Reasons]) -> Reasons: Parameters ---------- - reasons_list : list[Reasons] + reasons_list : The list of Reasons objects. @@ -181,9 +182,6 @@ def join_reasons_list(self, reasons_list: list[Reasons]) -> Reasons: result.join_reasons(reason) return result - def __iter__(self) -> Iterator[Symbol]: - return iter(self.writes_to.union(self.reads_from).union(self.calls)) - def join_reasons(self, other: Reasons) -> Reasons: """Join two Reasons objects. @@ -192,7 +190,7 @@ def join_reasons(self, other: Reasons) -> Reasons: Parameters ---------- - other : Reasons + other : The other Reasons object. Returns @@ -212,7 +210,7 @@ def remove_unknown_call(self, node_id: NodeID) -> None: Parameters ---------- - node_id : NodeID + node_id : The NodeID of the unknown call to remove. """ - self.unknown_calls = {call for call in self.unknown_calls if call.id != node_id} + del self.unknown_calls[node_id] diff --git a/tests/library_analyzer/processing/api/purity_analysis/test_infer_purity.py b/tests/library_analyzer/processing/api/purity_analysis/test_infer_purity.py index ef111f29..692c5fca 100644 --- a/tests/library_analyzer/processing/api/purity_analysis/test_infer_purity.py +++ b/tests/library_analyzer/processing/api/purity_analysis/test_infer_purity.py @@ -490,6 +490,34 @@ def value(self): "value.line10": SimpleImpure({"NonLocalVariableRead.InstanceVariable.A._value"}), }, ), + ( # language=Python "Assign Instance Attribute via property with propagation" + """ +from abc import ABC + +class A(ABC): + def __init__(self, value): + self._value = value + if impure(): + pass + + @property + def value(self): + return self._value + +class B(A): + def __init__(self, value): + super().__init__(value) + +def impure(): + print("test") + """, # language=none + { + "__init__.line5": SimpleImpure({"FileWrite.StringLiteral.stdout"}), + "value.line11": SimpleImpure({"NonLocalVariableRead.InstanceVariable.A._value"}), + "__init__.line15": SimpleImpure({"FileWrite.StringLiteral.stdout"}), + "impure.line18": SimpleImpure({"FileWrite.StringLiteral.stdout"}), + }, + ), ], ids=[ "Trivial function", @@ -516,6 +544,7 @@ def value(self): "Builtins for list", "Builtins for set", "Assign Instance Attribute via property", + "Assign Instance Attribute via property with propagation", ], # TODO: class inits in cycles ) def test_infer_purity_pure(code: str, expected: list[ImpurityReason]) -> None: diff --git a/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py b/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py index f012c9e5..6b511eb3 100644 --- a/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py +++ b/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py @@ -246,27 +246,28 @@ def transform_reasons(reasons: dict[NodeID, Reasons]) -> dict[str, SimpleReasons function_references.function_scope.symbol.name, # type: ignore[union-attr] # function_scope is not None { ( - f"{target_reference.__class__.__name__}.{target_reference.klass.name}.{target_reference.node.name}.line{target_reference.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine - if isinstance(target_reference, ClassVariable) and target_reference.klass is not None + f"{target_reference.symbol.__class__.__name__}.{target_reference.symbol.klass.name}.{target_reference.symbol.node.name}.line{target_reference.symbol.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine + if isinstance(target_reference.symbol, ClassVariable) + and target_reference.symbol.klass is not None else ( - f"{target_reference.__class__.__name__}.{target_reference.klass.name}.{target_reference.node.member}.line{target_reference.node.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine - if isinstance(target_reference, InstanceVariable) - else f"{target_reference.__class__.__name__}.{target_reference.node.name}.line{target_reference.node.fromlineno}" + f"{target_reference.symbol.__class__.__name__}.{target_reference.symbol.klass.name}.{target_reference.symbol.node.member}.line{target_reference.symbol.node.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine + if isinstance(target_reference.symbol, InstanceVariable) + else f"{target_reference.symbol.__class__.__name__}.{target_reference.symbol.node.name}.line{target_reference.symbol.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine ) ) - for target_reference in function_references.writes_to + for target_reference in function_references.writes_to.values() }, { ( - f"{value_reference.__class__.__name__}.{value_reference.klass.name}.{value_reference.node.name}.line{value_reference.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine - if isinstance(value_reference, ClassVariable) and value_reference is not None + f"{value_reference.symbol.__class__.__name__}.{value_reference.symbol.klass.name}.{value_reference.symbol.node.name}.line{value_reference.symbol.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine + if isinstance(value_reference.symbol, ClassVariable) and value_reference.symbol is not None else ( - f"{value_reference.__class__.__name__}.{value_reference.klass.name}.{value_reference.node.member}.line{value_reference.node.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine - if isinstance(value_reference, InstanceVariable) - else f"{value_reference.__class__.__name__}.{value_reference.node.name}.line{value_reference.node.fromlineno}" + f"{value_reference.symbol.__class__.__name__}.{value_reference.symbol.klass.name}.{value_reference.symbol.node.member}.line{value_reference.symbol.node.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine + if isinstance(value_reference.symbol, InstanceVariable) + else f"{value_reference.symbol.__class__.__name__}.{value_reference.symbol.node.name}.line{value_reference.symbol.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine ) ) - for value_reference in function_references.reads_from + for value_reference in function_references.reads_from.values() }, ), },