From 3628803e9d32794f87de065c3429e98436a4339d Mon Sep 17 00:00:00 2001 From: lukarade <84092952+lukarade@users.noreply.github.com> Date: Thu, 2 May 2024 12:35:32 +0200 Subject: [PATCH 01/17] added TODOs --- .../api/purity_analysis/_get_module_data.py | 3 ++- .../purity_analysis/_resolve_references.py | 25 +++++++++++-------- .../api/purity_analysis/model/_purity.py | 2 +- .../api/purity_analysis/model/_reference.py | 4 +-- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py b/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py index 51a9624a..5c4f5853 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py +++ b/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py @@ -245,7 +245,8 @@ def _detect_scope(self, current_node: astroid.NodeNG) -> None: # add all children of the try-finally node and remove the try-except node. if isinstance(current_node, astroid.TryFinally) and isinstance(child.symbol.node, astroid.TryExcept): inner_scope_children.extend(child.children) - inner_scope_children.remove(child) + if child in inner_scope_children: + inner_scope_children.remove(child) self.current_node_stack[-1].children = inner_scope_children # Set the children of the current node. self.children = outer_scope_children # Keep the children that are not in the scope of the current node. diff --git a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py index a58dbe4f..f2014957 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py +++ b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py @@ -278,6 +278,8 @@ def _find_call_references( if isinstance(class_iterator, astroid.ClassDef): klass = self.classes.get(class_iterator.name) break + if isinstance(class_iterator, astroid.Module): + break class_iterator = class_iterator.parent if klass and klass.super_classes: @@ -502,17 +504,20 @@ def _find_value_references( # are resolved is much more effort and would require to change the data structure. # Therefore, all calls of imported functions are handled as MemberAccessValue. # Because of this, a check at the point where the referenced_symbols are added to the raw_reasons is needed. - if value_reference.node.receiver is None: + try: + if value_reference.node.receiver is None: + receiver_name = "UNKNOWN" + elif isinstance(value_reference.node.receiver, astroid.Attribute): + receiver_name = value_reference.node.receiver.attrname + elif isinstance(value_reference.node.receiver, astroid.Call) and hasattr( + value_reference.node.receiver.func, + "name", + ): + receiver_name = value_reference.node.receiver.func.name + else: + receiver_name = value_reference.node.receiver.name + except AttributeError: receiver_name = "UNKNOWN" - elif isinstance(value_reference.node.receiver, astroid.Attribute): - receiver_name = value_reference.node.receiver.attrname - elif isinstance(value_reference.node.receiver, astroid.Call) and isinstance( - value_reference.node.receiver.func, - astroid.Name, - ): - receiver_name = value_reference.node.receiver.func.name - else: - receiver_name = value_reference.node.receiver.name # In references imported via "import" statements, the symbols of the imported module are not known yet. # The symbol is accessed via its name, which is of type MemberAccessValue. diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py index 82da8ae9..e99ae825 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py @@ -207,7 +207,7 @@ def __hash__(self) -> int: @abstractmethod def to_dict(self) -> dict[str, Any]: - pass + pass # TODO: combine all origins and reasons of instances with the same class in one dict class Read(ImpurityReason, ABC): diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_reference.py b/src/library_analyzer/processing/api/purity_analysis/model/_reference.py index 157bdc5b..6e864840 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_reference.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_reference.py @@ -146,9 +146,9 @@ class Reasons: id: NodeID function_scope: FunctionScope | None = field(default=None) - writes_to: set[GlobalVariable | ClassVariable | InstanceVariable | Import] = field(default_factory=set) + writes_to: set[GlobalVariable | ClassVariable | InstanceVariable | Import] = field(default_factory=set) # TODO: add origin here reads_from: set[GlobalVariable | ClassVariable | InstanceVariable | Import] = field(default_factory=set) - calls: set[Symbol] = field(default_factory=set) + calls: set[Symbol] = field(default_factory=set) # TODO: SORTED SET oder LIST result: PurityResult | None = field(default=None) unknown_calls: set[Symbol | Reference] = field(default_factory=set) From 2590df06dade7987ca8f147602160fa2fac4b052 Mon Sep 17 00:00:00 2001 From: lukarade <84092952+lukarade@users.noreply.github.com> Date: Thu, 2 May 2024 13:42:46 +0200 Subject: [PATCH 02/17] added evaluation file --- .../api/test_infer_purity_package.py | 622 ++++++++++++++++++ 1 file changed, 622 insertions(+) create mode 100644 tests/library_analyzer/processing/api/test_infer_purity_package.py diff --git a/tests/library_analyzer/processing/api/test_infer_purity_package.py b/tests/library_analyzer/processing/api/test_infer_purity_package.py new file mode 100644 index 00000000..d7c60a34 --- /dev/null +++ b/tests/library_analyzer/processing/api/test_infer_purity_package.py @@ -0,0 +1,622 @@ +import json +from pathlib import Path + +import astroid +from library_analyzer.cli._run_api import _run_api_command +from library_analyzer.processing.api.docstring_parsing import DocstringStyle +from library_analyzer.processing.api.purity_analysis import get_purity_results +from library_analyzer.processing.api.purity_analysis.model import ( + ClassScope, + ClassVariable, + GlobalVariable, + LocalVariable, + NodeID, + Symbol, +) +from library_analyzer.utils import ASTWalker + + +def test_run_api_command_safe_ds() -> None: + _run_api_command("safe-ds", + Path(r"C:\Users\Lukas Radermacher\AppData\Local\pypoetry\Cache\virtualenvs\library-analyzer-FK1WveJV-py3.11\Lib\site-packages\safeds"), + Path(r"D:\Ergebnisse BA\Results\SafeDS"), + DocstringStyle.NUMPY, + ) + +def test_run_api_command_pandas() -> None: + _run_api_command("pandas", + Path(r"D:\Ergebnisse BA\Results\Pandas\pandas_v2.0.3"), + Path(r"D:\Ergebnisse BA\Results\Pandas"), + DocstringStyle.NUMPY, + ) + +def test_run_api_command_scikit() -> None: + _run_api_command("scikit", + Path(r"D:\Ergebnisse BA\Results\SciKit\sklearn_v1.3.0"), + Path(r"D:\Ergebnisse BA\Results\SciKit"), + DocstringStyle.NUMPY, + ) + + +def test_run_api_command_pytorch() -> None: + _run_api_command("pytorch", + Path(r"D:\Ergebnisse BA\Results\Pytorch\pytorch_v2.0.1"), + Path(r"D:\Ergebnisse BA\Results\Pytorch"), + DocstringStyle.NUMPY, + ) + +def test_run_api_command_seaborn() -> None: + _run_api_command("seaborn", + Path(r"D:\Ergebnisse BA\Results\Seaborn\seaborn_v0.12.2"), + Path(r"D:\Ergebnisse BA\Results\Seaborn"), + DocstringStyle.NUMPY, + ) + +def test_run_api_command_small_module() -> None: + _run_api_command("tracemalloce", + Path(r"D:\Ergebnisse BA\Results"), + Path(r"D:\Ergebnisse BA\Results"), + DocstringStyle.NUMPY, + ) + + +def test_single_ds_file() -> None: + res = get_purity_results(Path(r"C:\Users\Lukas Radermacher\AppData\Local\pypoetry\Cache\virtualenvs\library-analyzer-FK1WveJV-py3.11\Lib\site-packages\safeds\data\tabular\containers")) + out_file_api_purity = Path(r"D:\Ergebnisse BA\Results\Tests").joinpath("single_api_purity.json") + res.to_json_file(out_file_api_purity) + +class_dict = { + "ArithmeticError": "", + "AssertionError": "", + "AttributeError": "", + "BaseException": "", + "BaseExceptionGroup": "", + "BlockingIOError": "", + "BrokenPipeError": "", + "BufferError": "", + "BytesWarning": "", + "ChildProcessError": "", + "ConnectionAbortedError": "", + "ConnectionError": "", + "ConnectionRefusedError": "", + "ConnectionResetError": "", + "DeprecationWarning": "", + "EOFError": "", + "Ellipsis": "", + "EncodingWarning": "", + "EnvironmentError": "", + "Exception": "", + "ExceptionGroup": "", + "False": "", + "FileExistsError": "", + "FileNotFoundError": "", + "FloatingPointError": "", + "FutureWarning": "", + "GeneratorExit": "", + "IOError": "", + "ImportError": "", + "ImportWarning": "", + "IndentationError": "", + "IndexError": "", + "InterruptedError": "", + "IsADirectoryError": "", + "KeyError": "", + "KeyboardInterrupt": "", + "LookupError": "", + "MemoryError": "", + "ModuleNotFoundError": "", + "NameError": "", + "None": "", + "NotADirectoryError": "", + "NotImplemented": "", + "NotImplementedError": "", + "OSError": "", + "OverflowError": "", + "PendingDeprecationWarning": "", + "PermissionError": "", + "ProcessLookupError": "", + "RecursionError": "", + "ReferenceError": "", + "ResourceWarning": "", + "RuntimeError": "", + "RuntimeWarning": "", + "StopAsyncIteration": "", + "StopIteration": "", + "SyntaxError": "", + "SyntaxWarning": "", + "SystemError": "", + "SystemExit": "", + "TabError": "", + "TimeoutError": "", + "True": "", + "TypeError": "", + "UnboundLocalError": "", + "UnicodeDecodeError": "", + "UnicodeEncodeError": "", + "UnicodeError": "", + "UnicodeTranslateError": "", + "UnicodeWarning": "", + "UserWarning": "", + "ValueError": "", + "Warning": "", + "WindowsError": "", + "ZeroDivisionError": "", +} + + +def test_build_class_scopes() -> dict[str, ClassScope]: + global class_dict + class ScopesBuilder: + def __init__(self) -> None: + self.scopes: dict[str, ClassScope] = {} + self.current_class: str | None = None + + def enter_classdef(self, node: astroid.ClassDef) -> None: + symbol = GlobalVariable(node=node, id=NodeID("BUILTIN", node.name, node.lineno, node.col_offset), name=node.name) + self.scopes[node.name] = ClassScope(symbol, [], None, {}) + self.current_class = node.name + + def leave_classdef(self, node: astroid.ClassDef) -> None: + self.current_class = None + + def enter_functiondef(self, node: astroid.FunctionDef) -> None: + if not self.current_class: + return + symbol = ClassVariable(node=node, + id=NodeID("BUILTIN", node.name, node.lineno, node.col_offset), + name=node.name, + klass=self.scopes[self.current_class].symbol.node) + self.scopes[self.current_class].class_variables[node.name] = [symbol] + + def get_code_from_file(file_path): + with open(file_path, 'r') as file: + code = file.read() + return code + + def to_str(d: dict[str, ClassScope]) -> dict: + return {"'" + ke + "'": repr(va) for ke, va in d.items()} + + sc = ScopesBuilder() + walker = ASTWalker(sc) + + code = get_code_from_file(r"C:\Users\Lukas Radermacher\AppData\Local\JetBrains\PyCharm2023.3\python_stubs\-1907337602\builtins.py") + module = astroid.parse(code) + + walker.walk(module) + + res = {} + for k, v in sc.scopes.items(): + if k in class_dict: + res[k] = v + + with open(r"C:\Users\Lukas Radermacher\Desktop\Results\Tests\class_scopes.json", 'w') as file: + json.dump(to_str(res), file, indent=2) + # for key, value in res_dict.items(): + # if key in class_dict: + # file.write(f"'{key}': ClassScope(GlobalVariable({value['symbol']}),\n [],\n None,\n LocalVariable({{{value['class_variables']}}})\n)\n") + + print("") + +import builtins +import json +from pathlib import Path +from typing import Any + +import ijson +import pandas as pd + +_BUILTINS = set(dir(builtins)) + + +def evaluate_results(data: Any, file: str, to_console: bool = False) -> dict[str, ]: + """Evaluate the results of the purity analysis. + + Parameters + ---------- + data : str + The path to the purity analysis results file. + """ + count_pure: int = 0 + count_impure: int = 0 + count_reasons: dict[str, int] = {} + count_reasons_specified: dict[str, int] = {} + count_reasons_without_propagation: dict[str, int] = {} + count_reasons_specified_without_propagation: dict[str, int] = {} + + impure_because_unknown_call: dict[str, bool] = {} + unknown_calls: dict[str, int] = {} + unknown_calls_unknown: dict[str] = {} + total_reasons: int = 0 + missing_origin: int = 0 + + + for module in data.values(): + for fun_name, function in module.items(): + if function["purity"] == "Pure": + count_pure += 1 + elif function["purity"] == "Impure": + count_impure += 1 + + for reason in function["reasons"]: + total_reasons += 1 + res = reason["result"] + count_reasons[res] = count_reasons.get(res, 0) + 1 + if res == "UnknownCall": + reason_name = reason["reason"].split(".")[1] + unknown_calls[reason_name] = unknown_calls.get(reason_name, 0) + 1 + if reason_name == "UNKNOWN": + unknown_calls_unknown[fun_name] = reason_name + impure_because_unknown_call[fun_name] = True + else: + impure_because_unknown_call[fun_name] = False + + specified_res = reason["reason"].split(".")[0] + count_reasons_specified[specified_res] = count_reasons_specified.get(specified_res, 0) + 1 + + if reason["origin"] is None: + missing_origin += 1 + + if reason["origin"] == fun_name: + count_reasons_without_propagation[res] = count_reasons_without_propagation.get(res, 0) + 1 + count_reasons_specified_without_propagation[specified_res] = count_reasons_specified_without_propagation.get(specified_res, 0) + 1 + + unknown_calls = dict(sorted(unknown_calls.items(), key=lambda item: item[1], reverse=True)) + total_reasons_without_propagation = sum(count_reasons_without_propagation.values()) + + file_results = {"Name": file, + "Number of modules": len(data), + "Total functions": count_pure + count_impure, + "Pure functions": count_pure, + "Impure functions": count_impure, + "Reasons": count_reasons, + "Specified Reasons": count_reasons_specified, + "Reasons without propagation": count_reasons_without_propagation, + "Specified Reasons without propagation": count_reasons_specified_without_propagation, + "UnknownCalls Reasons": unknown_calls, + "UNKNOWN UnknownCalls": unknown_calls_unknown, + "Impure because UnknownCall": len({k: v for k, v in impure_because_unknown_call.items() if v}), + "Total Reasons": total_reasons, + "Total Reasons (without propagation)": total_reasons_without_propagation, + "Missing origin": missing_origin, + "Missing origin percentage": missing_origin / total_reasons * 100 if total_reasons > 0 else 0} + + if to_console: + print(f"Results for {file}:") + print(f"Number of modules: {len(data)}") + print(f"Total functions: {count_pure + count_impure}") + print(f"Pure functions: {count_pure}") + print(f"Impure functions: {count_impure}") + print("\nReasons:") + for reason, count in count_reasons.items(): + print(f"{reason}: {count}") + + print("\nSpecified Reasons:") + for reason, count in count_reasons_specified.items(): + print(f"{reason}: {count}") + + print("\nReasons without propagation:") + for reason, count in count_reasons_without_propagation.items(): + print(f"{reason}: {count}") + + print("\nSpecified Reasons without propagation:") + for reason, count in count_reasons_specified_without_propagation.items(): + print(f"{reason}: {count}") + + print("\nUnknownCalls Reasons:") + for reason, count in unknown_calls.items(): + print(f"{reason}: {count}") + + res = {k: v for k, v in impure_because_unknown_call.items() if v} + print(f"\nImpure because UnknownCall: {len(res)}") + + print(f"\nTotal Reasons: {total_reasons}, \nTotal Reasons (without propagation): {total_reasons_without_propagation}") + print(f"\nMissing origin: {missing_origin} => {missing_origin / total_reasons * 100:.2f}%") + + return file_results + + +def clear_results(file: str) -> None: + with open(file) as f: + results = json.load(f) + new_results = {} + for module_name, module in results.items(): + new_results[module_name] = {} + for function in module: + new_results[module_name][function] = { + "purity": "Pure", + } + + path = Path(r"C:\Users\Lukas Radermacher\Desktop\Results").joinpath("cleared_" + file) + with path.open("w") as f: + json.dump(new_results, f, indent=2) + + +def compare_results(expected: Any, actual: Any, result_name: str, to_console: bool = False) -> dict[str | Any, str | int | float | Any]: + tn = 0 + tp = 0 + fn = 0 + fp = 0 + + for module_name, module in expected.items(): + for function_name, function in module.items(): + if function["purity"] == actual[module_name][function_name]["purity"]: + if function["purity"] == "Pure": + tp += 1 # Expected pure, actual pure + else: + tn += 1 # Expected impure, actual impure + + if function["purity"] != actual[module_name][function_name]["purity"]: + if function["purity"] == "Pure": + fn += 1 # Expected pure, actual impure + else: + fp += 1 # Expected impure, actual pure + + if to_console: + print(f"Total equal results: {tn + tp} (True negatives: {tn}, True positives: {tp})") + print(f"Total different results: {fn + fp} (False negatives: {fn}, False positives: {fp})") + print(f"Accuracy: {(tp + tn) / (tp + tn + fp + fn) * 100:.2f}%") + print(f"Precision: {tp / (tp + fp) * 100:.2f}%") + print(f"Recall: {tp / (tp + fn) * 100:.2f}%") + print(f"F1-Score: {2 * tp / (2 * tp + fp + fn) * 100:.2f}%") + + return {"Name": result_name, + "Total equal results": tn + tp, + "True negatives": tn, + "True positives": tp, + "Total different results": fn + fp, + "False negatives": fn, + "False positives": fp, + "Accuracy": (tp + tn) / (tp + tn + fp + fn) * 100, + "Precision": tp / (tp + fp) * 100, + "Recall": tp / (tp + fn) * 100, + "F1-Score": 2 * tp / (2 * tp + fp + fn) * 100} + + +def compare_reasons(expected: Any, expected_name: str, actual: Any, actual_name: str, to_console: bool = False) -> dict[str, int | str]: + missing_reasons = 0 + missing_reasons_wrong_purity = 0 + extra_reasons = 0 + extra_reasons_wrong_purity = 0 + + # print the names of the missing functions + for module_name, module in actual.items(): + for function_name, function in module.items(): + if function_name not in expected[module_name]: + print(f"MISSING FUNCTION IN RESULT: {function_name}") + + + # Check the reasons that were expected but are missing + for module_name, module in expected.items(): + for function_name, function in module.items(): + if function["purity"] == "Impure": + if function["purity"] == actual[module_name][function_name]["purity"]: # both impure + for reason in function["reasons"]: + short_reason = (reason["result"], reason["reason"]) + short_other = [(x["result"], x["reason"]) for x in actual[module_name][function_name]["reasons"]] + if short_reason not in short_other: + # print(f"MISSING REASON IN RESULT {function_name}: {reason}") + missing_reasons += 1 + + + elif function["purity"] != actual[module_name][function_name]["purity"]: # expected impure, actual pure + for reason in function["reasons"]: + # print(f"MISSING REASON IN RESULT AND WRONG PURITY !!!VERY BAD!!! {function_name}: {reason}") + missing_reasons_wrong_purity += 1 + print(f"MISSING REASON IN RESULT AND WRONG PURITY !!!VERY BAD!!! {function_name}: {reason}") + + for module_name, module in actual.items(): + for function_name, function in module.items(): + if function["purity"] == "Impure": + if function["purity"] == expected[module_name][function_name]["purity"]: # both impure + for reason in function["reasons"]: + short_reason = (reason["result"], reason["reason"]) + short_other = [(x["result"], x["reason"]) for x in + expected[module_name][function_name]["reasons"]] + if short_reason not in short_other: + # print(f"EXTRA REASON IN RESULT {function_name}: {reason}") + extra_reasons += 1 + + + elif function["purity"] != expected[module_name][function_name]["purity"]: # expected pure, actual impure + for reason in actual[module_name][function_name]["reasons"]: + # print(f"EXTRA REASON IN RESULT AND WRONG PURITY {function_name}: {reason}") + extra_reasons_wrong_purity += 1 + + + + if to_console: + print(f"\n\nResults for {expected_name} and {actual_name}:") + if missing_reasons_wrong_purity > 0: + print("!!!FALSE POSITIVE ALARM!!!") + print(f"Missing reasons: {missing_reasons}") + print(f"Missing reasons with wrong purity: {missing_reasons_wrong_purity}") + print(f"Extra reasons: {extra_reasons}") + print(f"Extra reasons with wrong purity: {extra_reasons_wrong_purity}") + + return {"Name": actual_name, + "Missing reasons": missing_reasons, + "Missing reasons with wrong purity": missing_reasons_wrong_purity, + "Extra reasons": extra_reasons, + "Extra reasons with wrong purity": extra_reasons_wrong_purity} + + +def flatten_dict(d, parent_key="", sep="_"): + items = [] + for k, v in d.items(): + new_key = parent_key + sep + k if parent_key else k + if isinstance(v, dict): + items.extend(flatten_dict(v, new_key, sep=sep).items()) + else: + items.append((new_key, v)) + return dict(items) + +def to_excel(files: list[tuple[str, str]], out_path: str) -> None: + df = pd.DataFrame() + res_d: dict[str, pd.DataFrame] = {} + for file in files: + result = get_data(file[1]) + comp_res, comp_reasons = None, None + eval_res = evaluate_results(result, file[1]) + if file[0] != "": + expected = get_data(file[0]) + try: + comp_res = compare_results(expected, result, file[1], True) + except KeyError: + comp_res = None + try: + comp_reasons = compare_reasons(expected, file[0], result, file[1], True) + except KeyError: + comp_reasons = None + flattened_res = flatten_dict(eval_res) + res_d[file[1]] = pd.DataFrame(flattened_res, index=[0]).T + # print(df) + if comp_res: + eval_res = {**eval_res, **comp_res} + if comp_reasons: + eval_res = {**eval_res, **comp_reasons} + + df = df._append(eval_res, ignore_index=True) + + with pd.ExcelWriter(f"{out_path}results_2.xlsx") as writer: + df.to_excel(writer, sheet_name="Results", index=False) + for f, result in res_d.items().__reversed__(): + sheet_name = f.split("/")[-1] + result.to_excel(writer, sheet_name=sheet_name) + +def get_data(file: str, simple_mode: bool = True) -> Any: + if not simple_mode: + result = [] + with open(file + ".json") as f: + + objects = ijson.items(f, "safeds.data.image.containers._image") + + for obj in objects: + result.append(obj) + + parser = ijson.parse(f) + # + # # Initialize variables to track the current context + # current_key = None + # current_object = None + # + # try: + # # Iterate over each event in the parser + # for prefix, event, value in parser: + # # Check if the current prefix represents an object key + # if event == 'start_map': + # current_key = prefix + # current_object = {} + # elif event == 'map_key': + # current_key = value + # # Check if the current prefix represents a string value + # elif event == 'string': + # if current_object is not None: + # current_object[current_key] = value + # # Check if the current prefix represents the end of an object + # elif event == 'end_map': + # if current_key == "safeds.data.image.containers._image": + # result.append(current_object) + # current_object = None + # except ijson.common.IncompleteJSONError as e: + # print("Encountered incomplete JSON:", e) + + + # Convert the processed data to a DataFrame + return pd.DataFrame(result) + + else: + with open(file + ".json") as f: + return json.load(f) + +if __name__ == "__main__": + + def analyze_safe_ds(): + # evaluate_results("safe-ds__api_purity_4.json") + # print("\n__________________________\n") + # evaluate_results("safe-ds__api_purity_5.json") + # print("\n__________________________\n") + # evaluate_results("expected_safe-ds__api_purity_6.json") + # print("\n__________________________\n") + # evaluate_results("safe-ds__api_purity_6.json") + # print("\n__________________________\n") + # evaluate_results("safe-ds__api_purity_8.json") # Implemented Builtin superclasses (hardcoded) + # print("\n__________________________\n") + # evaluate_results("safe-ds__api_purity_9.json") # Added purity results for all Builtin functions (hardcoded) + # print("\n__________________________\n") + # evaluate_results("safe-ds__api_purity_15.json", True) # Added purity results for set, list, dict methods (hardcoded) + # clear_results("safe-ds__api_purity_22.json") + # evaluate_results("safe-ds__api_purity_20.json", True) # Added purity results for set, list, dict methods (hardcoded) + + + + # print("\n__________________________\n") + # compare_results("expected_safe-ds__api_purity_6.json", "safe-ds__api_purity_6.json") + # print("\n__________________________\n") + # compare_results("expected_safe-ds__api_purity_4.json", "safe-ds__api_purity_4.json") + # print("\n__________________________\n") + # compare_results("expected_safe-ds__api_purity_6.json", "safe-ds__api_purity_6.json") + # print("\n__________________________\n") + # compare_results("expected_safe-ds__api_purity_8.json", "safe-ds__api_purity_8.json") + # print("\n__________________________\n") + # compare_results("expected_safe-ds__api_purity_8.json", "safe-ds__api_purity_9.json") + # print("\n__________________________\n") + # compare_results("expected_safe-ds__api_purity_8.json", "safe-ds__api_purity_10.json") + # compare_results("expected_safe-ds__api_purity_8.json", "safe-ds__api_purity_9.json", True) + # compare_reasons("expected_safe-ds__api_purity_8.json", "safe-ds__api_purity_9.json", True) + # print("\n__________________________\n") + # compare_results("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_24.json", True) + # compare_reasons("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_24.json", True) + # print("\n__________________________\n") + # compare_results("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_25.json", True) + # compare_reasons("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_25.json", True) + # print("\n__________________________\n") + # compare_results("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_26.json", True) + # compare_reasons("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_26.json", True) + # # print("\n__________________________\n") + # # compare_results("expected_safe-ds__api_purity_22.json", "safe-ds__api_purity_27.json", True) + # # compare_reasons("expected_safe-ds__api_purity_22.json", "safe-ds__api_purity_27.json", True) + # print("\n__________________________\n") + # compare_results("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_29.json", True) + # compare_reasons("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_29.json", True) + + # compare_results("expected_test_module__api_purity.json", "test_module__api_purity.json") + # compare_reasons("expected_test_module__api_purity.json", "test_module__api_purity.json") + + files = [ + # "safe-ds__api_purity_4", + # "safe-ds__api_purity_5", + ("SafeDs/expected_safe-ds__api_purity_6", "SafeDs/safe-ds__api_purity_6"), + ("SafeDs/expected_safe-ds__api_purity_8", "SafeDs/safe-ds__api_purity_8"), # Implemented Builtin superclasses (hardcoded) + ("SafeDs/expected_safe-ds__api_purity_8", "SafeDs/safe-ds__api_purity_9"), # Added purity results for all Builtin functions (hardcoded) + ("SafeDs/expected_safe-ds__api_purity_10", "SafeDs/safe-ds__api_purity_10"), # Added purity results for set, list, dict methods (hardcoded) + ("SafeDs/expected_safe-ds__api_purity_10", "SafeDs/safe-ds__api_purity_11"), # Test run to check determinism + ("SafeDs/expected_safe-ds__api_purity_10", "SafeDs/safe-ds__api_purity_12"), # Test run to check determinism + ("SafeDs/expected_safe-ds__api_purity_10", "SafeDs/safe-ds__api_purity_13"), # Test run to check determinism + ("SafeDs/expected_safe-ds__api_purity_10", "SafeDs/safe-ds__api_purity_14"), # These are the results without the super cycle bug + ("SafeDs/expected_safe-ds__api_purity_10", "SafeDs/safe-ds__api_purity_15"), # Package Analysis for initial files + ("SafeDs/expected_safe-ds__api_purity_16", "SafeDs/safe-ds__api_purity_16"), # Package Analysis for initial files with bugged return + ("SafeDs/expected_safe-ds__api_purity_16", "SafeDs/safe-ds__api_purity_17"), # Package Analysis for initial files with empty files + ("SafeDs/expected_safe-ds__api_purity_16", "SafeDs/safe-ds__api_purity_18"), # Package Analysis for initial files with empty files in debug mode + ("SafeDs/expected_safe-ds__api_purity_19", "SafeDs/safe-ds__api_purity_19"), # Removed duplicate reasons + ("SafeDs/expected_safe-ds__api_purity_19", "SafeDs/safe-ds__api_purity_20"), # Removed UnknownCalls for successfully imported classes + ("SafeDs/expected_safe-ds__api_purity_19", "SafeDs/safe-ds__api_purity_21"), # Added fallback for origin + ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_22"), # Fixed call graph for nested cycles + ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_23"), # Added @ Origin to Builtins + ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_24"), # Detect calls of __new__ and __post_init__ on class calls + ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_25"), # Added purity results for str methods (hardcoded) + ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_26"), # Added purity results for str methods (hardcoded) + ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_27"), # Arity detection astroid 3.1 (with Attribute Errors) + ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_28"), # Arity detection astroid 2.15.6 + ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_29"), # Call graph for functions with the same name + ] + + to_excel(files, r"D:/Ergebnisse BA/Results/SafeDs/") + + # analyze_safe_ds() + # to_excel([("", "Seaborn/seaborn__api_purity")], "D:/Ergebnisse BA/Results/Seaborn/") + # to_excel([("", "Scikit/scikit__api_purity")], "D:/Ergebnisse BA/Results/SciKit/") + # to_excel([("", "Pandas/pandas__api_purity")], "D:/Ergebnisse BA/Results/Pandas/") + to_excel([("SafeDs/safe-ds__api_purity_28", "SafeDs/safe-ds__api_purity_29")], "D:/Ergebnisse BA/Results/SafeDS/") + + + From d0a73fd6d37ecd511dabb5ae0a9e84498cbf48bc Mon Sep 17 00:00:00 2001 From: lukarade Date: Sat, 4 May 2024 10:25:17 +0200 Subject: [PATCH 03/17] feat: add origin at `resolve_references` --- .../api/purity_analysis/_build_call_graph.py | 25 +++--- .../api/purity_analysis/_infer_purity.py | 81 +++++------------ .../purity_analysis/_resolve_references.py | 25 ++++-- .../api/purity_analysis/model/__init__.py | 2 + .../api/purity_analysis/model/_call_graph.py | 21 ----- .../api/purity_analysis/model/_purity.py | 89 +++++++++++++++---- .../api/purity_analysis/model/_reference.py | 27 +++--- .../test_resolve_references.py | 24 ++--- 8 files changed, 156 insertions(+), 138 deletions(-) diff --git a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py index 32cf34b1..bf4d839d 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py +++ b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py @@ -11,6 +11,7 @@ Parameter, Reasons, Symbol, + UnknownProto, ) @@ -172,14 +173,14 @@ def _built_call_graph(self, reason: Reasons) -> None: # Check if the node was declared inside the current module. elif call.id not in self.raw_reasons: - self._handle_unknown_call(call, reason.id) + self._handle_unknown_call(call, reason) # Build the call graph for the child function and add it to the children of the current node. else: self._built_call_graph(self.raw_reasons[call.id]) self.call_graph_forest.get_graph(reason.id).add_child(self.call_graph_forest.get_graph(call.id)) - def _handle_unknown_call(self, call: Symbol, reason_id: NodeID) -> None: + def _handle_unknown_call(self, call: Symbol, reason: Reasons) -> None: """Handle unknown calls. Deal with unknown calls and add them to the forest. @@ -190,8 +191,8 @@ def _handle_unknown_call(self, call: Symbol, reason_id: NodeID) -> None: ---------- call : Symbol The call that is unknown. - reason_id : NodeID - The id of the function that the call is in. + reason : Reasons + The reason of the function that contains the unknown call. """ # Deal with the case that the call calls an imported function. if isinstance(call, Import): @@ -200,26 +201,30 @@ def _handle_unknown_call(self, call: Symbol, reason_id: NodeID) -> None: reasons=Reasons(id=call.id), ) self.call_graph_forest.add_graph(call.id, imported_cgn) - self.call_graph_forest.get_graph(reason_id).add_child(self.call_graph_forest.get_graph(call.id)) + self.call_graph_forest.get_graph(reason.id).add_child(self.call_graph_forest.get_graph(call.id)) # If the call was used as a member of an MemberAccessValue, it needs to be removed from the unknown_calls. # This is due to the improved analysis that can determine the module through the receiver of that call. # Hence, the call is handled as a call of an imported function and not as an unknown_call # when inferring the purity later. - for unknown_call in self.call_graph_forest.get_graph(reason_id).reasons.unknown_calls: - if unknown_call.node == call.call: + for unknown_call in self.call_graph_forest.get_graph(reason.id).reasons.unknown_calls.copy().values(): + if unknown_call.symbol.node == call.call: ( - self.call_graph_forest.get_graph(reason_id).reasons.remove_unknown_call( + self.call_graph_forest.get_graph(reason.id).reasons.remove_unknown_call( NodeID.calc_node_id(call.call), ) ) # Deal with the case that the call calls a function parameter. elif isinstance(call, Parameter): - self.call_graph_forest.get_graph(reason_id).reasons.unknown_calls.add(call) + self.call_graph_forest.get_graph(reason.id).reasons.unknown_calls[call.id] = UnknownProto( + symbol=call, origin=reason.function_scope.symbol + ) else: - self.call_graph_forest.get_graph(reason_id).reasons.unknown_calls.add(call) + self.call_graph_forest.get_graph(reason.id).reasons.unknown_calls[call.id] = UnknownProto( + symbol=call, origin=reason.function_scope.symbol + ) def _handle_cycles(self, removed_nodes: set[NodeID] | None = None) -> None: """Handle cycles in the call graph. diff --git a/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py b/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py index 3346c10e..92ed0604 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py @@ -228,101 +228,62 @@ def _get_impurity_result(reasons: Reasons) -> PurityResult: # Check if the function has any non-local variable writes. if reasons.writes_to: - for write in reasons.writes_to: - impurity_reasons.add( - NonLocalVariableWrite( - symbol=write, - origin=( - reasons.id - if reasons.function_scope is None and reasons.id is not None - else (reasons.function_scope.symbol if reasons.function_scope is not None else None) - ), - ), - ) + for write in reasons.writes_to.values(): + impurity_reasons.add(write) # Check if the function has any non-local variable reads. if reasons.reads_from: - for read in reasons.reads_from: + for read in reasons.reads_from.values(): # Check if the read reads from an imported module. - if isinstance(read, Import): - if read.inferred_node: + if isinstance(read.symbol, Import): + if read.symbol.inferred_node: # If the inferred node is a function, it must be analyzed to determine its purity. - if isinstance(read.inferred_node, astroid.FunctionDef): + if isinstance(read.symbol.inferred_node, astroid.FunctionDef): impurity_reasons.add( - UnknownCall(UnknownFunctionCall(call=read.call, inferred_def=read.inferred_node)), + UnknownCall(UnknownFunctionCall(call=read.symbol.call, inferred_def=read.symbol.inferred_node)), ) - elif isinstance(read.inferred_node, astroid.ClassDef): + elif isinstance(read.symbol.inferred_node, astroid.ClassDef): impurity_reasons.add( - UnknownCall(UnknownClassInit(call=read.call, inferred_def=read.inferred_node)), + UnknownCall(UnknownClassInit(call=read.symbol.call, inferred_def=read.symbol.inferred_node)), ) # If the inferred node is a module, it will not count towards the impurity of the function. # If this was added, nearly anything would be impure. # Also, since the imported symbols are analyzed in much more detail, this can be omitted. - elif isinstance(read.inferred_node, astroid.Module): + elif isinstance(read.symbol.inferred_node, astroid.Module): pass # Default case for symbols that could not be inferred. else: # TODO: what type of nodes are allowed here? - impurity_reasons.add( - NonLocalVariableRead( - symbol=read, - origin=( - reasons.id - if reasons.function_scope is None and reasons.id is not None - else ( - reasons.function_scope.symbol - if reasons.function_scope is not None - else None - ) - ), - ), - ) + impurity_reasons.add(read) else: - raise ValueError(f"Imported node {read.name} has no inferred node.") from None + raise ValueError(f"Imported node {read.symbol.name} has no inferred node.") from None else: - impurity_reasons.add( - NonLocalVariableRead( - symbol=read, - origin=( - reasons.id - if reasons.function_scope is None and reasons.id is not None - else (reasons.function_scope.symbol if reasons.function_scope is not None else None) - ), - ), - ) + impurity_reasons.add(read) # Check if the function has any unknown calls. if reasons.unknown_calls: - for unknown_call in reasons.unknown_calls: + for unknown_call in reasons.unknown_calls.values(): # Handle calls of code where no definition was found. - if isinstance(unknown_call, Reference): + if isinstance(unknown_call.symbol, Reference): # This checks special cases of unknown calls. # These are cases where a function is not a true builtin, but also not a user-defined function. # Cases like dict.pop(), list.remove(), set.union(), etc. - if unknown_call.name in BUILTIN_SPECIALS: + if unknown_call.symbol.name in BUILTIN_SPECIALS: pass else: impurity_reasons.add( UnknownCall( - expression=UnknownFunctionCall(call=unknown_call.node), - origin=( - reasons.id - if reasons.function_scope is None and reasons.id is not None - else (reasons.function_scope.symbol if reasons.function_scope is not None else None) - ), + expression=UnknownFunctionCall(call=unknown_call.symbol.node), + origin=unknown_call.origin ), ) # Handle parameter calls - elif isinstance(unknown_call, Parameter): + elif isinstance(unknown_call.symbol, Parameter): impurity_reasons.add( CallOfParameter( - expression=ParameterAccess(unknown_call), - origin=( - reasons.id - if reasons.function_scope is None and reasons.id is not None - else (reasons.function_scope.symbol if reasons.function_scope is not None else None) - ), + expression=ParameterAccess(unknown_call.symbol), + origin=unknown_call.origin ), ) # Do not handle imported calls here since they are handled separately. diff --git a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py index f2014957..a4c76d37 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py +++ b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py @@ -29,6 +29,9 @@ Symbol, TargetReference, ValueReference, + NonLocalVariableWrite, + NonLocalVariableRead, + UnknownProto, ) _BUILTINS = dir(builtins) @@ -473,6 +476,8 @@ def _find_value_references( import_def, inferred_node=inferred_node_def, # type: ignore[type-var] # import def is not None. ) + specified_import_def.id.name = specified_import_def.id.name + "." + specified_import_def.name + if specified_import_def: result_value_reference.referenced_symbols.append(specified_import_def) @@ -559,6 +564,7 @@ def _find_value_references( name=value_reference.node.member, inferred_node=inferred_node_def, ) + specified_import_def.id.name = specified_import_def.id.name + "." + specified_import_def.name # If the member is a call, add the call node to the specified_import_def as fallback for the case # that the purity of the called function cannot be inferred. @@ -748,7 +754,8 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node # If no referenced symbols are found, add the call to the list of unknown_calls # of the raw_reasons dict for this function elif call_references_result.node not in raw_reasons[function.symbol.id].unknown_calls: - raw_reasons[function.symbol.id].unknown_calls.add(call_references_result.node) + raw_reasons[function.symbol.id].unknown_calls[call_references_result.node.id] = ( + UnknownProto(symbol=call_references_result.node, origin=function.symbol)) # Check if the function has value_references (References from a value node to a target node). if function.value_references: @@ -776,8 +783,9 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node if isinstance(referenced_symbol.node, astroid.ClassDef | astroid.FunctionDef): continue # Add the referenced symbol to the list of symbols whom are read from. - if referenced_symbol not in raw_reasons[function.symbol.id].reads_from: - raw_reasons[function.symbol.id].reads_from.add(referenced_symbol) + if referenced_symbol.id not in raw_reasons[function.symbol.id].reads_from: + raw_reasons[function.symbol.id].reads_from[referenced_symbol.id] = ( + NonLocalVariableRead(symbol=referenced_symbol, origin=function.symbol)) elif isinstance(referenced_symbol, Import): # Since calls of imported functions are treated within _find_value_references # as MemberAccessValue, they need to be added to the calls of the raw_reasons dict @@ -789,14 +797,16 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node if referenced_symbol not in raw_reasons[function.symbol.id].calls: raw_reasons[function.symbol.id].calls.add(referenced_symbol) else: # noqa: PLR5501 - if referenced_symbol not in raw_reasons[function.symbol.id].reads_from: - raw_reasons[function.symbol.id].reads_from.add(referenced_symbol) + if referenced_symbol.id not in raw_reasons[function.symbol.id].reads_from: + raw_reasons[function.symbol.id].reads_from[referenced_symbol.id] = ( + NonLocalVariableRead(symbol=referenced_symbol, origin=function.symbol)) # If no referenced symbols are found, add the call to the list of unknown_calls # of the raw_reasons dict for this function elif value_reference_result.node not in raw_reasons[ function.symbol.id ].unknown_calls and isinstance(value_reference_result.node.node, astroid.Call): - raw_reasons[function.symbol.id].unknown_calls.add(value_reference_result.node) + raw_reasons[function.symbol.id].unknown_calls[value_reference_result.node.id] = ( + UnknownProto(symbol=value_reference_result.node, origin=function.symbol)) # Check if the function has target_references (References from a target node to another target node). if function.target_symbols: @@ -829,7 +839,8 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node continue # Add the referenced symbol to the list of symbols whom are written to. if referenced_symbol not in raw_reasons[function.symbol.id].writes_to: - raw_reasons[function.symbol.id].writes_to.add(referenced_symbol) + raw_reasons[function.symbol.id].writes_to[referenced_symbol.id] = ( + NonLocalVariableWrite(symbol=referenced_symbol, origin=function.symbol)) name_references: dict[str, list[ReferenceNode]] = self.merge_dicts(value_references, target_references) resolved_references: dict[str, list[ReferenceNode]] = self.merge_dicts(call_references, name_references) diff --git a/src/library_analyzer/processing/api/purity_analysis/model/__init__.py b/src/library_analyzer/processing/api/purity_analysis/model/__init__.py index c48d5da5..14c8f93e 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/__init__.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/__init__.py @@ -49,6 +49,7 @@ UnknownCall, UnknownClassInit, UnknownFunctionCall, + UnknownProto, ) from library_analyzer.processing.api.purity_analysis.model._purity_builtins import ( BUILTIN_CLASSSCOPES, @@ -118,4 +119,5 @@ "BUILTIN_SPECIALS", "PackageData", "ParameterKind", + "UnknownProto", ] diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py index da75e4c2..f06e6c4d 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py @@ -225,27 +225,6 @@ def separate(self) -> dict[NodeID, CallGraphNode]: original_nodes[node_id] = node original_nodes[node_id].reasons.result = self.reasons.result - # The results need to be assigned an origin to be able to trace back the result. - if ( - original_nodes[node_id].reasons is not None - and isinstance(original_nodes[node_id].reasons.result, Impure) - and hasattr(original_nodes[node_id].reasons.result, "reasons") - ): - for reason in original_nodes[node_id].reasons.result.reasons: # type: ignore[union-attr] # it is cheked above - if ( - isinstance(reason, UnknownCall) - and isinstance(reason.expression, UnknownFunctionCall) - and reason.origin is None - ): - for nod in self.combines.values(): - for unknown_call in nod.reasons.unknown_calls: - if ( - unknown_call.node == reason.expression.call - and nod.reasons.function_scope is not None - ): - reason.origin = nod.reasons.function_scope.symbol - break - return original_nodes diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py index e99ae825..8bdd9f1a 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py @@ -14,6 +14,7 @@ NodeID, Symbol, UnknownSymbol, + Reference, ) from library_analyzer.utils import ensure_file_exists @@ -173,16 +174,49 @@ def clone(self) -> Impure: return Impure(reasons=self.reasons.copy()) def to_dict(self) -> dict[str, Any]: - reasons = [] seen = set() + non_local_variable_reads = [] + non_local_variable_writes = [] + file_reads = [] + file_writes = [] + unknown_calls = [] + native_calls = [] + parameter_calls = [] for reason in self.reasons: if str(reason) not in seen: - reasons.append(reason.to_dict()) seen.add(str(reason)) - + match reason: + case NonLocalVariableRead(): + non_local_variable_reads.append(reason.to_dict()) + case NonLocalVariableWrite(): + non_local_variable_writes.append(reason.to_dict()) + case FileRead(): + file_reads.append(reason.to_dict()) + case FileWrite(): + file_writes.append(reason.to_dict()) + case UnknownCall(): + unknown_calls.append(reason.to_dict()) + case NativeCall(): + native_calls.append(reason.to_dict()) + case CallOfParameter(): + parameter_calls.append(reason.to_dict()) + case _: + raise TypeError(f"Unknown reason type: {reason}") + + combined_reasons = { + "NonLocalVariableRead": non_local_variable_reads, + "NonLocalVariableWrite": non_local_variable_writes, + "FileRead": file_reads, + "FileWrite": file_writes, + "UnknownCall": unknown_calls, + "NativeCall": native_calls, + "CallOfParameter": parameter_calls, + } return { "purity": self.__class__.__name__, - "reasons": reasons, + "reasons": { + reason: value for reason, value in combined_reasons.items() if value + }, } def __hash__(self) -> int: @@ -207,7 +241,7 @@ def __hash__(self) -> int: @abstractmethod def to_dict(self) -> dict[str, Any]: - pass # TODO: combine all origins and reasons of instances with the same class in one dict + pass class Read(ImpurityReason, ABC): @@ -240,7 +274,6 @@ def to_dict(self) -> dict[str, Any]: self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) ) return { - "result": f"{self.__class__.__name__}", "origin": f"{origin}", "reason": f"{self.symbol.__class__.__name__}.{self.symbol.name}", } @@ -254,12 +287,11 @@ class FileRead(Read): ---------- source : Expression | None The source of the read. - This is None if the source is unknown. origin : Symbol | NodeID | None The origin of the read. """ - source: Expression | None = None # TODO: this should never be None + source: Expression origin: Symbol | NodeID | None = field(default=None) def __hash__(self) -> int: @@ -275,7 +307,6 @@ def to_dict(self) -> dict[str, Any]: self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) ) return { - "result": f"{self.__class__.__name__}", "origin": f"{origin}", "reason": f"{self.source.__str__()}", } @@ -311,7 +342,6 @@ def to_dict(self) -> dict[str, Any]: self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) ) return { - "result": f"{self.__class__.__name__}", "origin": f"{origin}", "reason": f"{self.symbol.__class__.__name__}.{self.symbol.name}", } @@ -323,14 +353,13 @@ class FileWrite(Write): Attributes ---------- - source : Expression | None + source : Expression The source of the write. - This is None if the source is unknown. # TODO: see above LARS origin : Symbol | NodeID | None The origin of the write. """ - source: Expression | None = None + source: Expression origin: Symbol | NodeID | None = field(default=None) def __hash__(self) -> int: @@ -346,7 +375,6 @@ def to_dict(self) -> dict[str, Any]: self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) ) return { - "result": f"{self.__class__.__name__}", "origin": f"{origin}", "reason": f"{self.source.__str__()}", } @@ -356,6 +384,36 @@ class Unknown(ImpurityReason, ABC): """Superclass for unknown type impurity reasons.""" +@dataclass +class UnknownProto(Unknown): + """Class for UnknownCalls which are not fully determined. + + Attributes + ---------- + symbol : Symbol | Reference + The symbol or reference object which is not fully determined. + origin : Symbol | NodeID | None + The origin of the unknown call. + """ + symbol: Symbol | Reference + origin: Symbol | NodeID | None = field(default=None) + + def __hash__(self) -> int: + return hash(str(self)) + + def __str__(self) -> str: + return f"{self.__class__.__name__}: {self.symbol.__class__.__name__}.{self.symbol.name}" + + def to_dict(self) -> dict[str, Any]: + origin = ( + self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) + ) + return { + "origin": f"{origin}", + "reason": f"{self.symbol.name}", + } + + @dataclass class UnknownCall(Unknown): """Class for calling unknown code. @@ -384,7 +442,6 @@ def to_dict(self) -> dict[str, Any]: self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) ) return { - "result": f"{self.__class__.__name__}", "origin": f"{origin}", "reason": f"{self.expression.__str__()}", } @@ -418,7 +475,6 @@ def to_dict(self) -> dict[str, Any]: self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) ) return { - "result": f"{self.__class__.__name__}", "origin": f"{origin}", "reason": f"{self.expression.__str__()}", } @@ -456,7 +512,6 @@ def to_dict(self) -> dict[str, Any]: self.origin.id if isinstance(self.origin, Symbol) else (self.origin if self.origin is not None else None) ) return { - "result": f"{self.__class__.__name__}", "origin": f"{origin}", "reason": f"{self.expression.__str__()}", } diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_reference.py b/src/library_analyzer/processing/api/purity_analysis/model/_reference.py index 6e864840..d93d66d2 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_reference.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_reference.py @@ -24,7 +24,12 @@ if TYPE_CHECKING: from collections.abc import Iterator - from library_analyzer.processing.api.purity_analysis.model import CallGraphForest, PurityResult + from library_analyzer.processing.api.purity_analysis.model import ( + CallGraphForest, + PurityResult, + NonLocalVariableRead, + NonLocalVariableWrite, + UnknownProto) @dataclass @@ -129,28 +134,28 @@ class Reasons: Is None if the reasons are not for a FunctionDef node. This is the case when either a builtin or a combined node is created, or a ClassScope is used to propagate reasons. - writes_to : set[GlobalVariable | ClassVariable | InstanceVariable | Import] - A set of all nodes that are written to. - reads_from : set[GlobalVariable | ClassVariable | InstanceVariable | Import] - A set of all nodes that are read from. + writes_to : dict[NodeID, NonLocalVariableWrite] + A dict of all nodes that are written to. + reads_from : dict[NodeID, NonLocalVariableRead] + A dict of all nodes that are read from. calls : set[Symbol] A set of all nodes that are called. result : PurityResult | None The result of the purity analysis This also works as a flag to determine if the purity analysis has already been performed: If it is None, the purity analysis has not been performed - unknown_calls : set[Symbol | Reference] - A list of all unknown calls. + unknown_calls : dict[NodeID, UnknownProto] + A dict of all unknown calls. Unknown calls are calls to functions that are not defined in the module or are parameters. """ id: NodeID function_scope: FunctionScope | None = field(default=None) - writes_to: set[GlobalVariable | ClassVariable | InstanceVariable | Import] = field(default_factory=set) # TODO: add origin here - reads_from: set[GlobalVariable | ClassVariable | InstanceVariable | Import] = field(default_factory=set) + writes_to: dict[NodeID, NonLocalVariableWrite] = field(default_factory=dict) + reads_from: dict[NodeID, NonLocalVariableRead] = field(default_factory=dict) calls: set[Symbol] = field(default_factory=set) # TODO: SORTED SET oder LIST result: PurityResult | None = field(default=None) - unknown_calls: set[Symbol | Reference] = field(default_factory=set) + unknown_calls: dict[NodeID, UnknownProto] = field(default_factory=dict) def join_reasons_list(self, reasons_list: list[Reasons]) -> Reasons: """Join a list of Reasons objects. @@ -215,4 +220,4 @@ def remove_unknown_call(self, node_id: NodeID) -> None: node_id : NodeID The NodeID of the unknown call to remove. """ - self.unknown_calls = {call for call in self.unknown_calls if call.id != node_id} + del self.unknown_calls[node_id] diff --git a/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py b/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py index 6c33e707..38dd1a1b 100644 --- a/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py +++ b/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py @@ -246,27 +246,27 @@ def transform_reasons(reasons: dict[NodeID, Reasons]) -> dict[str, SimpleReasons function_references.function_scope.symbol.name, # type: ignore[union-attr] # function_scope is not None { ( - f"{target_reference.__class__.__name__}.{target_reference.klass.name}.{target_reference.node.name}.line{target_reference.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine - if isinstance(target_reference, ClassVariable) and target_reference.klass is not None + f"{target_reference.symbol.__class__.__name__}.{target_reference.symbol.klass.name}.{target_reference.symbol.node.name}.line{target_reference.symbol.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine + if isinstance(target_reference.symbol, ClassVariable) and target_reference.symbol.klass is not None else ( - f"{target_reference.__class__.__name__}.{target_reference.klass.name}.{target_reference.node.member}.line{target_reference.node.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine - if isinstance(target_reference, InstanceVariable) - else f"{target_reference.__class__.__name__}.{target_reference.node.name}.line{target_reference.node.fromlineno}" + f"{target_reference.symbol.__class__.__name__}.{target_reference.symbol.klass.name}.{target_reference.symbol.node.member}.line{target_reference.symbol.node.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine + if isinstance(target_reference.symbol, InstanceVariable) + else f"{target_reference.symbol.__class__.__name__}.{target_reference.symbol.node.name}.line{target_reference.symbol.node.fromlineno}" ) ) - for target_reference in function_references.writes_to + for target_reference in function_references.writes_to.values() }, { ( - f"{value_reference.__class__.__name__}.{value_reference.klass.name}.{value_reference.node.name}.line{value_reference.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine - if isinstance(value_reference, ClassVariable) and value_reference is not None + f"{value_reference.symbol.__class__.__name__}.{value_reference.symbol.klass.name}.{value_reference.symbol.node.name}.line{value_reference.symbol.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine + if isinstance(value_reference.symbol, ClassVariable) and value_reference.symbol is not None else ( - f"{value_reference.__class__.__name__}.{value_reference.klass.name}.{value_reference.node.member}.line{value_reference.node.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine - if isinstance(value_reference, InstanceVariable) - else f"{value_reference.__class__.__name__}.{value_reference.node.name}.line{value_reference.node.fromlineno}" + f"{value_reference.symbol.__class__.__name__}.{value_reference.symbol.klass.name}.{value_reference.symbol.node.member}.line{value_reference.symbol.node.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine + if isinstance(value_reference.symbol, InstanceVariable) + else f"{value_reference.symbol.__class__.__name__}.{value_reference.symbol.node.name}.line{value_reference.symbol.node.fromlineno}" ) ) - for value_reference in function_references.reads_from + for value_reference in function_references.reads_from.values() }, ), }, From b5fa598e4f7f801f390cbeb5e509e240d7d2aeb2 Mon Sep 17 00:00:00 2001 From: lukarade <84092952+lukarade@users.noreply.github.com> Date: Fri, 10 May 2024 19:40:01 +0200 Subject: [PATCH 04/17] feat: added flag to shorten results --- .../api/purity_analysis/_build_call_graph.py | 4 +- .../api/purity_analysis/_infer_purity.py | 4 +- .../api/purity_analysis/model/_purity.py | 47 +- .../api/test_infer_purity_package.py | 622 ------------------ 4 files changed, 33 insertions(+), 644 deletions(-) delete mode 100644 tests/library_analyzer/processing/api/test_infer_purity_package.py diff --git a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py index bf4d839d..949263ea 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py +++ b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py @@ -218,12 +218,12 @@ def _handle_unknown_call(self, call: Symbol, reason: Reasons) -> None: # Deal with the case that the call calls a function parameter. elif isinstance(call, Parameter): self.call_graph_forest.get_graph(reason.id).reasons.unknown_calls[call.id] = UnknownProto( - symbol=call, origin=reason.function_scope.symbol + symbol=call, origin=reason.function_scope.symbol, ) else: self.call_graph_forest.get_graph(reason.id).reasons.unknown_calls[call.id] = UnknownProto( - symbol=call, origin=reason.function_scope.symbol + symbol=call, origin=reason.function_scope.symbol, ) def _handle_cycles(self, removed_nodes: set[NodeID] | None = None) -> None: diff --git a/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py b/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py index 92ed0604..cc0be955 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py @@ -275,7 +275,7 @@ def _get_impurity_result(reasons: Reasons) -> PurityResult: impurity_reasons.add( UnknownCall( expression=UnknownFunctionCall(call=unknown_call.symbol.node), - origin=unknown_call.origin + origin=unknown_call.origin, ), ) # Handle parameter calls @@ -283,7 +283,7 @@ def _get_impurity_result(reasons: Reasons) -> PurityResult: impurity_reasons.add( CallOfParameter( expression=ParameterAccess(unknown_call.symbol), - origin=unknown_call.origin + origin=unknown_call.origin, ), ) # Do not handle imported calls here since they are handled separately. diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py index 8bdd9f1a..401b747f 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py @@ -45,7 +45,7 @@ def __hash__(self) -> int: return hash(str(self)) @abstractmethod - def to_dict(self) -> dict[str, Any]: + def to_dict(self, shorten: bool = False) -> dict[str, Any]: pass @abstractmethod @@ -105,7 +105,7 @@ def update(self, other: PurityResult | None) -> PurityResult: def clone() -> Pure: return Pure() - def to_dict(self) -> dict[str, Any]: + def to_dict(self, shorten: bool = False) -> dict[str, Any]: # noqa: ARG002 return {"purity": self.__class__.__name__} def __hash__(self) -> int: @@ -173,7 +173,7 @@ def update(self, other: PurityResult | None) -> PurityResult: def clone(self) -> Impure: return Impure(reasons=self.reasons.copy()) - def to_dict(self) -> dict[str, Any]: + def to_dict(self, shorten: bool = False) -> dict[str, Any]: seen = set() non_local_variable_reads = [] non_local_variable_writes = [] @@ -202,16 +202,26 @@ def to_dict(self) -> dict[str, Any]: parameter_calls.append(reason.to_dict()) case _: raise TypeError(f"Unknown reason type: {reason}") - - combined_reasons = { - "NonLocalVariableRead": non_local_variable_reads, - "NonLocalVariableWrite": non_local_variable_writes, - "FileRead": file_reads, - "FileWrite": file_writes, - "UnknownCall": unknown_calls, - "NativeCall": native_calls, - "CallOfParameter": parameter_calls, - } + if not shorten: + combined_reasons = { + "NonLocalVariableRead": non_local_variable_reads, + "NonLocalVariableWrite": non_local_variable_writes, + "FileRead": file_reads, + "FileWrite": file_writes, + "UnknownCall": unknown_calls, + "NativeCall": native_calls, + "CallOfParameter": parameter_calls, + } + else: + combined_reasons = { + "NonLocalVariableRead": len(non_local_variable_reads), + "NonLocalVariableWrite": len(non_local_variable_writes), + "FileRead": len(file_reads), + "FileWrite": len(file_writes), + "UnknownCall": len(unknown_calls), + "NativeCall": len(native_calls), + "CallOfParameter": len(parameter_calls), + } return { "purity": self.__class__.__name__, "reasons": { @@ -395,8 +405,9 @@ class UnknownProto(Unknown): origin : Symbol | NodeID | None The origin of the unknown call. """ + symbol: Symbol | Reference - origin: Symbol | NodeID | None = field(default=None) + origin: Symbol | NodeID | None = field(default=None) # TODO: remove NodeID def __hash__(self) -> int: return hash(str(self)) @@ -643,15 +654,15 @@ class APIPurity: purity_results: typing.ClassVar[dict[NodeID, dict[NodeID, PurityResult]]] = {} - def to_json_file(self, path: Path) -> None: + def to_json_file(self, path: Path, shorten: bool = False) -> None: ensure_file_exists(path) with path.open("w") as f: - json.dump(self.to_dict(), f, indent=2) + json.dump(self.to_dict(shorten), f, indent=2) - def to_dict(self) -> dict[str, Any]: + def to_dict(self, shorten: bool = False) -> dict[str, Any]: return { module_name.__str__(): { - function_id.__str__(): purity.to_dict() + function_id.__str__(): purity.to_dict(shorten) for function_id, purity in purity_result.items() if not purity.is_class } diff --git a/tests/library_analyzer/processing/api/test_infer_purity_package.py b/tests/library_analyzer/processing/api/test_infer_purity_package.py deleted file mode 100644 index d7c60a34..00000000 --- a/tests/library_analyzer/processing/api/test_infer_purity_package.py +++ /dev/null @@ -1,622 +0,0 @@ -import json -from pathlib import Path - -import astroid -from library_analyzer.cli._run_api import _run_api_command -from library_analyzer.processing.api.docstring_parsing import DocstringStyle -from library_analyzer.processing.api.purity_analysis import get_purity_results -from library_analyzer.processing.api.purity_analysis.model import ( - ClassScope, - ClassVariable, - GlobalVariable, - LocalVariable, - NodeID, - Symbol, -) -from library_analyzer.utils import ASTWalker - - -def test_run_api_command_safe_ds() -> None: - _run_api_command("safe-ds", - Path(r"C:\Users\Lukas Radermacher\AppData\Local\pypoetry\Cache\virtualenvs\library-analyzer-FK1WveJV-py3.11\Lib\site-packages\safeds"), - Path(r"D:\Ergebnisse BA\Results\SafeDS"), - DocstringStyle.NUMPY, - ) - -def test_run_api_command_pandas() -> None: - _run_api_command("pandas", - Path(r"D:\Ergebnisse BA\Results\Pandas\pandas_v2.0.3"), - Path(r"D:\Ergebnisse BA\Results\Pandas"), - DocstringStyle.NUMPY, - ) - -def test_run_api_command_scikit() -> None: - _run_api_command("scikit", - Path(r"D:\Ergebnisse BA\Results\SciKit\sklearn_v1.3.0"), - Path(r"D:\Ergebnisse BA\Results\SciKit"), - DocstringStyle.NUMPY, - ) - - -def test_run_api_command_pytorch() -> None: - _run_api_command("pytorch", - Path(r"D:\Ergebnisse BA\Results\Pytorch\pytorch_v2.0.1"), - Path(r"D:\Ergebnisse BA\Results\Pytorch"), - DocstringStyle.NUMPY, - ) - -def test_run_api_command_seaborn() -> None: - _run_api_command("seaborn", - Path(r"D:\Ergebnisse BA\Results\Seaborn\seaborn_v0.12.2"), - Path(r"D:\Ergebnisse BA\Results\Seaborn"), - DocstringStyle.NUMPY, - ) - -def test_run_api_command_small_module() -> None: - _run_api_command("tracemalloce", - Path(r"D:\Ergebnisse BA\Results"), - Path(r"D:\Ergebnisse BA\Results"), - DocstringStyle.NUMPY, - ) - - -def test_single_ds_file() -> None: - res = get_purity_results(Path(r"C:\Users\Lukas Radermacher\AppData\Local\pypoetry\Cache\virtualenvs\library-analyzer-FK1WveJV-py3.11\Lib\site-packages\safeds\data\tabular\containers")) - out_file_api_purity = Path(r"D:\Ergebnisse BA\Results\Tests").joinpath("single_api_purity.json") - res.to_json_file(out_file_api_purity) - -class_dict = { - "ArithmeticError": "", - "AssertionError": "", - "AttributeError": "", - "BaseException": "", - "BaseExceptionGroup": "", - "BlockingIOError": "", - "BrokenPipeError": "", - "BufferError": "", - "BytesWarning": "", - "ChildProcessError": "", - "ConnectionAbortedError": "", - "ConnectionError": "", - "ConnectionRefusedError": "", - "ConnectionResetError": "", - "DeprecationWarning": "", - "EOFError": "", - "Ellipsis": "", - "EncodingWarning": "", - "EnvironmentError": "", - "Exception": "", - "ExceptionGroup": "", - "False": "", - "FileExistsError": "", - "FileNotFoundError": "", - "FloatingPointError": "", - "FutureWarning": "", - "GeneratorExit": "", - "IOError": "", - "ImportError": "", - "ImportWarning": "", - "IndentationError": "", - "IndexError": "", - "InterruptedError": "", - "IsADirectoryError": "", - "KeyError": "", - "KeyboardInterrupt": "", - "LookupError": "", - "MemoryError": "", - "ModuleNotFoundError": "", - "NameError": "", - "None": "", - "NotADirectoryError": "", - "NotImplemented": "", - "NotImplementedError": "", - "OSError": "", - "OverflowError": "", - "PendingDeprecationWarning": "", - "PermissionError": "", - "ProcessLookupError": "", - "RecursionError": "", - "ReferenceError": "", - "ResourceWarning": "", - "RuntimeError": "", - "RuntimeWarning": "", - "StopAsyncIteration": "", - "StopIteration": "", - "SyntaxError": "", - "SyntaxWarning": "", - "SystemError": "", - "SystemExit": "", - "TabError": "", - "TimeoutError": "", - "True": "", - "TypeError": "", - "UnboundLocalError": "", - "UnicodeDecodeError": "", - "UnicodeEncodeError": "", - "UnicodeError": "", - "UnicodeTranslateError": "", - "UnicodeWarning": "", - "UserWarning": "", - "ValueError": "", - "Warning": "", - "WindowsError": "", - "ZeroDivisionError": "", -} - - -def test_build_class_scopes() -> dict[str, ClassScope]: - global class_dict - class ScopesBuilder: - def __init__(self) -> None: - self.scopes: dict[str, ClassScope] = {} - self.current_class: str | None = None - - def enter_classdef(self, node: astroid.ClassDef) -> None: - symbol = GlobalVariable(node=node, id=NodeID("BUILTIN", node.name, node.lineno, node.col_offset), name=node.name) - self.scopes[node.name] = ClassScope(symbol, [], None, {}) - self.current_class = node.name - - def leave_classdef(self, node: astroid.ClassDef) -> None: - self.current_class = None - - def enter_functiondef(self, node: astroid.FunctionDef) -> None: - if not self.current_class: - return - symbol = ClassVariable(node=node, - id=NodeID("BUILTIN", node.name, node.lineno, node.col_offset), - name=node.name, - klass=self.scopes[self.current_class].symbol.node) - self.scopes[self.current_class].class_variables[node.name] = [symbol] - - def get_code_from_file(file_path): - with open(file_path, 'r') as file: - code = file.read() - return code - - def to_str(d: dict[str, ClassScope]) -> dict: - return {"'" + ke + "'": repr(va) for ke, va in d.items()} - - sc = ScopesBuilder() - walker = ASTWalker(sc) - - code = get_code_from_file(r"C:\Users\Lukas Radermacher\AppData\Local\JetBrains\PyCharm2023.3\python_stubs\-1907337602\builtins.py") - module = astroid.parse(code) - - walker.walk(module) - - res = {} - for k, v in sc.scopes.items(): - if k in class_dict: - res[k] = v - - with open(r"C:\Users\Lukas Radermacher\Desktop\Results\Tests\class_scopes.json", 'w') as file: - json.dump(to_str(res), file, indent=2) - # for key, value in res_dict.items(): - # if key in class_dict: - # file.write(f"'{key}': ClassScope(GlobalVariable({value['symbol']}),\n [],\n None,\n LocalVariable({{{value['class_variables']}}})\n)\n") - - print("") - -import builtins -import json -from pathlib import Path -from typing import Any - -import ijson -import pandas as pd - -_BUILTINS = set(dir(builtins)) - - -def evaluate_results(data: Any, file: str, to_console: bool = False) -> dict[str, ]: - """Evaluate the results of the purity analysis. - - Parameters - ---------- - data : str - The path to the purity analysis results file. - """ - count_pure: int = 0 - count_impure: int = 0 - count_reasons: dict[str, int] = {} - count_reasons_specified: dict[str, int] = {} - count_reasons_without_propagation: dict[str, int] = {} - count_reasons_specified_without_propagation: dict[str, int] = {} - - impure_because_unknown_call: dict[str, bool] = {} - unknown_calls: dict[str, int] = {} - unknown_calls_unknown: dict[str] = {} - total_reasons: int = 0 - missing_origin: int = 0 - - - for module in data.values(): - for fun_name, function in module.items(): - if function["purity"] == "Pure": - count_pure += 1 - elif function["purity"] == "Impure": - count_impure += 1 - - for reason in function["reasons"]: - total_reasons += 1 - res = reason["result"] - count_reasons[res] = count_reasons.get(res, 0) + 1 - if res == "UnknownCall": - reason_name = reason["reason"].split(".")[1] - unknown_calls[reason_name] = unknown_calls.get(reason_name, 0) + 1 - if reason_name == "UNKNOWN": - unknown_calls_unknown[fun_name] = reason_name - impure_because_unknown_call[fun_name] = True - else: - impure_because_unknown_call[fun_name] = False - - specified_res = reason["reason"].split(".")[0] - count_reasons_specified[specified_res] = count_reasons_specified.get(specified_res, 0) + 1 - - if reason["origin"] is None: - missing_origin += 1 - - if reason["origin"] == fun_name: - count_reasons_without_propagation[res] = count_reasons_without_propagation.get(res, 0) + 1 - count_reasons_specified_without_propagation[specified_res] = count_reasons_specified_without_propagation.get(specified_res, 0) + 1 - - unknown_calls = dict(sorted(unknown_calls.items(), key=lambda item: item[1], reverse=True)) - total_reasons_without_propagation = sum(count_reasons_without_propagation.values()) - - file_results = {"Name": file, - "Number of modules": len(data), - "Total functions": count_pure + count_impure, - "Pure functions": count_pure, - "Impure functions": count_impure, - "Reasons": count_reasons, - "Specified Reasons": count_reasons_specified, - "Reasons without propagation": count_reasons_without_propagation, - "Specified Reasons without propagation": count_reasons_specified_without_propagation, - "UnknownCalls Reasons": unknown_calls, - "UNKNOWN UnknownCalls": unknown_calls_unknown, - "Impure because UnknownCall": len({k: v for k, v in impure_because_unknown_call.items() if v}), - "Total Reasons": total_reasons, - "Total Reasons (without propagation)": total_reasons_without_propagation, - "Missing origin": missing_origin, - "Missing origin percentage": missing_origin / total_reasons * 100 if total_reasons > 0 else 0} - - if to_console: - print(f"Results for {file}:") - print(f"Number of modules: {len(data)}") - print(f"Total functions: {count_pure + count_impure}") - print(f"Pure functions: {count_pure}") - print(f"Impure functions: {count_impure}") - print("\nReasons:") - for reason, count in count_reasons.items(): - print(f"{reason}: {count}") - - print("\nSpecified Reasons:") - for reason, count in count_reasons_specified.items(): - print(f"{reason}: {count}") - - print("\nReasons without propagation:") - for reason, count in count_reasons_without_propagation.items(): - print(f"{reason}: {count}") - - print("\nSpecified Reasons without propagation:") - for reason, count in count_reasons_specified_without_propagation.items(): - print(f"{reason}: {count}") - - print("\nUnknownCalls Reasons:") - for reason, count in unknown_calls.items(): - print(f"{reason}: {count}") - - res = {k: v for k, v in impure_because_unknown_call.items() if v} - print(f"\nImpure because UnknownCall: {len(res)}") - - print(f"\nTotal Reasons: {total_reasons}, \nTotal Reasons (without propagation): {total_reasons_without_propagation}") - print(f"\nMissing origin: {missing_origin} => {missing_origin / total_reasons * 100:.2f}%") - - return file_results - - -def clear_results(file: str) -> None: - with open(file) as f: - results = json.load(f) - new_results = {} - for module_name, module in results.items(): - new_results[module_name] = {} - for function in module: - new_results[module_name][function] = { - "purity": "Pure", - } - - path = Path(r"C:\Users\Lukas Radermacher\Desktop\Results").joinpath("cleared_" + file) - with path.open("w") as f: - json.dump(new_results, f, indent=2) - - -def compare_results(expected: Any, actual: Any, result_name: str, to_console: bool = False) -> dict[str | Any, str | int | float | Any]: - tn = 0 - tp = 0 - fn = 0 - fp = 0 - - for module_name, module in expected.items(): - for function_name, function in module.items(): - if function["purity"] == actual[module_name][function_name]["purity"]: - if function["purity"] == "Pure": - tp += 1 # Expected pure, actual pure - else: - tn += 1 # Expected impure, actual impure - - if function["purity"] != actual[module_name][function_name]["purity"]: - if function["purity"] == "Pure": - fn += 1 # Expected pure, actual impure - else: - fp += 1 # Expected impure, actual pure - - if to_console: - print(f"Total equal results: {tn + tp} (True negatives: {tn}, True positives: {tp})") - print(f"Total different results: {fn + fp} (False negatives: {fn}, False positives: {fp})") - print(f"Accuracy: {(tp + tn) / (tp + tn + fp + fn) * 100:.2f}%") - print(f"Precision: {tp / (tp + fp) * 100:.2f}%") - print(f"Recall: {tp / (tp + fn) * 100:.2f}%") - print(f"F1-Score: {2 * tp / (2 * tp + fp + fn) * 100:.2f}%") - - return {"Name": result_name, - "Total equal results": tn + tp, - "True negatives": tn, - "True positives": tp, - "Total different results": fn + fp, - "False negatives": fn, - "False positives": fp, - "Accuracy": (tp + tn) / (tp + tn + fp + fn) * 100, - "Precision": tp / (tp + fp) * 100, - "Recall": tp / (tp + fn) * 100, - "F1-Score": 2 * tp / (2 * tp + fp + fn) * 100} - - -def compare_reasons(expected: Any, expected_name: str, actual: Any, actual_name: str, to_console: bool = False) -> dict[str, int | str]: - missing_reasons = 0 - missing_reasons_wrong_purity = 0 - extra_reasons = 0 - extra_reasons_wrong_purity = 0 - - # print the names of the missing functions - for module_name, module in actual.items(): - for function_name, function in module.items(): - if function_name not in expected[module_name]: - print(f"MISSING FUNCTION IN RESULT: {function_name}") - - - # Check the reasons that were expected but are missing - for module_name, module in expected.items(): - for function_name, function in module.items(): - if function["purity"] == "Impure": - if function["purity"] == actual[module_name][function_name]["purity"]: # both impure - for reason in function["reasons"]: - short_reason = (reason["result"], reason["reason"]) - short_other = [(x["result"], x["reason"]) for x in actual[module_name][function_name]["reasons"]] - if short_reason not in short_other: - # print(f"MISSING REASON IN RESULT {function_name}: {reason}") - missing_reasons += 1 - - - elif function["purity"] != actual[module_name][function_name]["purity"]: # expected impure, actual pure - for reason in function["reasons"]: - # print(f"MISSING REASON IN RESULT AND WRONG PURITY !!!VERY BAD!!! {function_name}: {reason}") - missing_reasons_wrong_purity += 1 - print(f"MISSING REASON IN RESULT AND WRONG PURITY !!!VERY BAD!!! {function_name}: {reason}") - - for module_name, module in actual.items(): - for function_name, function in module.items(): - if function["purity"] == "Impure": - if function["purity"] == expected[module_name][function_name]["purity"]: # both impure - for reason in function["reasons"]: - short_reason = (reason["result"], reason["reason"]) - short_other = [(x["result"], x["reason"]) for x in - expected[module_name][function_name]["reasons"]] - if short_reason not in short_other: - # print(f"EXTRA REASON IN RESULT {function_name}: {reason}") - extra_reasons += 1 - - - elif function["purity"] != expected[module_name][function_name]["purity"]: # expected pure, actual impure - for reason in actual[module_name][function_name]["reasons"]: - # print(f"EXTRA REASON IN RESULT AND WRONG PURITY {function_name}: {reason}") - extra_reasons_wrong_purity += 1 - - - - if to_console: - print(f"\n\nResults for {expected_name} and {actual_name}:") - if missing_reasons_wrong_purity > 0: - print("!!!FALSE POSITIVE ALARM!!!") - print(f"Missing reasons: {missing_reasons}") - print(f"Missing reasons with wrong purity: {missing_reasons_wrong_purity}") - print(f"Extra reasons: {extra_reasons}") - print(f"Extra reasons with wrong purity: {extra_reasons_wrong_purity}") - - return {"Name": actual_name, - "Missing reasons": missing_reasons, - "Missing reasons with wrong purity": missing_reasons_wrong_purity, - "Extra reasons": extra_reasons, - "Extra reasons with wrong purity": extra_reasons_wrong_purity} - - -def flatten_dict(d, parent_key="", sep="_"): - items = [] - for k, v in d.items(): - new_key = parent_key + sep + k if parent_key else k - if isinstance(v, dict): - items.extend(flatten_dict(v, new_key, sep=sep).items()) - else: - items.append((new_key, v)) - return dict(items) - -def to_excel(files: list[tuple[str, str]], out_path: str) -> None: - df = pd.DataFrame() - res_d: dict[str, pd.DataFrame] = {} - for file in files: - result = get_data(file[1]) - comp_res, comp_reasons = None, None - eval_res = evaluate_results(result, file[1]) - if file[0] != "": - expected = get_data(file[0]) - try: - comp_res = compare_results(expected, result, file[1], True) - except KeyError: - comp_res = None - try: - comp_reasons = compare_reasons(expected, file[0], result, file[1], True) - except KeyError: - comp_reasons = None - flattened_res = flatten_dict(eval_res) - res_d[file[1]] = pd.DataFrame(flattened_res, index=[0]).T - # print(df) - if comp_res: - eval_res = {**eval_res, **comp_res} - if comp_reasons: - eval_res = {**eval_res, **comp_reasons} - - df = df._append(eval_res, ignore_index=True) - - with pd.ExcelWriter(f"{out_path}results_2.xlsx") as writer: - df.to_excel(writer, sheet_name="Results", index=False) - for f, result in res_d.items().__reversed__(): - sheet_name = f.split("/")[-1] - result.to_excel(writer, sheet_name=sheet_name) - -def get_data(file: str, simple_mode: bool = True) -> Any: - if not simple_mode: - result = [] - with open(file + ".json") as f: - - objects = ijson.items(f, "safeds.data.image.containers._image") - - for obj in objects: - result.append(obj) - - parser = ijson.parse(f) - # - # # Initialize variables to track the current context - # current_key = None - # current_object = None - # - # try: - # # Iterate over each event in the parser - # for prefix, event, value in parser: - # # Check if the current prefix represents an object key - # if event == 'start_map': - # current_key = prefix - # current_object = {} - # elif event == 'map_key': - # current_key = value - # # Check if the current prefix represents a string value - # elif event == 'string': - # if current_object is not None: - # current_object[current_key] = value - # # Check if the current prefix represents the end of an object - # elif event == 'end_map': - # if current_key == "safeds.data.image.containers._image": - # result.append(current_object) - # current_object = None - # except ijson.common.IncompleteJSONError as e: - # print("Encountered incomplete JSON:", e) - - - # Convert the processed data to a DataFrame - return pd.DataFrame(result) - - else: - with open(file + ".json") as f: - return json.load(f) - -if __name__ == "__main__": - - def analyze_safe_ds(): - # evaluate_results("safe-ds__api_purity_4.json") - # print("\n__________________________\n") - # evaluate_results("safe-ds__api_purity_5.json") - # print("\n__________________________\n") - # evaluate_results("expected_safe-ds__api_purity_6.json") - # print("\n__________________________\n") - # evaluate_results("safe-ds__api_purity_6.json") - # print("\n__________________________\n") - # evaluate_results("safe-ds__api_purity_8.json") # Implemented Builtin superclasses (hardcoded) - # print("\n__________________________\n") - # evaluate_results("safe-ds__api_purity_9.json") # Added purity results for all Builtin functions (hardcoded) - # print("\n__________________________\n") - # evaluate_results("safe-ds__api_purity_15.json", True) # Added purity results for set, list, dict methods (hardcoded) - # clear_results("safe-ds__api_purity_22.json") - # evaluate_results("safe-ds__api_purity_20.json", True) # Added purity results for set, list, dict methods (hardcoded) - - - - # print("\n__________________________\n") - # compare_results("expected_safe-ds__api_purity_6.json", "safe-ds__api_purity_6.json") - # print("\n__________________________\n") - # compare_results("expected_safe-ds__api_purity_4.json", "safe-ds__api_purity_4.json") - # print("\n__________________________\n") - # compare_results("expected_safe-ds__api_purity_6.json", "safe-ds__api_purity_6.json") - # print("\n__________________________\n") - # compare_results("expected_safe-ds__api_purity_8.json", "safe-ds__api_purity_8.json") - # print("\n__________________________\n") - # compare_results("expected_safe-ds__api_purity_8.json", "safe-ds__api_purity_9.json") - # print("\n__________________________\n") - # compare_results("expected_safe-ds__api_purity_8.json", "safe-ds__api_purity_10.json") - # compare_results("expected_safe-ds__api_purity_8.json", "safe-ds__api_purity_9.json", True) - # compare_reasons("expected_safe-ds__api_purity_8.json", "safe-ds__api_purity_9.json", True) - # print("\n__________________________\n") - # compare_results("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_24.json", True) - # compare_reasons("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_24.json", True) - # print("\n__________________________\n") - # compare_results("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_25.json", True) - # compare_reasons("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_25.json", True) - # print("\n__________________________\n") - # compare_results("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_26.json", True) - # compare_reasons("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_26.json", True) - # # print("\n__________________________\n") - # # compare_results("expected_safe-ds__api_purity_22.json", "safe-ds__api_purity_27.json", True) - # # compare_reasons("expected_safe-ds__api_purity_22.json", "safe-ds__api_purity_27.json", True) - # print("\n__________________________\n") - # compare_results("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_29.json", True) - # compare_reasons("SafeDs/expected_safe-ds__api_purity_22.json", "SafeDs/safe-ds__api_purity_29.json", True) - - # compare_results("expected_test_module__api_purity.json", "test_module__api_purity.json") - # compare_reasons("expected_test_module__api_purity.json", "test_module__api_purity.json") - - files = [ - # "safe-ds__api_purity_4", - # "safe-ds__api_purity_5", - ("SafeDs/expected_safe-ds__api_purity_6", "SafeDs/safe-ds__api_purity_6"), - ("SafeDs/expected_safe-ds__api_purity_8", "SafeDs/safe-ds__api_purity_8"), # Implemented Builtin superclasses (hardcoded) - ("SafeDs/expected_safe-ds__api_purity_8", "SafeDs/safe-ds__api_purity_9"), # Added purity results for all Builtin functions (hardcoded) - ("SafeDs/expected_safe-ds__api_purity_10", "SafeDs/safe-ds__api_purity_10"), # Added purity results for set, list, dict methods (hardcoded) - ("SafeDs/expected_safe-ds__api_purity_10", "SafeDs/safe-ds__api_purity_11"), # Test run to check determinism - ("SafeDs/expected_safe-ds__api_purity_10", "SafeDs/safe-ds__api_purity_12"), # Test run to check determinism - ("SafeDs/expected_safe-ds__api_purity_10", "SafeDs/safe-ds__api_purity_13"), # Test run to check determinism - ("SafeDs/expected_safe-ds__api_purity_10", "SafeDs/safe-ds__api_purity_14"), # These are the results without the super cycle bug - ("SafeDs/expected_safe-ds__api_purity_10", "SafeDs/safe-ds__api_purity_15"), # Package Analysis for initial files - ("SafeDs/expected_safe-ds__api_purity_16", "SafeDs/safe-ds__api_purity_16"), # Package Analysis for initial files with bugged return - ("SafeDs/expected_safe-ds__api_purity_16", "SafeDs/safe-ds__api_purity_17"), # Package Analysis for initial files with empty files - ("SafeDs/expected_safe-ds__api_purity_16", "SafeDs/safe-ds__api_purity_18"), # Package Analysis for initial files with empty files in debug mode - ("SafeDs/expected_safe-ds__api_purity_19", "SafeDs/safe-ds__api_purity_19"), # Removed duplicate reasons - ("SafeDs/expected_safe-ds__api_purity_19", "SafeDs/safe-ds__api_purity_20"), # Removed UnknownCalls for successfully imported classes - ("SafeDs/expected_safe-ds__api_purity_19", "SafeDs/safe-ds__api_purity_21"), # Added fallback for origin - ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_22"), # Fixed call graph for nested cycles - ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_23"), # Added @ Origin to Builtins - ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_24"), # Detect calls of __new__ and __post_init__ on class calls - ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_25"), # Added purity results for str methods (hardcoded) - ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_26"), # Added purity results for str methods (hardcoded) - ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_27"), # Arity detection astroid 3.1 (with Attribute Errors) - ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_28"), # Arity detection astroid 2.15.6 - ("SafeDs/expected_safe-ds__api_purity_22", "SafeDs/safe-ds__api_purity_29"), # Call graph for functions with the same name - ] - - to_excel(files, r"D:/Ergebnisse BA/Results/SafeDs/") - - # analyze_safe_ds() - # to_excel([("", "Seaborn/seaborn__api_purity")], "D:/Ergebnisse BA/Results/Seaborn/") - # to_excel([("", "Scikit/scikit__api_purity")], "D:/Ergebnisse BA/Results/SciKit/") - # to_excel([("", "Pandas/pandas__api_purity")], "D:/Ergebnisse BA/Results/Pandas/") - to_excel([("SafeDs/safe-ds__api_purity_28", "SafeDs/safe-ds__api_purity_29")], "D:/Ergebnisse BA/Results/SafeDS/") - - - From c379d65e9e6de64b1492a9ba7b3d07d47207f64b Mon Sep 17 00:00:00 2001 From: lukarade <84092952+lukarade@users.noreply.github.com> Date: Fri, 10 May 2024 19:58:50 +0200 Subject: [PATCH 05/17] fix: linter fixes --- .../processing/api/purity_analysis/_build_call_graph.py | 4 ++-- .../processing/api/purity_analysis/_resolve_references.py | 2 +- .../processing/api/purity_analysis/model/_purity.py | 2 +- .../processing/api/purity_analysis/model/_reference.py | 3 --- .../processing/api/purity_analysis/test_resolve_references.py | 4 ++-- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py index 949263ea..d521c267 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py +++ b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py @@ -218,12 +218,12 @@ def _handle_unknown_call(self, call: Symbol, reason: Reasons) -> None: # Deal with the case that the call calls a function parameter. elif isinstance(call, Parameter): self.call_graph_forest.get_graph(reason.id).reasons.unknown_calls[call.id] = UnknownProto( - symbol=call, origin=reason.function_scope.symbol, + symbol=call, origin=reason.function_scope.symbol if reason.function_scope else None, ) else: self.call_graph_forest.get_graph(reason.id).reasons.unknown_calls[call.id] = UnknownProto( - symbol=call, origin=reason.function_scope.symbol, + symbol=call, origin=reason.function_scope.symbol if reason.function_scope else None, ) def _handle_cycles(self, removed_nodes: set[NodeID] | None = None) -> None: diff --git a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py index a4c76d37..2a49efe4 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py +++ b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py @@ -476,7 +476,7 @@ def _find_value_references( import_def, inferred_node=inferred_node_def, # type: ignore[type-var] # import def is not None. ) - specified_import_def.id.name = specified_import_def.id.name + "." + specified_import_def.name + specified_import_def.id.name = specified_import_def.id.name + "." + specified_import_def.name # type: ignore[union-attr] # specified_import_def is not None. if specified_import_def: result_value_reference.referenced_symbols.append(specified_import_def) diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py index 401b747f..711a460f 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py @@ -203,7 +203,7 @@ def to_dict(self, shorten: bool = False) -> dict[str, Any]: case _: raise TypeError(f"Unknown reason type: {reason}") if not shorten: - combined_reasons = { + combined_reasons: dict[str, Any] = { "NonLocalVariableRead": non_local_variable_reads, "NonLocalVariableWrite": non_local_variable_writes, "FileRead": file_reads, diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_reference.py b/src/library_analyzer/processing/api/purity_analysis/model/_reference.py index d93d66d2..fd2ebcdc 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_reference.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_reference.py @@ -186,9 +186,6 @@ def join_reasons_list(self, reasons_list: list[Reasons]) -> Reasons: result.join_reasons(reason) return result - def __iter__(self) -> Iterator[Symbol]: - return iter(self.writes_to.union(self.reads_from).union(self.calls)) - def join_reasons(self, other: Reasons) -> Reasons: """Join two Reasons objects. diff --git a/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py b/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py index 38dd1a1b..d530aec6 100644 --- a/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py +++ b/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py @@ -251,7 +251,7 @@ def transform_reasons(reasons: dict[NodeID, Reasons]) -> dict[str, SimpleReasons else ( f"{target_reference.symbol.__class__.__name__}.{target_reference.symbol.klass.name}.{target_reference.symbol.node.member}.line{target_reference.symbol.node.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine if isinstance(target_reference.symbol, InstanceVariable) - else f"{target_reference.symbol.__class__.__name__}.{target_reference.symbol.node.name}.line{target_reference.symbol.node.fromlineno}" + else f"{target_reference.symbol.__class__.__name__}.{target_reference.symbol.node.name}.line{target_reference.symbol.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine ) ) for target_reference in function_references.writes_to.values() @@ -263,7 +263,7 @@ def transform_reasons(reasons: dict[NodeID, Reasons]) -> dict[str, SimpleReasons else ( f"{value_reference.symbol.__class__.__name__}.{value_reference.symbol.klass.name}.{value_reference.symbol.node.member}.line{value_reference.symbol.node.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine if isinstance(value_reference.symbol, InstanceVariable) - else f"{value_reference.symbol.__class__.__name__}.{value_reference.symbol.node.name}.line{value_reference.symbol.node.fromlineno}" + else f"{value_reference.symbol.__class__.__name__}.{value_reference.symbol.node.name}.line{value_reference.symbol.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine ) ) for value_reference in function_references.reads_from.values() From 2c58b771591a1ae3ac5a8c98556ccf6f0f5b9bca Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 10 May 2024 18:00:30 +0000 Subject: [PATCH 06/17] style: apply automated linter fixes --- .../api/purity_analysis/_build_call_graph.py | 6 +++-- .../api/purity_analysis/_infer_purity.py | 10 ++++---- .../purity_analysis/_resolve_references.py | 23 ++++++++++++------- .../api/purity_analysis/model/_call_graph.py | 2 -- .../api/purity_analysis/model/_purity.py | 6 ++--- .../api/purity_analysis/model/_reference.py | 10 +++----- .../test_resolve_references.py | 3 ++- 7 files changed, 32 insertions(+), 28 deletions(-) diff --git a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py index d521c267..243a5c19 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py +++ b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py @@ -218,12 +218,14 @@ def _handle_unknown_call(self, call: Symbol, reason: Reasons) -> None: # Deal with the case that the call calls a function parameter. elif isinstance(call, Parameter): self.call_graph_forest.get_graph(reason.id).reasons.unknown_calls[call.id] = UnknownProto( - symbol=call, origin=reason.function_scope.symbol if reason.function_scope else None, + symbol=call, + origin=reason.function_scope.symbol if reason.function_scope else None, ) else: self.call_graph_forest.get_graph(reason.id).reasons.unknown_calls[call.id] = UnknownProto( - symbol=call, origin=reason.function_scope.symbol if reason.function_scope else None, + symbol=call, + origin=reason.function_scope.symbol if reason.function_scope else None, ) def _handle_cycles(self, removed_nodes: set[NodeID] | None = None) -> None: diff --git a/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py b/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py index cc0be955..7476738e 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py @@ -26,8 +26,6 @@ ImpurityReason, NativeCall, NodeID, - NonLocalVariableRead, - NonLocalVariableWrite, OpenMode, PackageData, Parameter, @@ -240,11 +238,15 @@ def _get_impurity_result(reasons: Reasons) -> PurityResult: # If the inferred node is a function, it must be analyzed to determine its purity. if isinstance(read.symbol.inferred_node, astroid.FunctionDef): impurity_reasons.add( - UnknownCall(UnknownFunctionCall(call=read.symbol.call, inferred_def=read.symbol.inferred_node)), + UnknownCall( + UnknownFunctionCall(call=read.symbol.call, inferred_def=read.symbol.inferred_node), + ), ) elif isinstance(read.symbol.inferred_node, astroid.ClassDef): impurity_reasons.add( - UnknownCall(UnknownClassInit(call=read.symbol.call, inferred_def=read.symbol.inferred_node)), + UnknownCall( + UnknownClassInit(call=read.symbol.call, inferred_def=read.symbol.inferred_node), + ), ) # If the inferred node is a module, it will not count towards the impurity of the function. # If this was added, nearly anything would be impure. diff --git a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py index 2a49efe4..49fff0ec 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py +++ b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py @@ -21,6 +21,8 @@ MemberAccessValue, ModuleAnalysisResult, NodeID, + NonLocalVariableRead, + NonLocalVariableWrite, PackageData, ParameterKind, Reasons, @@ -28,10 +30,8 @@ ReferenceNode, Symbol, TargetReference, - ValueReference, - NonLocalVariableWrite, - NonLocalVariableRead, UnknownProto, + ValueReference, ) _BUILTINS = dir(builtins) @@ -755,7 +755,8 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node # of the raw_reasons dict for this function elif call_references_result.node not in raw_reasons[function.symbol.id].unknown_calls: raw_reasons[function.symbol.id].unknown_calls[call_references_result.node.id] = ( - UnknownProto(symbol=call_references_result.node, origin=function.symbol)) + UnknownProto(symbol=call_references_result.node, origin=function.symbol) + ) # Check if the function has value_references (References from a value node to a target node). if function.value_references: @@ -785,7 +786,8 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node # Add the referenced symbol to the list of symbols whom are read from. if referenced_symbol.id not in raw_reasons[function.symbol.id].reads_from: raw_reasons[function.symbol.id].reads_from[referenced_symbol.id] = ( - NonLocalVariableRead(symbol=referenced_symbol, origin=function.symbol)) + NonLocalVariableRead(symbol=referenced_symbol, origin=function.symbol) + ) elif isinstance(referenced_symbol, Import): # Since calls of imported functions are treated within _find_value_references # as MemberAccessValue, they need to be added to the calls of the raw_reasons dict @@ -799,14 +801,18 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node else: # noqa: PLR5501 if referenced_symbol.id not in raw_reasons[function.symbol.id].reads_from: raw_reasons[function.symbol.id].reads_from[referenced_symbol.id] = ( - NonLocalVariableRead(symbol=referenced_symbol, origin=function.symbol)) + NonLocalVariableRead( + symbol=referenced_symbol, origin=function.symbol, + ) + ) # If no referenced symbols are found, add the call to the list of unknown_calls # of the raw_reasons dict for this function elif value_reference_result.node not in raw_reasons[ function.symbol.id ].unknown_calls and isinstance(value_reference_result.node.node, astroid.Call): raw_reasons[function.symbol.id].unknown_calls[value_reference_result.node.id] = ( - UnknownProto(symbol=value_reference_result.node, origin=function.symbol)) + UnknownProto(symbol=value_reference_result.node, origin=function.symbol) + ) # Check if the function has target_references (References from a target node to another target node). if function.target_symbols: @@ -840,7 +846,8 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node # Add the referenced symbol to the list of symbols whom are written to. if referenced_symbol not in raw_reasons[function.symbol.id].writes_to: raw_reasons[function.symbol.id].writes_to[referenced_symbol.id] = ( - NonLocalVariableWrite(symbol=referenced_symbol, origin=function.symbol)) + NonLocalVariableWrite(symbol=referenced_symbol, origin=function.symbol) + ) name_references: dict[str, list[ReferenceNode]] = self.merge_dicts(value_references, target_references) resolved_references: dict[str, list[ReferenceNode]] = self.merge_dicts(call_references, name_references) diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py index f06e6c4d..addd5922 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py @@ -3,8 +3,6 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING -from library_analyzer.processing.api.purity_analysis.model._purity import Impure, UnknownCall, UnknownFunctionCall - if TYPE_CHECKING: from library_analyzer.processing.api.purity_analysis.model._module_data import ( Import, diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py index 711a460f..50c5daca 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py @@ -12,9 +12,9 @@ from library_analyzer.processing.api.purity_analysis.model._module_data import ( MemberAccessValue, NodeID, + Reference, Symbol, UnknownSymbol, - Reference, ) from library_analyzer.utils import ensure_file_exists @@ -224,9 +224,7 @@ def to_dict(self, shorten: bool = False) -> dict[str, Any]: } return { "purity": self.__class__.__name__, - "reasons": { - reason: value for reason, value in combined_reasons.items() if value - }, + "reasons": {reason: value for reason, value in combined_reasons.items() if value}, } def __hash__(self) -> int: diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_reference.py b/src/library_analyzer/processing/api/purity_analysis/model/_reference.py index fd2ebcdc..f1830a23 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_reference.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_reference.py @@ -8,11 +8,7 @@ from library_analyzer.processing.api.purity_analysis.model._module_data import ( ClassScope, - ClassVariable, FunctionScope, - GlobalVariable, - Import, - InstanceVariable, MemberAccessTarget, MemberAccessValue, NodeID, @@ -22,14 +18,14 @@ ) if TYPE_CHECKING: - from collections.abc import Iterator from library_analyzer.processing.api.purity_analysis.model import ( CallGraphForest, - PurityResult, NonLocalVariableRead, NonLocalVariableWrite, - UnknownProto) + PurityResult, + UnknownProto, + ) @dataclass diff --git a/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py b/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py index d530aec6..55b267da 100644 --- a/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py +++ b/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py @@ -247,7 +247,8 @@ def transform_reasons(reasons: dict[NodeID, Reasons]) -> dict[str, SimpleReasons { ( f"{target_reference.symbol.__class__.__name__}.{target_reference.symbol.klass.name}.{target_reference.symbol.node.name}.line{target_reference.symbol.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine - if isinstance(target_reference.symbol, ClassVariable) and target_reference.symbol.klass is not None + if isinstance(target_reference.symbol, ClassVariable) + and target_reference.symbol.klass is not None else ( f"{target_reference.symbol.__class__.__name__}.{target_reference.symbol.klass.name}.{target_reference.symbol.node.member}.line{target_reference.symbol.node.node.fromlineno}" # type: ignore[union-attr] # "None" has no attribute "name" but since we check for the type before, this is fine if isinstance(target_reference.symbol, InstanceVariable) From fe01d2d61aa1d22b7943833c5734be3de0d26d19 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 10 May 2024 18:02:13 +0000 Subject: [PATCH 07/17] style: apply automated linter fixes --- .../processing/api/purity_analysis/_resolve_references.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py index 49fff0ec..7c59550a 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py +++ b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py @@ -802,7 +802,8 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node if referenced_symbol.id not in raw_reasons[function.symbol.id].reads_from: raw_reasons[function.symbol.id].reads_from[referenced_symbol.id] = ( NonLocalVariableRead( - symbol=referenced_symbol, origin=function.symbol, + symbol=referenced_symbol, + origin=function.symbol, ) ) # If no referenced symbols are found, add the call to the list of unknown_calls From 296001b25a726121742abdd11e1fb1133917042a Mon Sep 17 00:00:00 2001 From: lukarade <84092952+lukarade@users.noreply.github.com> Date: Wed, 5 Jun 2024 23:08:44 +0200 Subject: [PATCH 08/17] feat: detect instance variables generated with `@property`-functions --- .../api/purity_analysis/_get_module_data.py | 10 ++++ .../purity_analysis/_resolve_references.py | 11 +++- .../purity_analysis/test_get_module_data.py | 60 ++++++++++++++++++- .../api/purity_analysis/test_infer_purity.py | 20 +++++++ .../test_resolve_references.py | 2 +- 5 files changed, 100 insertions(+), 3 deletions(-) diff --git a/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py b/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py index 51a9624a..a4024234 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py +++ b/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py @@ -705,6 +705,16 @@ def enter_functiondef(self, node: astroid.FunctionDef) -> None: for decorator in node.decorators.nodes: if isinstance(decorator, astroid.Name) and decorator.name == "overload": return + elif isinstance(decorator, astroid.Name) and decorator.name == "property": + if isinstance(self.current_node_stack[-1], ClassScope) and hasattr(self.current_node_stack[-1], "instance_variables"): + self.current_node_stack[-1].instance_variables.setdefault(node.name, []).append( + InstanceVariable( + node=node, + id=NodeID.calc_node_id(node), + name=node.name, + klass=self.current_node_stack[-1].symbol.node, + ), + ) self.current_node_stack.append( FunctionScope( diff --git a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py index a58dbe4f..3be3141d 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py +++ b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py @@ -768,8 +768,17 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node if isinstance(referenced_symbol, GlobalVariable | ClassVariable | InstanceVariable): # Since classes and functions are defined as immutable # reading from them is not a reason for impurity. + # There is an exception to this rule for functions + # that are decorated with a '@property' decorator. These functions define an + # instance variable as a property, which can be read from. if isinstance(referenced_symbol.node, astroid.ClassDef | astroid.FunctionDef): - continue + if (isinstance(referenced_symbol.node, astroid.FunctionDef) and + "builtins.property" in referenced_symbol.node.decoratornames() and + isinstance(referenced_symbol, InstanceVariable) + ): + pass + else: + continue # Add the referenced symbol to the list of symbols whom are read from. if referenced_symbol not in raw_reasons[function.symbol.id].reads_from: raw_reasons[function.symbol.id].reads_from.add(referenced_symbol) diff --git a/tests/library_analyzer/processing/api/purity_analysis/test_get_module_data.py b/tests/library_analyzer/processing/api/purity_analysis/test_get_module_data.py index 7c056d97..e94d130d 100644 --- a/tests/library_analyzer/processing/api/purity_analysis/test_get_module_data.py +++ b/tests/library_analyzer/processing/api/purity_analysis/test_get_module_data.py @@ -119,7 +119,10 @@ def transform_scope_node( super_classes_transformed = [] for child in node.instance_variables.values(): for c1 in child: - c_str = to_string_class(c1.node.node) + if isinstance(c1.node, MemberAccess): + c_str = to_string_class(c1.node.node) + else: + c_str = to_string_class(c1.node) if c_str is not None: instance_vars_transformed.append(c_str) # type: ignore[misc] # it is not possible that c_str is None @@ -1884,6 +1887,60 @@ def __post_init__(self): ), }, ), + ( # language=Python "Assign Instance Attribute via property" + """ +class A: + def __init__(self, value): + self._value = value + + def f(self): + return self.value + + @property + def value(self): + return self._value + """, # language=none + { + "A": SimpleClassScope( + "GlobalVariable.ClassDef.A", + [ + SimpleFunctionScope( + "ClassVariable.FunctionDef.__init__", + [ + SimpleScope("Parameter.AssignName.self", []), + SimpleScope("Parameter.AssignName.value", []), + SimpleScope("InstanceVariable.MemberAccess.self._value", []), + ], + ["AssignName.self", "Name.self", "AssignName.value", "MemberAccessTarget.self._value"], + ["Name.value"], + [], + ["AssignName.self", "AssignName.value"], + ), + SimpleFunctionScope( + "ClassVariable.FunctionDef.f", + [SimpleScope("Parameter.AssignName.self", [])], + ["AssignName.self"], + ["MemberAccessValue.self.value", "Name.self"], + [], + ["AssignName.self"], + ), + SimpleFunctionScope( + "ClassVariable.FunctionDef.value", + [SimpleScope("Parameter.AssignName.self", [])], + ["AssignName.self"], + ["MemberAccessValue.self._value", "Name.self"], + [], + ["AssignName.self"], + ), + ], + ["FunctionDef.__init__", "FunctionDef.f", "FunctionDef.value"], + ["AssignAttr._value", "FunctionDef.value"], + None, + "__init__", + None, + ), + }, + ), ], ids=[ "ClassDef", @@ -1898,6 +1955,7 @@ def __post_init__(self): "Multiple ClassDef", "ClassDef with super class", "ClassDef with __new__, __init__ and __post_init__", + "Assign Instance Attribute via property", ], ) def test_get_module_data_classes(code: str, expected: dict[str, SimpleClassScope]) -> None: diff --git a/tests/library_analyzer/processing/api/purity_analysis/test_infer_purity.py b/tests/library_analyzer/processing/api/purity_analysis/test_infer_purity.py index dc6eb812..ef111f29 100644 --- a/tests/library_analyzer/processing/api/purity_analysis/test_infer_purity.py +++ b/tests/library_analyzer/processing/api/purity_analysis/test_infer_purity.py @@ -471,6 +471,25 @@ def f(): "f.line2": Pure(), }, ), + ( # language=Python "Assign Instance Attribute via property" + """ +class A: + def __init__(self, value): + self._value = value + + def f(self): + return self.value + + @property + def value(self): + return self._value + """, # language=none + { + "__init__.line3": Pure(), + "f.line6": SimpleImpure({"NonLocalVariableRead.InstanceVariable.A.value"}), + "value.line10": SimpleImpure({"NonLocalVariableRead.InstanceVariable.A._value"}), + }, + ), ], ids=[ "Trivial function", @@ -496,6 +515,7 @@ def f(): "Builtins for dict", "Builtins for list", "Builtins for set", + "Assign Instance Attribute via property", ], # TODO: class inits in cycles ) def test_infer_purity_pure(code: str, expected: list[ImpurityReason]) -> None: diff --git a/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py b/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py index 6c33e707..930d1835 100644 --- a/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py +++ b/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py @@ -2851,7 +2851,7 @@ def f(): ReferenceTestNode( "a.state.line18", "FunctionDef.f", - ["ClassVariable.State.state.line13", "ClassVariable.State.state.line9"], + ["ClassVariable.State.state.line13", "ClassVariable.State.state.line9", "InstanceVariable.State.state.line9"], ), ReferenceTestNode("a.line18", "FunctionDef.f", ["LocalVariable.a.line17"]), ], From 7874fa67e5f33ac4750a07918d7498ff3b37a8bd Mon Sep 17 00:00:00 2001 From: lukarade <84092952+lukarade@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:40:20 +0200 Subject: [PATCH 09/17] fix: fixed propagation of nodes which already were inside the CGF --- .../api/purity_analysis/_build_call_graph.py | 15 ++++++---- .../api/purity_analysis/test_infer_purity.py | 29 +++++++++++++++++++ 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py index 243a5c19..ab54298c 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py +++ b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py @@ -145,13 +145,16 @@ def _built_call_graph(self, reason: Reasons) -> None: # If the node is already inside the forest and does not have any calls left, it is considered to be finished. if self.call_graph_forest.has_graph(reason.id) and not reason.calls: return - + # If the node is already inside the forest but still has calls left, it needs to be updated. + if self.call_graph_forest.has_graph(reason.id): + cgn = self.call_graph_forest.get_graph(reason.id) # Create a new node and add it to the forest. - cgn = CallGraphNode( - symbol=reason.function_scope.symbol, # type: ignore[union-attr] # function_scope is never None here - reasons=reason, - ) - self.call_graph_forest.add_graph(reason.id, cgn) + else: + cgn = CallGraphNode( + symbol=reason.function_scope.symbol, # type: ignore[union-attr] # function_scope is never None here + reasons=reason, + ) + self.call_graph_forest.add_graph(reason.id, cgn) # The node has calls, which need to be added to the forest and to the children of the current node. # They are sorted to ensure a deterministic order of the children (especially but not only for testing). diff --git a/tests/library_analyzer/processing/api/purity_analysis/test_infer_purity.py b/tests/library_analyzer/processing/api/purity_analysis/test_infer_purity.py index ef111f29..692c5fca 100644 --- a/tests/library_analyzer/processing/api/purity_analysis/test_infer_purity.py +++ b/tests/library_analyzer/processing/api/purity_analysis/test_infer_purity.py @@ -490,6 +490,34 @@ def value(self): "value.line10": SimpleImpure({"NonLocalVariableRead.InstanceVariable.A._value"}), }, ), + ( # language=Python "Assign Instance Attribute via property with propagation" + """ +from abc import ABC + +class A(ABC): + def __init__(self, value): + self._value = value + if impure(): + pass + + @property + def value(self): + return self._value + +class B(A): + def __init__(self, value): + super().__init__(value) + +def impure(): + print("test") + """, # language=none + { + "__init__.line5": SimpleImpure({"FileWrite.StringLiteral.stdout"}), + "value.line11": SimpleImpure({"NonLocalVariableRead.InstanceVariable.A._value"}), + "__init__.line15": SimpleImpure({"FileWrite.StringLiteral.stdout"}), + "impure.line18": SimpleImpure({"FileWrite.StringLiteral.stdout"}), + }, + ), ], ids=[ "Trivial function", @@ -516,6 +544,7 @@ def value(self): "Builtins for list", "Builtins for set", "Assign Instance Attribute via property", + "Assign Instance Attribute via property with propagation", ], # TODO: class inits in cycles ) def test_infer_purity_pure(code: str, expected: list[ImpurityReason]) -> None: From f15df3942c69c8c3e2c1c707972bb5345336a8eb Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Thu, 6 Jun 2024 13:42:10 +0000 Subject: [PATCH 10/17] style: apply automated linter fixes --- .../processing/api/purity_analysis/_get_module_data.py | 4 +++- .../processing/api/purity_analysis/_resolve_references.py | 7 ++++--- .../processing/api/purity_analysis/test_get_module_data.py | 2 +- .../api/purity_analysis/test_resolve_references.py | 6 +++++- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py b/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py index a507a609..c8b5fe2a 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py +++ b/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py @@ -707,7 +707,9 @@ def enter_functiondef(self, node: astroid.FunctionDef) -> None: if isinstance(decorator, astroid.Name) and decorator.name == "overload": return elif isinstance(decorator, astroid.Name) and decorator.name == "property": - if isinstance(self.current_node_stack[-1], ClassScope) and hasattr(self.current_node_stack[-1], "instance_variables"): + if isinstance(self.current_node_stack[-1], ClassScope) and hasattr( + self.current_node_stack[-1], "instance_variables", + ): self.current_node_stack[-1].instance_variables.setdefault(node.name, []).append( InstanceVariable( node=node, diff --git a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py index bd6935d1..ef741364 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py +++ b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py @@ -785,9 +785,10 @@ def _resolve_references(self) -> tuple[dict[str, list[ReferenceNode]], dict[Node # that are decorated with a '@property' decorator. These functions define an # instance variable as a property, which can be read from. if isinstance(referenced_symbol.node, astroid.ClassDef | astroid.FunctionDef): - if (isinstance(referenced_symbol.node, astroid.FunctionDef) and - "builtins.property" in referenced_symbol.node.decoratornames() and - isinstance(referenced_symbol, InstanceVariable) + if ( + isinstance(referenced_symbol.node, astroid.FunctionDef) + and "builtins.property" in referenced_symbol.node.decoratornames() + and isinstance(referenced_symbol, InstanceVariable) ): pass else: diff --git a/tests/library_analyzer/processing/api/purity_analysis/test_get_module_data.py b/tests/library_analyzer/processing/api/purity_analysis/test_get_module_data.py index e94d130d..b7c8f290 100644 --- a/tests/library_analyzer/processing/api/purity_analysis/test_get_module_data.py +++ b/tests/library_analyzer/processing/api/purity_analysis/test_get_module_data.py @@ -1910,7 +1910,7 @@ def value(self): SimpleScope("Parameter.AssignName.self", []), SimpleScope("Parameter.AssignName.value", []), SimpleScope("InstanceVariable.MemberAccess.self._value", []), - ], + ], ["AssignName.self", "Name.self", "AssignName.value", "MemberAccessTarget.self._value"], ["Name.value"], [], diff --git a/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py b/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py index ec3feefa..6b511eb3 100644 --- a/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py +++ b/tests/library_analyzer/processing/api/purity_analysis/test_resolve_references.py @@ -2852,7 +2852,11 @@ def f(): ReferenceTestNode( "a.state.line18", "FunctionDef.f", - ["ClassVariable.State.state.line13", "ClassVariable.State.state.line9", "InstanceVariable.State.state.line9"], + [ + "ClassVariable.State.state.line13", + "ClassVariable.State.state.line9", + "InstanceVariable.State.state.line9", + ], ), ReferenceTestNode("a.line18", "FunctionDef.f", ["LocalVariable.a.line17"]), ], From d7fa0f15747abc742ef3dc5f01b515a149717bbb Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Thu, 6 Jun 2024 13:43:43 +0000 Subject: [PATCH 11/17] style: apply automated linter fixes --- .../processing/api/purity_analysis/_get_module_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py b/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py index c8b5fe2a..daa991e1 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py +++ b/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py @@ -708,7 +708,8 @@ def enter_functiondef(self, node: astroid.FunctionDef) -> None: return elif isinstance(decorator, astroid.Name) and decorator.name == "property": if isinstance(self.current_node_stack[-1], ClassScope) and hasattr( - self.current_node_stack[-1], "instance_variables", + self.current_node_stack[-1], + "instance_variables", ): self.current_node_stack[-1].instance_variables.setdefault(node.name, []).append( InstanceVariable( From 055a9a10de190f5625dc2a9207d73c3f046fe1f7 Mon Sep 17 00:00:00 2001 From: lukarade <84092952+lukarade@users.noreply.github.com> Date: Tue, 18 Jun 2024 17:36:23 +0200 Subject: [PATCH 12/17] fix: removed all type annotations from docstrings --- .../api/purity_analysis/_build_call_graph.py | 42 ++++---- .../api/purity_analysis/_get_module_data.py | 60 ++++++------ .../api/purity_analysis/_infer_purity.py | 44 ++++----- .../purity_analysis/_resolve_references.py | 52 +++++----- .../api/purity_analysis/model/_call_graph.py | 28 +++--- .../api/purity_analysis/model/_module_data.py | 98 +++++++++---------- .../api/purity_analysis/model/_purity.py | 60 ++++++------ .../api/purity_analysis/model/_reference.py | 32 +++--- 8 files changed, 208 insertions(+), 208 deletions(-) diff --git a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py index ab54298c..bd90fc62 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py +++ b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py @@ -22,21 +22,21 @@ class CallGraphBuilder: Attributes ---------- - classes : dict[str, ClassScope] + classes Classnames in the module as key and their corresponding ClassScope instance as value. - raw_reasons : dict[NodeID, Reasons] + raw_reasons The raw reasons for impurity for all functions. Keys are the ids of the functions. - call_graph_forest : CallGraphForest + call_graph_forest The call graph forest for the given functions. - visited : set[NodeID] + visited A set of all visited nodes. Parameters ---------- - classes : dict[str, ClassScope] + classes Classnames in the module as key and their corresponding ClassScope instance as value. - raw_reasons : dict[NodeID, Reasons] + raw_reasons The raw reasons for impurity for all functions. Keys are the ids of the functions. """ @@ -60,7 +60,7 @@ def _build_call_graph_forest(self) -> CallGraphForest: Returns ------- - call_graph_forest : CallGraphForest + call_graph_forest The call graph forest for the given functions. """ # Prepare the classes for the call graph. @@ -132,7 +132,7 @@ def _built_call_graph(self, reason: Reasons) -> None: Parameters ---------- - reason : Reasons + reason The raw reasons of the function. """ # If the node has already been visited, return @@ -192,9 +192,9 @@ def _handle_unknown_call(self, call: Symbol, reason: Reasons) -> None: Parameters ---------- - call : Symbol + call The call that is unknown. - reason : Reasons + reason The reason of the function that contains the unknown call. """ # Deal with the case that the call calls an imported function. @@ -241,7 +241,7 @@ def _handle_cycles(self, removed_nodes: set[NodeID] | None = None) -> None: Parameters ---------- - removed_nodes : set[NodeID] | None + removed_nodes A set of all removed nodes. If not given, a new set is created. """ @@ -272,16 +272,16 @@ def _test_cgn_for_cycles( Parameters ---------- - cgn : CallGraphNode + cgn The current node in the graph that is visited. - visited_nodes : set[NewCallGraphNode] | None + visited_nodes A set of all visited nodes. - path : list[NodeID] | None + path A list of all nodes in the current path. Returns ------- - cycle : dict[NodeID, NewCallGraphNode] + cycle Dict of all nodes in the cycle. Keys are the NodeIDs of the nodes. Returns an empty dict if no cycle is found. @@ -326,7 +326,7 @@ def _contract_cycle(self, cycle: dict[NodeID, CallGraphNode]) -> None: Parameters ---------- - cycle : dict[NodeID, CallGraphNode] + cycle A dict of all nodes in the cycle. Keys are the NodeIDs of the CallGraphNodes. """ @@ -378,10 +378,10 @@ def _update_pointers(self, cycle: dict[NodeID, CallGraphNode], combined_node: Co Parameters ---------- - cycle : dict[NodeID, CallGraphNode] + cycle A dict of all nodes in the cycle. Keys are the NodeIDs of the nodes. - combined_node : CombinedCallGraphNode + combined_node The combined node that replaces all nodes in the cycle. """ for graph in self.call_graph_forest.graphs.values(): @@ -396,15 +396,15 @@ def build_call_graph(classes: dict[str, ClassScope], raw_reasons: dict[NodeID, R Parameters ---------- - classes : dict[str, ClassScope] + classes Classnames in the module as key and their corresponding ClassScope instance as value. - raw_reasons : dict[NodeID, Reasons] + raw_reasons The raw reasons for impurity for all functions. Keys are the ids of the functions. Returns ------- - call_graph_forest : CallGraphForest + call_graph_forest The call graph forest for the given functions. """ return CallGraphBuilder(classes, raw_reasons).call_graph_forest diff --git a/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py b/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py index daa991e1..8e149162 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py +++ b/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py @@ -40,33 +40,33 @@ class ModuleDataBuilder: Attributes ---------- - current_node_stack : list[Scope] + current_node_stack Stack of nodes that are currently visited by the ASTWalker. The last node in the stack is the current node. It Is only used while walking the AST. - current_function_def : list[FunctionScope] + current_function_def Stack of FunctionScopes that are currently visited by the ASTWalker. The top of the stack is the current function definition. It is only used while walking the AST. - children : list[Scope] + children All found children nodes are stored in children until their scope is determined. After the AST is completely walked, the resulting "Module"- Scope is stored in children. (children[0]) - targets : list[Symbol] + targets All found targets are stored in targets until their scope is determined. - values : list[Reference] + values All found names are stored in names until their scope is determined. It Is only used while walking the AST. - calls : list[Reference] + calls All calls found on function level are stored in calls until their scope is determined. It Is only used while walking the AST. - classes : dict[str, ClassScope] + classes Classnames in the module as key and their corresponding ClassScope instance as value. - functions : dict[str, list[FunctionScope]] + functions Function names in the module as key and a list of their corresponding FunctionScope instances as value. - global_variables : dict[str, Scope] + global_variables All global variables and their corresponding Scope instance. - imports : dict[str, Import] + imports All imports and their corresponding Import instance. """ @@ -92,7 +92,7 @@ def has_assignattr_parent(node: astroid.Attribute) -> bool: Parameters ---------- - node : astroid.Attribute + node The node whose parents are to be checked. Returns @@ -116,9 +116,9 @@ def get_symbol(self, node: astroid.NodeNG, current_scope: astroid.NodeNG | None) Parameters ---------- - node : astroid.NodeNG + node The node whose symbol is to be determined. - current_scope : astroid.NodeNG | None + current_scope The current scope of the node (is None if the node is the module node). """ match current_scope: @@ -220,7 +220,7 @@ def _detect_scope(self, current_node: astroid.NodeNG) -> None: Parameters ---------- - current_node : astroid.NodeNG + current_node The node whose scope is to be determined. """ outer_scope_children: list[Scope] = [] @@ -276,7 +276,7 @@ def _analyze_class(self, current_node: astroid.ClassDef) -> None: Parameters ---------- - current_node : astroid.ClassDef + current_node The node to analyze. """ if not isinstance(current_node, astroid.ClassDef): @@ -304,7 +304,7 @@ def _analyze_function(self, current_node: astroid.FunctionDef) -> None: Parameters ---------- - current_node : astroid.FunctionDef + current_node The node to analyze. """ if not isinstance(current_node, astroid.FunctionDef): @@ -519,7 +519,7 @@ def find_first_parent_function(self, node: astroid.NodeNG | MemberAccess) -> ast Parameters ---------- - node : astroid.NodeNG + node The node to start the search from. Returns @@ -543,9 +543,9 @@ def handle_arg(self, node: astroid.AssignName, kind: ParameterKind) -> None: Parameters ---------- - node : astroid.AssignName + node The node that is to be handled. - kind : ParameterKind + kind The kind of the parameter. """ scope_node = Scope( @@ -562,9 +562,9 @@ def add_arg_to_function_scope_parameters(self, argument: astroid.AssignName, kin Parameters ---------- - argument : astroid.AssignName + argument The argument node to add to the parameter dict. - kind : ParameterKind + kind The kind of the parameter. """ if isinstance(self.current_node_stack[-1], FunctionScope): @@ -580,9 +580,9 @@ def is_annotated(self, node: astroid.NodeNG | MemberAccess, found_annotation_nod Parameters ---------- - node : astroid.Name + node The node to check. - found_annotation_node : bool + found_annotation_node A bool that indicates if an annotation node is found. Returns @@ -621,9 +621,9 @@ def check_if_global(self, name: str, node: astroid.NodeNG) -> list[astroid.Assig Parameters ---------- - name : str + name The variable name to check. - node : astroid.NodeNG + node The node whose root is to be checked. Returns @@ -649,7 +649,7 @@ def find_base_classes(self, node: astroid.ClassDef) -> list[ClassScope]: Parameters ---------- - node : astroid.ClassDef + node The class whose base classes are to be found. Returns @@ -676,7 +676,7 @@ def enter_module(self, node: astroid.Module) -> None: Parameters ---------- - node : astroid.Module + node The module node to enter. """ self.current_node_stack.append( @@ -1218,11 +1218,11 @@ def get_module_data(code: str, module_name: str = "", path: str | None = None) - Parameters ---------- - code : str + code The source code of the module whose module data is to be found. - module_name : str, optional + module_name The name of the module, by default "". - path : str, optional + path The path of the module, by default None. Returns diff --git a/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py b/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py index 7476738e..5d2e8814 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py @@ -50,20 +50,20 @@ class PurityAnalyzer: Attributes ---------- - module_id : NodeID + module_id The ID of the module to analyze. - visited_nodes : set[NodeID] + visited_nodes A set of all nodes that have been visited during the analysis. - call_graph_forest : CallGraphForest + call_graph_forest The call graph forest of the module. - current_purity_results : dict[NodeID, dict[NodeID, PurityResult]] + current_purity_results The purity results of the functions in the module. - separated_nodes : dict[NodeID, CallGraphNode] + separated_nodes If the module has cycles, they will be found by the CallGraphBuilder and combined to a single node. Since these combined nodes are not part of the module but needed for the analysis, their purity results will be propagated to the original nodes during the analysis. This attribute stores the original nodes inside after the combined node was analyzed. - cached_module_results : dict[NodeID, dict[NodeID, PurityResult]] + cached_module_results The results of all previously analyzed modules. The key is the NodeID of the module, the value is a dictionary of the purity results of the functions in the module. @@ -72,18 +72,18 @@ class PurityAnalyzer: Parameters ---------- - code : str | None + code The source code of the module. If None is provided, the package data must be provided (or else an exception is raised). - module_name : str + module_name The name of the module. - path : str | None + path The path of the module. - results : dict[NodeID, dict[NodeID, PurityResult]] | None + results The results of all previously analyzed modules. The key is the NodeID of the module, the value is a dictionary of the purity results of the functions in the module. - package_data : PackageData | None + package_data The module data of all modules the package. If provided, the references are resolved with the package data, else the module data is collected first. It is used for the inference of the purity between modules in the package. @@ -125,7 +125,7 @@ def _handle_open_like_functions(call: astroid.Call) -> PurityResult: Parameters ---------- - call: astrid.Call + call The call to check. Returns @@ -210,7 +210,7 @@ def _get_impurity_result(reasons: Reasons) -> PurityResult: Parameters ---------- - reasons : Reasons + reasons The node to process containing the raw reasons for impurity collected. Returns @@ -309,7 +309,7 @@ def _process_imported_node(self, imported_node: ImportedCallGraphNode) -> Purity Parameters ---------- - imported_node : ImportedCallGraphNode + imported_node The imported node to process. Returns @@ -456,7 +456,7 @@ def _process_node(self, node: CallGraphNode) -> PurityResult: Parameters ---------- - node : CallGraphNode + node The node to process. Returns @@ -583,27 +583,27 @@ def infer_purity( Parameters ---------- - code : str | None + code The source code of the module. If None is provided, the package data must be provided (or else an exception is raised). - module_name : str, optional + module_name The name of the module, by default "". - path : str, optional + path The path of the module, by default None. - results : dict[NodeID, dict[NodeID, PurityResult]] | None + results The results of all previously analyzed modules. The key is the NodeID of the module, the value is a dictionary of the purity results of the functions in the module. After the analysis of the module, the results are saved in this dictionary. All imported modules are saved in this dictionary too for further runtime reduction. Is None if no results are available. - package_data : PackageData | None + package_data The module data of all modules the package. If provided, the references are resolved with the package data, else the module data is collected first. It is used for the inference of the purity between modules in the package. Returns ------- - purity_results : dict[NodeID, dict[NodeID, PurityResult]] + purity_results The purity results of the functions in the module. The key is the NodeID of the module, the value is a dictionary of the purity results of the functions in the module. """ @@ -621,7 +621,7 @@ def get_purity_results( Parameters ---------- - src_dir_path : Path + src_dir_path The path of the source directory of the package. Returns diff --git a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py index ef741364..ba31ad2e 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py +++ b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py @@ -42,26 +42,26 @@ class ReferenceResolver: Attributes ---------- - functions : dict[str, list[FunctionScope]] + functions The functions of the module. - classes : dict[str, ClassScope] + classes The classes of the module. - imports : dict[str, Import] + imports The imports of the module. - module_analysis_result : ModuleAnalysisResult + module_analysis_result The result of the reference resolving. - package_data_is_provided : bool + package_data_is_provided True if package data is given, False otherwise. Parameters ---------- - code : str + code The code of the module. - module_name : str + module_name The name of the module if any. - path : str | None + path The path of the module if any. - package_data : PackageData | None + package_data The module data of all modules the package. If provided, the references are resolved with the package data, else the module data is collected first. It is used for the inference of the purity between modules in the package. @@ -112,9 +112,9 @@ def is_function_of_class(function: astroid.FunctionDef, klass: ClassScope) -> bo Parameters ---------- - function : astroid.FunctionDef + function The function to check. - klass : ClassScope + klass The class to check. Returns @@ -140,14 +140,14 @@ def merge_dicts( Parameters ---------- - d1 : dict[str, list[ReferenceNode]] + d1 The first dict. - d2 : dict[str, list[ReferenceNode]] + d2 The second dict. Returns ------- - d3 : dict[str, list[ReferenceNode]] + d3 The merged dict. """ d3 = d1.copy() @@ -169,9 +169,9 @@ def compare_parameters(function: FunctionScope, call: astroid.Call) -> bool: Parameters ---------- - function : FunctionScope + function The function to compare. - call : astroid.Call + call The call to compare. Returns @@ -253,9 +253,9 @@ def _find_call_references( Parameters ---------- - call_reference : Reference + call_reference The call reference which should be analyzed. - function : FunctionScope + function The function in which the call is made. Returns @@ -386,9 +386,9 @@ def _find_value_references( Parameters ---------- - value_reference : Reference + value_reference The value reference which should be analyzed. - function : FunctionScope + function The function in which the value is used. Returns @@ -589,9 +589,9 @@ def _find_target_references( Parameters ---------- - target_reference : Symbol + target_reference The target reference which should be analyzed. - function : FunctionScope + function The function in which the value is used. Returns @@ -876,13 +876,13 @@ def resolve_references( Parameters ---------- - code : str + code The code of the module. - module_name : str + module_name The name of the module if any. - path : str | None + path The path of the module if any. - package_data : PackageData | None + package_data The module data of all modules the package. If provided, the references are resolved with the package data, else the module data is collected first. It is used for the inference of the purity between modules in the package. diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py index addd5922..16168470 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py @@ -20,7 +20,7 @@ class CallGraphForest: Attributes ---------- - graphs : dict[str, CallGraphNode] + graphs The dictionary of call graph trees. The key is the name of the tree, the value is the root CallGraphNode of the tree. """ @@ -32,9 +32,9 @@ def add_graph(self, graph_id: NodeID, graph: CallGraphNode) -> None: Parameters ---------- - graph_id : NodeID + graph_id The NodeID of the tree node. - graph : CallGraphNode + graph The root of the tree. """ # if graph_id in self.forest: @@ -46,7 +46,7 @@ def get_graph(self, graph_id: NodeID) -> CallGraphNode: Parameters ---------- - graph_id : NodeID + graph_id The NodeID of the tree node to get. Raises @@ -64,7 +64,7 @@ def has_graph(self, graph_id: NodeID) -> bool: Parameters ---------- - graph_id : NodeID + graph_id The NodeID of the tree to check for. Returns @@ -79,7 +79,7 @@ def delete_graph(self, graph_id: NodeID) -> None: Parameters ---------- - graph_id : NodeID + graph_id The NodeID of the tree to delete. """ del self.graphs[graph_id] @@ -93,12 +93,12 @@ class CallGraphNode: Attributes ---------- - symbol : Symbol + symbol The symbol of the function that the node represents. - reasons : Reasons + reasons The raw Reasons for the node. After the call graph is built, this only contains reads_from and writes_to as well as unknown_calls. - children : dict[NodeID, CallGraphNode] + children The set of children of the node, (i.e., the set of nodes that this node calls) """ @@ -120,7 +120,7 @@ def add_child(self, child: CallGraphNode) -> None: Parameters ---------- - child : CallGraphNode + child The child to add. """ self.children[child.symbol.id] = child @@ -130,7 +130,7 @@ def get_child(self, child_id: NodeID) -> CallGraphNode: Parameters ---------- - child_id : NodeID + child_id The NodeID of the child to get. Raises @@ -148,7 +148,7 @@ def has_child(self, child_id: NodeID) -> bool: Parameters ---------- - child_id : NodeID + child_id The NodeID of the child to check for. Returns @@ -163,7 +163,7 @@ def delete_child(self, child_id: NodeID) -> None: Parameters ---------- - child_id : NodeID + child_id The NodeID of the child to delete. """ del self.children[child_id] @@ -191,7 +191,7 @@ class CombinedCallGraphNode(CallGraphNode): Attributes ---------- - combines : dict[NodeID, CallGraphNode] + combines A dictionary of all nodes that are combined into this node. This is later used for transferring the reasons of the combined node to the original nodes. """ diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_module_data.py b/src/library_analyzer/processing/api/purity_analysis/model/_module_data.py index 59bd4062..5a6fe292 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_module_data.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_module_data.py @@ -18,14 +18,14 @@ class ModuleData: Attributes ---------- - scope : Scope + scope The module's scope, this contains all child scopes. - classes : dict[str, ClassScope] + classes All classes and their ClassScope. - functions : dict[str, list[FunctionScope]] + functions All functions and a list of their FunctionScopes. The value is a list since there can be multiple functions with the same name. - imports : dict[str, Import] + imports All imported symbols. """ @@ -42,13 +42,13 @@ class PackageData: Attributes ---------- - package_name : str + package_name The name of the package. - modules : dict[str, tuple[str, ModuleData]] + modules All modules and their ModuleData. The key is the name of the module. The value is a tuple of the path to the module and the ModuleData. - combined_module : ModuleData + combined_module The combined ModuleData of all modules in the package. """ @@ -94,18 +94,18 @@ class MemberAccess(astroid.NodeNG): Attributes ---------- - node : astroid.Attribute | astroid.AssignAttr + node The original node that represents the member access. Needed as fallback when determining the parent node if the receiver is None. - receiver : MemberAccess | astroid.NodeNG | None + receiver The receiver is the node that is accessed, it can be nested, e.g. `a` in `a.b` or `a.b` in `a.b.c`. The receiver can be nested. Is None if the receiver is not of type Name, Call or Attribute - member : str + member The member is the name of the node that accesses the receiver, e.g. `b` in `a.b`. parent : astroid.NodeNG | None The parent node of the member access. - name : str + name The name of the member access, e.g. `a.b`. Is set in __post_init__, after the member access has been created. If the MemberAccess is nested, the name of the receiver will be set to "UNKNOWN" since it is hard to determine @@ -152,7 +152,7 @@ def construct_member_access_target(cls, node: astroid.Attribute | astroid.Assign Parameters ---------- - node : astroid.Attribute | astroid.AssignAttr + node The node to construct the MemberAccessTarget node from. Returns @@ -203,7 +203,7 @@ def construct_member_access_value(cls, node: astroid.Attribute) -> MemberAccessV Parameters ---------- - node : astrid.Attribute + node The node to construct the MemberAccessValue node from. Returns @@ -234,15 +234,15 @@ class NodeID: Attributes ---------- - module : str | None + module The module of the node. Is None for combined nodes. - name : str + name The name of the node. - line : int + line The line of the node in the source code. Is None for combined nodes, builtins or any other node that do not have a line. - col : int | None + col The column of the node in the source code. Is None for combined nodes, builtins or any other node that do not have a line. """ @@ -326,7 +326,7 @@ def calc_node_id( Parameters ---------- - node : astroid.NodeNG | astroid.Module | astroid.ClassDef | astroid.FunctionDef | astroid.AssignName | astroid.Name | astroid.AssignAttr | astroid.Import | astroid.ImportFrom | astroid.Call | astroid.Lambda | astroid.ListComp | MemberAccess + node Returns ------- @@ -386,11 +386,11 @@ class Symbol(ABC): Attributes ---------- - node : astroid.NodeNG | MemberAccess + node The node that defines the symbol. - id : NodeID + id The id of that node. - name : str + name The name of the symbol (for easier access). """ @@ -414,7 +414,7 @@ class UnknownSymbol(Symbol): Attributes ---------- - node : None + node """ node: None = None @@ -470,7 +470,7 @@ class ClassVariable(Symbol): Attributes ---------- - klass : astroid.ClassDef | None + klass The class that defines the class variable. """ @@ -491,7 +491,7 @@ class InstanceVariable(Symbol): Attributes ---------- - klass : astroid.ClassDef | None + klass The class that defines the instance variable. """ @@ -512,22 +512,22 @@ class Import(Symbol): Attributes ---------- - node : astroid.ImportFrom | astroid.Import + node The node that defines the import. - name : str + name The name of the symbol that is imported if any is given. Else it is equal to the module name. - module : str + module The name of the module that is imported. - alias : str | None + alias If the node is of type Import alias is the alias name for the module name if any is given. If the node is of type ImportFrom alias is the alias name for the name of the symbol if any is given. - inferred_node : astroid.NodeNG | None + inferred_node When the import is used as a reference (or a symbol) the inferred_node is the node of the used reference (or symbol) in the original module. It was inferred by the reference analysis by using astroids safe_infer method. If the method could not infer the node, the inferred_node is None. - call: astroid.Call | None + call The original call node as fallback for the case, that the purity of the inferred_node cannot be inferred. Only is set if the symbol represents a call. """ @@ -558,7 +558,7 @@ class Builtin(Symbol): Attributes ---------- - call : astroid.Call + call The call node of the function. """ @@ -579,7 +579,7 @@ class BuiltinOpen(Builtin): Attributes ---------- - call : astroid.Call + call The call node of the open-like function. """ @@ -602,7 +602,7 @@ class CombinedSymbol(Symbol): Attributes ---------- - node : None + node """ @@ -625,11 +625,11 @@ class Reference: Attributes ---------- - node : astroid.Call | astroid.Name | MemberAccessValue + node The node that defines the symbol. - id : NodeID + id The id of that node. - name : str + name The name of the symbol (for easier access). """ @@ -656,12 +656,12 @@ class Scope: Attributes ---------- - _symbol : Symbol + _symbol The symbol that defines the scope. - _children : list[Scope | ClassScope] + _children The list of Scope or ClassScope instances that are defined in the scope of the Symbol node. Is None if the node is a leaf node. - _parent : Scope | ClassScope | None + _parent The parent node in the scope tree, there is None if the node is the root node. """ @@ -744,16 +744,16 @@ class ClassScope(Scope): Attributes ---------- - class_variables : dict[str, list[Symbol]] + class_variables The name of the class variable and a list of its Symbols (which represent a declaration). There can be multiple declarations of the same class variable, e.g. `a = 1` and `a = 2` since we cannot determine which one is used since we do not analyze the control flow. Also, it is impossible to distinguish between a declaration and a reassignment. - instance_variables : dict[str, list[Symbol]] + instance_variables The name of the instance variable and a list of its Symbols (which represent a declaration). - init_function : FunctionScope | None + init_function The init function of the class if it exists else None. - super_classes : list[ClassScope] + super_classes The list of superclasses of the class if any. """ @@ -771,18 +771,18 @@ class FunctionScope(Scope): Attributes ---------- - target_symbols : dict[str, list[Symbol]] + target_symbols The dict of all target nodes used inside the corresponding function. Target nodes are specified as all nodes that can be written to and which can be represented as a Symbol. This includes assignments, parameters, - value_references : dict[str, list[Reference]] + value_references The dict of all value nodes used inside the corresponding function. - call_references : dict[str, list[Reference]] + call_references The dict of all function calls inside the corresponding function. The key is the name of the call node, the value is a list of all References of call nodes with that name. - parameters : dict[str, Parameter] + parameters The parameters of the function. - globals_used : dict[str, list[GlobalVariable]] + globals_used The global variables used inside the function. It stores the globally assigned nodes (Assignment of the used variable). """ @@ -801,7 +801,7 @@ def remove_call_reference_by_id(self, call_id: str) -> None: Parameters ---------- - call_id : str + call_id The name of the call node to remove. """ self.call_references.pop(call_id, None) diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py index 50c5daca..3116bb80 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py @@ -35,7 +35,7 @@ class PurityResult(ABC): Purity results are either pure, impure or unknown. - is_class : bool + is_class Whether the result is for a class or not. """ @@ -62,7 +62,7 @@ class Pure(PurityResult): Attributes ---------- - is_class : bool + is_class Whether the result is for a class or not. """ @@ -73,7 +73,7 @@ def update(self, other: PurityResult | None) -> PurityResult: Parameters ---------- - other : PurityResult | None + other The result to update with. Returns @@ -129,9 +129,9 @@ class Impure(PurityResult): Attributes ---------- - reasons : set[ImpurityReason] + reasons The reasons why the function is impure. - is_class : bool + is_class Whether the result is for a class or not. """ @@ -143,7 +143,7 @@ def update(self, other: PurityResult | None) -> PurityResult: Parameters ---------- - other : PurityResult | None + other The result to update with. Returns @@ -262,9 +262,9 @@ class NonLocalVariableRead(Read): Attributes ---------- - symbol : GlobalVariable | ClassVariable | InstanceVariable | Import + symbol The symbol that is read. - origin : Symbol | NodeID | None + origin The origin of the read. """ @@ -293,9 +293,9 @@ class FileRead(Read): Attributes ---------- - source : Expression | None + source The source of the read. - origin : Symbol | NodeID | None + origin The origin of the read. """ @@ -330,9 +330,9 @@ class NonLocalVariableWrite(Write): Attributes ---------- - symbol : GlobalVariable | ClassVariable | InstanceVariable | Import + symbol The symbol that is written to. - origin : Symbol | NodeID | None + origin The origin of the write. """ @@ -361,9 +361,9 @@ class FileWrite(Write): Attributes ---------- - source : Expression + source The source of the write. - origin : Symbol | NodeID | None + origin The origin of the write. """ @@ -398,9 +398,9 @@ class UnknownProto(Unknown): Attributes ---------- - symbol : Symbol | Reference + symbol The symbol or reference object which is not fully determined. - origin : Symbol | NodeID | None + origin The origin of the unknown call. """ @@ -431,9 +431,9 @@ class UnknownCall(Unknown): Attributes ---------- - expression : Expression + expression The expression that is called. - origin : Symbol | NodeID | None + origin The origin of the call. """ @@ -464,9 +464,9 @@ class NativeCall(Unknown): # ExternalCall Attributes ---------- - expression : Expression + expression The expression that is called. - origin : Symbol | NodeID | None + origin The origin of the call. """ @@ -501,9 +501,9 @@ class CallOfParameter(Unknown): # ParameterCall Attributes ---------- - expression : Expression + expression The expression that is called. - origin : Symbol | NodeID | None + origin The origin of the call. """ @@ -543,7 +543,7 @@ class ParameterAccess(Expression): Attributes ---------- - parameter : Parameter + parameter The parameter that is accessed. """ @@ -561,7 +561,7 @@ class StringLiteral(Expression): Attributes ---------- - value : str + value The name of the string literal. """ @@ -577,11 +577,11 @@ class UnknownFunctionCall(Expression): Attributes ---------- - call : astroid.Call + call The call node. - inferred_def : astroid.FunctionDef | None + inferred_def The inferred function definition for the call if it is known. - name : str + name The name of the call. """ @@ -613,11 +613,11 @@ class UnknownClassInit(Expression): Attributes ---------- - call : astroid.Call + call The call node. - inferred_def : astroid.ClassDef | None + inferred_def The inferred class definition for the call if it is known. - name : str + name The name of the call. """ diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_reference.py b/src/library_analyzer/processing/api/purity_analysis/model/_reference.py index f1830a23..9be39b12 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_reference.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_reference.py @@ -37,11 +37,11 @@ class ReferenceNode(ABC): Attributes ---------- - node : astroid.Name | astroid.AssignName | astroid.Call | MemberAccessTarget | MemberAccessValue + node The node that references the symbols. - scope : Scope + scope The scope of the node. - referenced_symbols : list[Symbol] + referenced_symbols The list of referenced symbols. These are the symbols of the nodes that node references. """ @@ -95,17 +95,17 @@ class ModuleAnalysisResult: Attributes ---------- - resolved_references : dict[str, list[ValueReference | TargetReference]] + resolved_references The dictionary of references. The key is the name of the reference node, the value is the list of ReferenceNodes. - raw_reasons : dict[NodeID, Reasons] + raw_reasons The dictionary of function references. The key is the NodeID of the function, the value is the Reasons for the function. - classes : dict[str, ClassScope] + classes All classes and their ClassScope. call_graph_forest : CallGraphForest The call graph forest of the module. - module_id : NodeID | None + module_id The NodeID of the module which the analysis result belongs to. """ @@ -125,22 +125,22 @@ class Reasons: Attributes ---------- - function_scope : FunctionScope | None + function_scope The scope of the function which the reasons belong to. Is None if the reasons are not for a FunctionDef node. This is the case when either a builtin or a combined node is created, or a ClassScope is used to propagate reasons. - writes_to : dict[NodeID, NonLocalVariableWrite] + writes_to A dict of all nodes that are written to. - reads_from : dict[NodeID, NonLocalVariableRead] + reads_from A dict of all nodes that are read from. - calls : set[Symbol] + calls A set of all nodes that are called. - result : PurityResult | None + result The result of the purity analysis This also works as a flag to determine if the purity analysis has already been performed: If it is None, the purity analysis has not been performed - unknown_calls : dict[NodeID, UnknownProto] + unknown_calls A dict of all unknown calls. Unknown calls are calls to functions that are not defined in the module or are parameters. """ @@ -160,7 +160,7 @@ def join_reasons_list(self, reasons_list: list[Reasons]) -> Reasons: Parameters ---------- - reasons_list : list[Reasons] + reasons_list The list of Reasons objects. @@ -190,7 +190,7 @@ def join_reasons(self, other: Reasons) -> Reasons: Parameters ---------- - other : Reasons + other The other Reasons object. Returns @@ -210,7 +210,7 @@ def remove_unknown_call(self, node_id: NodeID) -> None: Parameters ---------- - node_id : NodeID + node_id The NodeID of the unknown call to remove. """ del self.unknown_calls[node_id] From 01524203119df9995f3fa8dbd91285ce9f883cb5 Mon Sep 17 00:00:00 2001 From: lukarade <84092952+lukarade@users.noreply.github.com> Date: Tue, 18 Jun 2024 19:31:29 +0200 Subject: [PATCH 13/17] fix: added back necessary annotations from docstrings --- .../api/purity_analysis/_build_call_graph.py | 42 ++++---- .../api/purity_analysis/_get_module_data.py | 60 ++++++------ .../api/purity_analysis/_infer_purity.py | 44 ++++----- .../purity_analysis/_resolve_references.py | 52 +++++----- .../api/purity_analysis/model/_call_graph.py | 28 +++--- .../api/purity_analysis/model/_module_data.py | 98 +++++++++---------- .../api/purity_analysis/model/_purity.py | 60 ++++++------ .../api/purity_analysis/model/_reference.py | 32 +++--- 8 files changed, 208 insertions(+), 208 deletions(-) diff --git a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py index bd90fc62..22be1d1f 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py +++ b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py @@ -22,21 +22,21 @@ class CallGraphBuilder: Attributes ---------- - classes + classes : Classnames in the module as key and their corresponding ClassScope instance as value. - raw_reasons + raw_reasons : The raw reasons for impurity for all functions. Keys are the ids of the functions. - call_graph_forest + call_graph_forest : The call graph forest for the given functions. - visited + visited : A set of all visited nodes. Parameters ---------- - classes + classes : Classnames in the module as key and their corresponding ClassScope instance as value. - raw_reasons + raw_reasons : The raw reasons for impurity for all functions. Keys are the ids of the functions. """ @@ -60,7 +60,7 @@ def _build_call_graph_forest(self) -> CallGraphForest: Returns ------- - call_graph_forest + call_graph_forest : CallGraphForest The call graph forest for the given functions. """ # Prepare the classes for the call graph. @@ -132,7 +132,7 @@ def _built_call_graph(self, reason: Reasons) -> None: Parameters ---------- - reason + reason : The raw reasons of the function. """ # If the node has already been visited, return @@ -192,9 +192,9 @@ def _handle_unknown_call(self, call: Symbol, reason: Reasons) -> None: Parameters ---------- - call + call : The call that is unknown. - reason + reason : The reason of the function that contains the unknown call. """ # Deal with the case that the call calls an imported function. @@ -241,7 +241,7 @@ def _handle_cycles(self, removed_nodes: set[NodeID] | None = None) -> None: Parameters ---------- - removed_nodes + removed_nodes : A set of all removed nodes. If not given, a new set is created. """ @@ -272,16 +272,16 @@ def _test_cgn_for_cycles( Parameters ---------- - cgn + cgn : The current node in the graph that is visited. - visited_nodes + visited_nodes : A set of all visited nodes. - path + path : A list of all nodes in the current path. Returns ------- - cycle + cycle : dict[NodeID, NewCallGraphNode] Dict of all nodes in the cycle. Keys are the NodeIDs of the nodes. Returns an empty dict if no cycle is found. @@ -326,7 +326,7 @@ def _contract_cycle(self, cycle: dict[NodeID, CallGraphNode]) -> None: Parameters ---------- - cycle + cycle : A dict of all nodes in the cycle. Keys are the NodeIDs of the CallGraphNodes. """ @@ -378,10 +378,10 @@ def _update_pointers(self, cycle: dict[NodeID, CallGraphNode], combined_node: Co Parameters ---------- - cycle + cycle : A dict of all nodes in the cycle. Keys are the NodeIDs of the nodes. - combined_node + combined_node : The combined node that replaces all nodes in the cycle. """ for graph in self.call_graph_forest.graphs.values(): @@ -396,15 +396,15 @@ def build_call_graph(classes: dict[str, ClassScope], raw_reasons: dict[NodeID, R Parameters ---------- - classes + classes : Classnames in the module as key and their corresponding ClassScope instance as value. - raw_reasons + raw_reasons : The raw reasons for impurity for all functions. Keys are the ids of the functions. Returns ------- - call_graph_forest + call_graph_forest : CallGraphForest The call graph forest for the given functions. """ return CallGraphBuilder(classes, raw_reasons).call_graph_forest diff --git a/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py b/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py index 8e149162..fbab8d5d 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py +++ b/src/library_analyzer/processing/api/purity_analysis/_get_module_data.py @@ -40,33 +40,33 @@ class ModuleDataBuilder: Attributes ---------- - current_node_stack + current_node_stack : Stack of nodes that are currently visited by the ASTWalker. The last node in the stack is the current node. It Is only used while walking the AST. - current_function_def + current_function_def : Stack of FunctionScopes that are currently visited by the ASTWalker. The top of the stack is the current function definition. It is only used while walking the AST. - children + children : All found children nodes are stored in children until their scope is determined. After the AST is completely walked, the resulting "Module"- Scope is stored in children. (children[0]) - targets + targets : All found targets are stored in targets until their scope is determined. - values + values : All found names are stored in names until their scope is determined. It Is only used while walking the AST. - calls + calls : All calls found on function level are stored in calls until their scope is determined. It Is only used while walking the AST. - classes + classes : Classnames in the module as key and their corresponding ClassScope instance as value. - functions + functions : Function names in the module as key and a list of their corresponding FunctionScope instances as value. - global_variables + global_variables : All global variables and their corresponding Scope instance. - imports + imports : All imports and their corresponding Import instance. """ @@ -92,7 +92,7 @@ def has_assignattr_parent(node: astroid.Attribute) -> bool: Parameters ---------- - node + node : The node whose parents are to be checked. Returns @@ -116,9 +116,9 @@ def get_symbol(self, node: astroid.NodeNG, current_scope: astroid.NodeNG | None) Parameters ---------- - node + node : The node whose symbol is to be determined. - current_scope + current_scope : The current scope of the node (is None if the node is the module node). """ match current_scope: @@ -220,7 +220,7 @@ def _detect_scope(self, current_node: astroid.NodeNG) -> None: Parameters ---------- - current_node + current_node : The node whose scope is to be determined. """ outer_scope_children: list[Scope] = [] @@ -276,7 +276,7 @@ def _analyze_class(self, current_node: astroid.ClassDef) -> None: Parameters ---------- - current_node + current_node : The node to analyze. """ if not isinstance(current_node, astroid.ClassDef): @@ -304,7 +304,7 @@ def _analyze_function(self, current_node: astroid.FunctionDef) -> None: Parameters ---------- - current_node + current_node : The node to analyze. """ if not isinstance(current_node, astroid.FunctionDef): @@ -519,7 +519,7 @@ def find_first_parent_function(self, node: astroid.NodeNG | MemberAccess) -> ast Parameters ---------- - node + node : The node to start the search from. Returns @@ -543,9 +543,9 @@ def handle_arg(self, node: astroid.AssignName, kind: ParameterKind) -> None: Parameters ---------- - node + node : The node that is to be handled. - kind + kind : The kind of the parameter. """ scope_node = Scope( @@ -562,9 +562,9 @@ def add_arg_to_function_scope_parameters(self, argument: astroid.AssignName, kin Parameters ---------- - argument + argument : The argument node to add to the parameter dict. - kind + kind : The kind of the parameter. """ if isinstance(self.current_node_stack[-1], FunctionScope): @@ -580,9 +580,9 @@ def is_annotated(self, node: astroid.NodeNG | MemberAccess, found_annotation_nod Parameters ---------- - node + node : The node to check. - found_annotation_node + found_annotation_node : A bool that indicates if an annotation node is found. Returns @@ -621,9 +621,9 @@ def check_if_global(self, name: str, node: astroid.NodeNG) -> list[astroid.Assig Parameters ---------- - name + name : The variable name to check. - node + node : The node whose root is to be checked. Returns @@ -649,7 +649,7 @@ def find_base_classes(self, node: astroid.ClassDef) -> list[ClassScope]: Parameters ---------- - node + node : The class whose base classes are to be found. Returns @@ -676,7 +676,7 @@ def enter_module(self, node: astroid.Module) -> None: Parameters ---------- - node + node : The module node to enter. """ self.current_node_stack.append( @@ -1218,11 +1218,11 @@ def get_module_data(code: str, module_name: str = "", path: str | None = None) - Parameters ---------- - code + code : The source code of the module whose module data is to be found. - module_name + module_name : The name of the module, by default "". - path + path : The path of the module, by default None. Returns diff --git a/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py b/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py index 5d2e8814..1b59c6fc 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py @@ -50,20 +50,20 @@ class PurityAnalyzer: Attributes ---------- - module_id + module_id : The ID of the module to analyze. - visited_nodes + visited_nodes : A set of all nodes that have been visited during the analysis. - call_graph_forest + call_graph_forest : The call graph forest of the module. - current_purity_results + current_purity_results : The purity results of the functions in the module. - separated_nodes + separated_nodes : If the module has cycles, they will be found by the CallGraphBuilder and combined to a single node. Since these combined nodes are not part of the module but needed for the analysis, their purity results will be propagated to the original nodes during the analysis. This attribute stores the original nodes inside after the combined node was analyzed. - cached_module_results + cached_module_results : The results of all previously analyzed modules. The key is the NodeID of the module, the value is a dictionary of the purity results of the functions in the module. @@ -72,18 +72,18 @@ class PurityAnalyzer: Parameters ---------- - code + code : The source code of the module. If None is provided, the package data must be provided (or else an exception is raised). - module_name + module_name : The name of the module. - path + path : The path of the module. - results + results : The results of all previously analyzed modules. The key is the NodeID of the module, the value is a dictionary of the purity results of the functions in the module. - package_data + package_data : The module data of all modules the package. If provided, the references are resolved with the package data, else the module data is collected first. It is used for the inference of the purity between modules in the package. @@ -125,7 +125,7 @@ def _handle_open_like_functions(call: astroid.Call) -> PurityResult: Parameters ---------- - call + call : The call to check. Returns @@ -210,7 +210,7 @@ def _get_impurity_result(reasons: Reasons) -> PurityResult: Parameters ---------- - reasons + reasons : The node to process containing the raw reasons for impurity collected. Returns @@ -309,7 +309,7 @@ def _process_imported_node(self, imported_node: ImportedCallGraphNode) -> Purity Parameters ---------- - imported_node + imported_node : The imported node to process. Returns @@ -456,7 +456,7 @@ def _process_node(self, node: CallGraphNode) -> PurityResult: Parameters ---------- - node + node : The node to process. Returns @@ -583,27 +583,27 @@ def infer_purity( Parameters ---------- - code + code : The source code of the module. If None is provided, the package data must be provided (or else an exception is raised). - module_name + module_name : The name of the module, by default "". - path + path : The path of the module, by default None. - results + results : The results of all previously analyzed modules. The key is the NodeID of the module, the value is a dictionary of the purity results of the functions in the module. After the analysis of the module, the results are saved in this dictionary. All imported modules are saved in this dictionary too for further runtime reduction. Is None if no results are available. - package_data + package_data : The module data of all modules the package. If provided, the references are resolved with the package data, else the module data is collected first. It is used for the inference of the purity between modules in the package. Returns ------- - purity_results + purity_results : dict[NodeID, dict[NodeID, PurityResult]] The purity results of the functions in the module. The key is the NodeID of the module, the value is a dictionary of the purity results of the functions in the module. """ @@ -621,7 +621,7 @@ def get_purity_results( Parameters ---------- - src_dir_path + src_dir_path : The path of the source directory of the package. Returns diff --git a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py index ba31ad2e..66f43d0a 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py +++ b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py @@ -42,26 +42,26 @@ class ReferenceResolver: Attributes ---------- - functions + functions : The functions of the module. - classes + classes : The classes of the module. - imports + imports : The imports of the module. - module_analysis_result + module_analysis_result : The result of the reference resolving. - package_data_is_provided + package_data_is_provided : True if package data is given, False otherwise. Parameters ---------- - code + code : The code of the module. - module_name + module_name : The name of the module if any. - path + path : The path of the module if any. - package_data + package_data : The module data of all modules the package. If provided, the references are resolved with the package data, else the module data is collected first. It is used for the inference of the purity between modules in the package. @@ -112,9 +112,9 @@ def is_function_of_class(function: astroid.FunctionDef, klass: ClassScope) -> bo Parameters ---------- - function + function : The function to check. - klass + klass : The class to check. Returns @@ -140,14 +140,14 @@ def merge_dicts( Parameters ---------- - d1 + d1 : The first dict. - d2 + d2 : The second dict. Returns ------- - d3 + d3 : dict[str, list[ReferenceNode]] The merged dict. """ d3 = d1.copy() @@ -169,9 +169,9 @@ def compare_parameters(function: FunctionScope, call: astroid.Call) -> bool: Parameters ---------- - function + function : The function to compare. - call + call : The call to compare. Returns @@ -253,9 +253,9 @@ def _find_call_references( Parameters ---------- - call_reference + call_reference : The call reference which should be analyzed. - function + function : The function in which the call is made. Returns @@ -386,9 +386,9 @@ def _find_value_references( Parameters ---------- - value_reference + value_reference : The value reference which should be analyzed. - function + function : The function in which the value is used. Returns @@ -589,9 +589,9 @@ def _find_target_references( Parameters ---------- - target_reference + target_reference : The target reference which should be analyzed. - function + function : The function in which the value is used. Returns @@ -876,13 +876,13 @@ def resolve_references( Parameters ---------- - code + code : The code of the module. - module_name + module_name : The name of the module if any. - path + path : The path of the module if any. - package_data + package_data : The module data of all modules the package. If provided, the references are resolved with the package data, else the module data is collected first. It is used for the inference of the purity between modules in the package. diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py index 16168470..43e13ebc 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_call_graph.py @@ -20,7 +20,7 @@ class CallGraphForest: Attributes ---------- - graphs + graphs : The dictionary of call graph trees. The key is the name of the tree, the value is the root CallGraphNode of the tree. """ @@ -32,9 +32,9 @@ def add_graph(self, graph_id: NodeID, graph: CallGraphNode) -> None: Parameters ---------- - graph_id + graph_id : The NodeID of the tree node. - graph + graph : The root of the tree. """ # if graph_id in self.forest: @@ -46,7 +46,7 @@ def get_graph(self, graph_id: NodeID) -> CallGraphNode: Parameters ---------- - graph_id + graph_id : The NodeID of the tree node to get. Raises @@ -64,7 +64,7 @@ def has_graph(self, graph_id: NodeID) -> bool: Parameters ---------- - graph_id + graph_id : The NodeID of the tree to check for. Returns @@ -79,7 +79,7 @@ def delete_graph(self, graph_id: NodeID) -> None: Parameters ---------- - graph_id + graph_id : The NodeID of the tree to delete. """ del self.graphs[graph_id] @@ -93,12 +93,12 @@ class CallGraphNode: Attributes ---------- - symbol + symbol : The symbol of the function that the node represents. - reasons + reasons : The raw Reasons for the node. After the call graph is built, this only contains reads_from and writes_to as well as unknown_calls. - children + children : The set of children of the node, (i.e., the set of nodes that this node calls) """ @@ -120,7 +120,7 @@ def add_child(self, child: CallGraphNode) -> None: Parameters ---------- - child + child : The child to add. """ self.children[child.symbol.id] = child @@ -130,7 +130,7 @@ def get_child(self, child_id: NodeID) -> CallGraphNode: Parameters ---------- - child_id + child_id : The NodeID of the child to get. Raises @@ -148,7 +148,7 @@ def has_child(self, child_id: NodeID) -> bool: Parameters ---------- - child_id + child_id : The NodeID of the child to check for. Returns @@ -163,7 +163,7 @@ def delete_child(self, child_id: NodeID) -> None: Parameters ---------- - child_id + child_id : The NodeID of the child to delete. """ del self.children[child_id] @@ -191,7 +191,7 @@ class CombinedCallGraphNode(CallGraphNode): Attributes ---------- - combines + combines : A dictionary of all nodes that are combined into this node. This is later used for transferring the reasons of the combined node to the original nodes. """ diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_module_data.py b/src/library_analyzer/processing/api/purity_analysis/model/_module_data.py index 5a6fe292..e7c74fb5 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_module_data.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_module_data.py @@ -18,14 +18,14 @@ class ModuleData: Attributes ---------- - scope + scope : The module's scope, this contains all child scopes. - classes + classes : All classes and their ClassScope. - functions + functions : All functions and a list of their FunctionScopes. The value is a list since there can be multiple functions with the same name. - imports + imports : All imported symbols. """ @@ -42,13 +42,13 @@ class PackageData: Attributes ---------- - package_name + package_name : The name of the package. - modules + modules : All modules and their ModuleData. The key is the name of the module. The value is a tuple of the path to the module and the ModuleData. - combined_module + combined_module : ModuleData The combined ModuleData of all modules in the package. """ @@ -94,18 +94,18 @@ class MemberAccess(astroid.NodeNG): Attributes ---------- - node + node : The original node that represents the member access. Needed as fallback when determining the parent node if the receiver is None. - receiver + receiver : The receiver is the node that is accessed, it can be nested, e.g. `a` in `a.b` or `a.b` in `a.b.c`. The receiver can be nested. Is None if the receiver is not of type Name, Call or Attribute - member + member : The member is the name of the node that accesses the receiver, e.g. `b` in `a.b`. parent : astroid.NodeNG | None The parent node of the member access. - name + name : The name of the member access, e.g. `a.b`. Is set in __post_init__, after the member access has been created. If the MemberAccess is nested, the name of the receiver will be set to "UNKNOWN" since it is hard to determine @@ -152,7 +152,7 @@ def construct_member_access_target(cls, node: astroid.Attribute | astroid.Assign Parameters ---------- - node + node : The node to construct the MemberAccessTarget node from. Returns @@ -203,7 +203,7 @@ def construct_member_access_value(cls, node: astroid.Attribute) -> MemberAccessV Parameters ---------- - node + node : The node to construct the MemberAccessValue node from. Returns @@ -234,15 +234,15 @@ class NodeID: Attributes ---------- - module + module : The module of the node. Is None for combined nodes. - name + name : The name of the node. - line + line : The line of the node in the source code. Is None for combined nodes, builtins or any other node that do not have a line. - col + col : The column of the node in the source code. Is None for combined nodes, builtins or any other node that do not have a line. """ @@ -326,7 +326,7 @@ def calc_node_id( Parameters ---------- - node + node : Returns ------- @@ -386,11 +386,11 @@ class Symbol(ABC): Attributes ---------- - node + node : The node that defines the symbol. - id + id : The id of that node. - name + name : The name of the symbol (for easier access). """ @@ -414,7 +414,7 @@ class UnknownSymbol(Symbol): Attributes ---------- - node + node : """ node: None = None @@ -470,7 +470,7 @@ class ClassVariable(Symbol): Attributes ---------- - klass + klass : The class that defines the class variable. """ @@ -491,7 +491,7 @@ class InstanceVariable(Symbol): Attributes ---------- - klass + klass : The class that defines the instance variable. """ @@ -512,22 +512,22 @@ class Import(Symbol): Attributes ---------- - node + node : The node that defines the import. - name + name : The name of the symbol that is imported if any is given. Else it is equal to the module name. - module + module : The name of the module that is imported. - alias + alias : If the node is of type Import alias is the alias name for the module name if any is given. If the node is of type ImportFrom alias is the alias name for the name of the symbol if any is given. - inferred_node + inferred_node : When the import is used as a reference (or a symbol) the inferred_node is the node of the used reference (or symbol) in the original module. It was inferred by the reference analysis by using astroids safe_infer method. If the method could not infer the node, the inferred_node is None. - call + call : The original call node as fallback for the case, that the purity of the inferred_node cannot be inferred. Only is set if the symbol represents a call. """ @@ -558,7 +558,7 @@ class Builtin(Symbol): Attributes ---------- - call + call : The call node of the function. """ @@ -579,7 +579,7 @@ class BuiltinOpen(Builtin): Attributes ---------- - call + call : The call node of the open-like function. """ @@ -602,7 +602,7 @@ class CombinedSymbol(Symbol): Attributes ---------- - node + node : """ @@ -625,11 +625,11 @@ class Reference: Attributes ---------- - node + node : The node that defines the symbol. - id + id : The id of that node. - name + name : The name of the symbol (for easier access). """ @@ -656,12 +656,12 @@ class Scope: Attributes ---------- - _symbol + _symbol : The symbol that defines the scope. - _children + _children : The list of Scope or ClassScope instances that are defined in the scope of the Symbol node. Is None if the node is a leaf node. - _parent + _parent : The parent node in the scope tree, there is None if the node is the root node. """ @@ -744,16 +744,16 @@ class ClassScope(Scope): Attributes ---------- - class_variables + class_variables : The name of the class variable and a list of its Symbols (which represent a declaration). There can be multiple declarations of the same class variable, e.g. `a = 1` and `a = 2` since we cannot determine which one is used since we do not analyze the control flow. Also, it is impossible to distinguish between a declaration and a reassignment. - instance_variables + instance_variables : The name of the instance variable and a list of its Symbols (which represent a declaration). - init_function + init_function : The init function of the class if it exists else None. - super_classes + super_classes : The list of superclasses of the class if any. """ @@ -771,18 +771,18 @@ class FunctionScope(Scope): Attributes ---------- - target_symbols + target_symbols : The dict of all target nodes used inside the corresponding function. Target nodes are specified as all nodes that can be written to and which can be represented as a Symbol. This includes assignments, parameters, - value_references + value_references : The dict of all value nodes used inside the corresponding function. - call_references + call_references : The dict of all function calls inside the corresponding function. The key is the name of the call node, the value is a list of all References of call nodes with that name. - parameters + parameters : The parameters of the function. - globals_used + globals_used : The global variables used inside the function. It stores the globally assigned nodes (Assignment of the used variable). """ @@ -801,7 +801,7 @@ def remove_call_reference_by_id(self, call_id: str) -> None: Parameters ---------- - call_id + call_id : The name of the call node to remove. """ self.call_references.pop(call_id, None) diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py index 3116bb80..18e141c5 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py @@ -35,7 +35,7 @@ class PurityResult(ABC): Purity results are either pure, impure or unknown. - is_class + is_class : Whether the result is for a class or not. """ @@ -62,7 +62,7 @@ class Pure(PurityResult): Attributes ---------- - is_class + is_class : Whether the result is for a class or not. """ @@ -73,7 +73,7 @@ def update(self, other: PurityResult | None) -> PurityResult: Parameters ---------- - other + other : The result to update with. Returns @@ -129,9 +129,9 @@ class Impure(PurityResult): Attributes ---------- - reasons + reasons : The reasons why the function is impure. - is_class + is_class : Whether the result is for a class or not. """ @@ -143,7 +143,7 @@ def update(self, other: PurityResult | None) -> PurityResult: Parameters ---------- - other + other : The result to update with. Returns @@ -262,9 +262,9 @@ class NonLocalVariableRead(Read): Attributes ---------- - symbol + symbol : The symbol that is read. - origin + origin : The origin of the read. """ @@ -293,9 +293,9 @@ class FileRead(Read): Attributes ---------- - source + source : The source of the read. - origin + origin : The origin of the read. """ @@ -330,9 +330,9 @@ class NonLocalVariableWrite(Write): Attributes ---------- - symbol + symbol : The symbol that is written to. - origin + origin : The origin of the write. """ @@ -361,9 +361,9 @@ class FileWrite(Write): Attributes ---------- - source + source : The source of the write. - origin + origin : The origin of the write. """ @@ -398,9 +398,9 @@ class UnknownProto(Unknown): Attributes ---------- - symbol + symbol : The symbol or reference object which is not fully determined. - origin + origin : The origin of the unknown call. """ @@ -431,9 +431,9 @@ class UnknownCall(Unknown): Attributes ---------- - expression + expression : The expression that is called. - origin + origin : The origin of the call. """ @@ -464,9 +464,9 @@ class NativeCall(Unknown): # ExternalCall Attributes ---------- - expression + expression : The expression that is called. - origin + origin : The origin of the call. """ @@ -501,9 +501,9 @@ class CallOfParameter(Unknown): # ParameterCall Attributes ---------- - expression + expression : The expression that is called. - origin + origin : The origin of the call. """ @@ -543,7 +543,7 @@ class ParameterAccess(Expression): Attributes ---------- - parameter + parameter : The parameter that is accessed. """ @@ -561,7 +561,7 @@ class StringLiteral(Expression): Attributes ---------- - value + value : The name of the string literal. """ @@ -577,11 +577,11 @@ class UnknownFunctionCall(Expression): Attributes ---------- - call + call : The call node. - inferred_def + inferred_def : The inferred function definition for the call if it is known. - name + name : The name of the call. """ @@ -613,11 +613,11 @@ class UnknownClassInit(Expression): Attributes ---------- - call + call : The call node. - inferred_def + inferred_def : The inferred class definition for the call if it is known. - name + name : The name of the call. """ diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_reference.py b/src/library_analyzer/processing/api/purity_analysis/model/_reference.py index 9be39b12..18d819e6 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_reference.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_reference.py @@ -37,11 +37,11 @@ class ReferenceNode(ABC): Attributes ---------- - node + node : The node that references the symbols. - scope + scope : The scope of the node. - referenced_symbols + referenced_symbols : The list of referenced symbols. These are the symbols of the nodes that node references. """ @@ -95,17 +95,17 @@ class ModuleAnalysisResult: Attributes ---------- - resolved_references + resolved_references : The dictionary of references. The key is the name of the reference node, the value is the list of ReferenceNodes. - raw_reasons + raw_reasons : The dictionary of function references. The key is the NodeID of the function, the value is the Reasons for the function. - classes + classes : All classes and their ClassScope. call_graph_forest : CallGraphForest The call graph forest of the module. - module_id + module_id : The NodeID of the module which the analysis result belongs to. """ @@ -125,22 +125,22 @@ class Reasons: Attributes ---------- - function_scope + function_scope : The scope of the function which the reasons belong to. Is None if the reasons are not for a FunctionDef node. This is the case when either a builtin or a combined node is created, or a ClassScope is used to propagate reasons. - writes_to + writes_to : A dict of all nodes that are written to. - reads_from + reads_from : A dict of all nodes that are read from. - calls + calls : A set of all nodes that are called. - result + result : The result of the purity analysis This also works as a flag to determine if the purity analysis has already been performed: If it is None, the purity analysis has not been performed - unknown_calls + unknown_calls : A dict of all unknown calls. Unknown calls are calls to functions that are not defined in the module or are parameters. """ @@ -160,7 +160,7 @@ def join_reasons_list(self, reasons_list: list[Reasons]) -> Reasons: Parameters ---------- - reasons_list + reasons_list : The list of Reasons objects. @@ -190,7 +190,7 @@ def join_reasons(self, other: Reasons) -> Reasons: Parameters ---------- - other + other : The other Reasons object. Returns @@ -210,7 +210,7 @@ def remove_unknown_call(self, node_id: NodeID) -> None: Parameters ---------- - node_id + node_id : The NodeID of the unknown call to remove. """ del self.unknown_calls[node_id] From 7069e07128b337674d1544bb9203befb6ea4abd7 Mon Sep 17 00:00:00 2001 From: lukarade <84092952+lukarade@users.noreply.github.com> Date: Tue, 18 Jun 2024 19:35:59 +0200 Subject: [PATCH 14/17] feat: activated short mode as default (only the reason count will be listed in the result file) --- src/library_analyzer/cli/_run_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/library_analyzer/cli/_run_api.py b/src/library_analyzer/cli/_run_api.py index 2689eb9e..1f9e75df 100644 --- a/src/library_analyzer/cli/_run_api.py +++ b/src/library_analyzer/cli/_run_api.py @@ -36,4 +36,4 @@ def _run_api_command( api_purity = get_purity_results(src_dir_path) out_file_api_purity = out_dir_path.joinpath(f"{package}__api_purity.json") - api_purity.to_json_file(out_file_api_purity) + api_purity.to_json_file(out_file_api_purity, True) # True shortens the results to only contain the count of each reason. From 9475a82035151337770cf285672258b2fc2038c6 Mon Sep 17 00:00:00 2001 From: lukarade <84092952+lukarade@users.noreply.github.com> Date: Tue, 18 Jun 2024 20:05:31 +0200 Subject: [PATCH 15/17] fix: linter error --- src/library_analyzer/cli/_run_api.py | 2 +- .../processing/api/purity_analysis/model/_purity.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/library_analyzer/cli/_run_api.py b/src/library_analyzer/cli/_run_api.py index 1f9e75df..cdb2b17a 100644 --- a/src/library_analyzer/cli/_run_api.py +++ b/src/library_analyzer/cli/_run_api.py @@ -36,4 +36,4 @@ def _run_api_command( api_purity = get_purity_results(src_dir_path) out_file_api_purity = out_dir_path.joinpath(f"{package}__api_purity.json") - api_purity.to_json_file(out_file_api_purity, True) # True shortens the results to only contain the count of each reason. + api_purity.to_json_file(out_file_api_purity) # Shorten is set to True by default, therefore the results will only contain the count of each reason. diff --git a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py index 18e141c5..80649a3a 100644 --- a/src/library_analyzer/processing/api/purity_analysis/model/_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/model/_purity.py @@ -652,7 +652,7 @@ class APIPurity: purity_results: typing.ClassVar[dict[NodeID, dict[NodeID, PurityResult]]] = {} - def to_json_file(self, path: Path, shorten: bool = False) -> None: + def to_json_file(self, path: Path, shorten: bool = True) -> None: ensure_file_exists(path) with path.open("w") as f: json.dump(self.to_dict(shorten), f, indent=2) From 6cd89ab9d735c86ed1f1d649baeff297e8e5afa3 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Tue, 18 Jun 2024 18:07:15 +0000 Subject: [PATCH 16/17] style: apply automated linter fixes --- src/library_analyzer/cli/_run_api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/library_analyzer/cli/_run_api.py b/src/library_analyzer/cli/_run_api.py index cdb2b17a..ca259cc3 100644 --- a/src/library_analyzer/cli/_run_api.py +++ b/src/library_analyzer/cli/_run_api.py @@ -36,4 +36,6 @@ def _run_api_command( api_purity = get_purity_results(src_dir_path) out_file_api_purity = out_dir_path.joinpath(f"{package}__api_purity.json") - api_purity.to_json_file(out_file_api_purity) # Shorten is set to True by default, therefore the results will only contain the count of each reason. + api_purity.to_json_file( + out_file_api_purity, + ) # Shorten is set to True by default, therefore the results will only contain the count of each reason. From 1737a01efe35b8f31c764c65421bee048f996898 Mon Sep 17 00:00:00 2001 From: lukarade <84092952+lukarade@users.noreply.github.com> Date: Tue, 18 Jun 2024 20:19:53 +0200 Subject: [PATCH 17/17] fix: removed return types in docstrings (again) --- .../processing/api/purity_analysis/_build_call_graph.py | 6 +++--- .../processing/api/purity_analysis/_infer_purity.py | 2 +- .../processing/api/purity_analysis/_resolve_references.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py index 22be1d1f..458e8db6 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py +++ b/src/library_analyzer/processing/api/purity_analysis/_build_call_graph.py @@ -60,7 +60,7 @@ def _build_call_graph_forest(self) -> CallGraphForest: Returns ------- - call_graph_forest : CallGraphForest + call_graph_forest : The call graph forest for the given functions. """ # Prepare the classes for the call graph. @@ -281,7 +281,7 @@ def _test_cgn_for_cycles( Returns ------- - cycle : dict[NodeID, NewCallGraphNode] + cycle : Dict of all nodes in the cycle. Keys are the NodeIDs of the nodes. Returns an empty dict if no cycle is found. @@ -404,7 +404,7 @@ def build_call_graph(classes: dict[str, ClassScope], raw_reasons: dict[NodeID, R Returns ------- - call_graph_forest : CallGraphForest + call_graph_forest : The call graph forest for the given functions. """ return CallGraphBuilder(classes, raw_reasons).call_graph_forest diff --git a/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py b/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py index 1b59c6fc..4d00f535 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py +++ b/src/library_analyzer/processing/api/purity_analysis/_infer_purity.py @@ -603,7 +603,7 @@ def infer_purity( Returns ------- - purity_results : dict[NodeID, dict[NodeID, PurityResult]] + purity_results : The purity results of the functions in the module. The key is the NodeID of the module, the value is a dictionary of the purity results of the functions in the module. """ diff --git a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py index 66f43d0a..baab8923 100644 --- a/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py +++ b/src/library_analyzer/processing/api/purity_analysis/_resolve_references.py @@ -147,7 +147,7 @@ def merge_dicts( Returns ------- - d3 : dict[str, list[ReferenceNode]] + d3 : The merged dict. """ d3 = d1.copy()