From 08c99ad9df4af977a6dd64d8d6ba95c7219d7ac4 Mon Sep 17 00:00:00 2001
From: ThibaultFy <50656860+ThibaultFy@users.noreply.github.com>
Date: Fri, 8 Sep 2023 16:00:40 +0200
Subject: [PATCH] chore: bump-pydantic to 2.3.0 (#375)

Signed-off-by: ThibaultFy <50656860+ThibaultFy@users.noreply.github.com>
---
 CHANGELOG.md                                 |   5 +
 bin/generate_sdk_schemas_documentation.py    |   6 +-
 references/sdk.md                            |   2 +-
 references/sdk_models.md                     | 174 +++++++++----------
 references/sdk_schemas.md                    | 110 ++++++------
 setup.py                                     |   2 +-
 substra/sdk/backends/local/backend.py        |  25 ++-
 substra/sdk/backends/local/compute/worker.py |   2 +-
 substra/sdk/backends/remote/backend.py       |   2 +-
 substra/sdk/client.py                        |   2 +-
 substra/sdk/models.py                        |  62 +++----
 substra/sdk/schemas.py                       | 103 +++++------
 tests/sdk/test_add.py                        |   2 +-
 tests/sdk/test_get.py                        |   2 +-
 14 files changed, 259 insertions(+), 240 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c8f59d30..f4197ad8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Changed
+
+- Update to pydantic 2.3.0 ([#375](https://github.com/Substra/substra/pull/375))
+
 ## [0.47.0](https://github.com/Substra/substra/releases/tag/0.47.0) - 2023-09-07
 
 ### Added
@@ -16,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - warning and help message when logging in with username/password rather than token ([#378](https://github.com/Substra/substra/pull/378))
 - new `Client.logout` function, mirroring `Client.login` ([#381](https://github.com/Substra/substra/pull/381))
 - `Client` can now be used within a context manager ([#381](https://github.com/Substra/substra/pull/381))
+
   ```python
   with Client(
       client_name="org-1",
diff --git a/bin/generate_sdk_schemas_documentation.py b/bin/generate_sdk_schemas_documentation.py
index d4e5203f..fc51be32 100644
--- a/bin/generate_sdk_schemas_documentation.py
+++ b/bin/generate_sdk_schemas_documentation.py
@@ -45,7 +45,7 @@ def _get_field_description(fields):
-    desc = [f"{field.name}: {field._type_display()}" for _, field in fields.items()]
+    desc = [f"{name}: {field.annotation}" for name, field in fields.items()]
     return desc
@@ -76,7 +76,7 @@ def _create_anchor(schema):
             # Write the docstring
             fh.write(f"{inspect.getdoc(asset)}\n")
             # List the fields and their types
-            description = _get_field_description(asset.__fields__)
+            description = _get_field_description(asset.model_fields)
             fh.write("```text\n")
             fh.write("- " + "\n- ".join(description))
             fh.write("\n```")
@@ -89,7 +89,7 @@ def write_help(path, models: bool):
 
 
 if __name__ == "__main__":
-    expected_pydantic_version = "1.9.0"
+    expected_pydantic_version = "2.3.0"
     if pydantic.VERSION != expected_pydantic_version:
         warnings.warn(
             f"The documentation should be generated with the version {expected_pydantic_version} of pydantic or \
diff --git a/references/sdk.md b/references/sdk.md
index 1c35bd88..2908fb45 100644
--- a/references/sdk.md
+++ b/references/sdk.md
@@ -351,7 +351,7 @@ to pandas dataframe.
 You can wait for compute task to finish by setting
 **Examples:**
 ```python
 perf = client.get_performances(cp_key)
-df = pd.DataFrame(perf.dict())
+df = pd.DataFrame(perf.model_dump())
 print(df)
 ```
 
 ## get_task
diff --git a/references/sdk_models.md b/references/sdk_models.md
index 0a9af793..15a93c83 100644
--- a/references/sdk_models.md
+++ b/references/sdk_models.md
@@ -18,142 +18,142 @@
 ## DataSample
 Data sample
 ```text
-- key: str
-- owner: str
-- data_manager_keys: Optional[List[str]]
-- path: Optional[DirectoryPath]
-- creation_date: datetime
+- key:
+- owner:
+- data_manager_keys: typing.Optional[typing.List[str]]
+- path: typing.Optional[typing.Annotated[pathlib.Path, PathType(path_type='dir')]]
+- creation_date:
 ```
 
 ## Dataset
 Dataset asset
 ```text
-- key: str
-- name: str
-- owner: str
-- permissions: Permissions
-- type: str
-- data_sample_keys: List[str]
-- opener: _File
-- description: _File
-- metadata: Mapping[str, str]
-- creation_date: datetime
-- logs_permission: Permission
+- key:
+- name:
+- owner:
+- permissions:
+- type:
+- data_sample_keys: typing.List[str]
+- opener:
+- description:
+- metadata: typing.Dict[str, str]
+- creation_date:
+- logs_permission:
 ```
 
 ## Task
 Asset creation specification base class.
 ```text
-- key: str
-- function: Function
-- owner: str
-- compute_plan_key: str
-- metadata: Mapping[str, str]
-- status: Status
-- worker: str
-- rank: Optional[int]
-- tag: str
-- creation_date: datetime
-- start_date: Optional[datetime]
-- end_date: Optional[datetime]
-- error_type: Optional[TaskErrorType]
-- inputs: List[InputRef]
-- outputs: Mapping[str, ComputeTaskOutput]
+- key:
+- function:
+- owner:
+- compute_plan_key:
+- metadata: typing.Union[typing.Dict[str, str], typing.Dict[str, int]]
+- status:
+- worker:
+- rank: typing.Optional[int]
+- tag:
+- creation_date:
+- start_date: typing.Optional[datetime.datetime]
+- end_date: typing.Optional[datetime.datetime]
+- error_type: typing.Optional[substra.sdk.models.TaskErrorType]
+- inputs: typing.List[substra.sdk.models.InputRef]
+- outputs: typing.Dict[str, substra.sdk.models.ComputeTaskOutput]
 ```
 
 ## Function
 Asset creation specification base class.
 ```text
-- key: str
-- name: str
-- owner: str
-- permissions: Permissions
-- metadata: Mapping[str, str]
-- creation_date: datetime
-- inputs: List[FunctionInput]
-- outputs: List[FunctionOutput]
-- description: _File
-- function: _File
+- key:
+- name:
+- owner:
+- permissions:
+- metadata: typing.Dict[str, str]
+- creation_date:
+- inputs: typing.List[substra.sdk.models.FunctionInput]
+- outputs: typing.List[substra.sdk.models.FunctionOutput]
+- description:
+- function:
 ```
 
 ## ComputePlan
 ComputePlan
 ```text
-- key: str
-- tag: str
-- name: str
-- owner: str
-- metadata: Mapping[str, str]
-- task_count: int
-- waiting_count: int
-- todo_count: int
-- doing_count: int
-- canceled_count: int
-- failed_count: int
-- done_count: int
-- failed_task_key: Optional[str]
-- status: ComputePlanStatus
-- creation_date: datetime
-- start_date: Optional[datetime]
-- end_date: Optional[datetime]
-- estimated_end_date: Optional[datetime]
-- duration: Optional[int]
-- creator: Optional[str]
+- key:
+- tag:
+- name:
+- owner:
+- metadata: typing.Dict[str, str]
+- task_count:
+- waiting_count:
+- todo_count:
+- doing_count:
+- canceled_count:
+- failed_count:
+- done_count:
+- failed_task_key: typing.Optional[str]
+- status:
+- creation_date:
+- start_date: typing.Optional[datetime.datetime]
+- end_date: typing.Optional[datetime.datetime]
+- estimated_end_date: typing.Optional[datetime.datetime]
+- duration: typing.Optional[int]
+- creator: typing.Optional[str]
 ```
 
 ## Performances
 Performances of the different compute tasks of a compute plan
 ```text
-- compute_plan_key: List[str]
-- compute_plan_tag: List[str]
-- compute_plan_status: List[str]
-- compute_plan_start_date: List[datetime]
-- compute_plan_end_date: List[datetime]
-- compute_plan_metadata: List[dict]
-- worker: List[str]
-- task_key: List[str]
-- task_rank: List[int]
-- round_idx: List[int]
-- identifier: List[str]
-- performance: List[float]
+- compute_plan_key: typing.List[str]
+- compute_plan_tag: typing.List[str]
+- compute_plan_status: typing.List[str]
+- compute_plan_start_date: typing.List[datetime.datetime]
+- compute_plan_end_date: typing.List[datetime.datetime]
+- compute_plan_metadata: typing.List[dict]
+- worker: typing.List[str]
+- task_key: typing.List[str]
+- task_rank: typing.List[int]
+- round_idx: typing.List[int]
+- identifier: typing.List[str]
+- performance: typing.List[float]
 ```
 
 ## Organization
 Organization
 ```text
-- id: str
-- is_current: bool
-- creation_date: datetime
+- id:
+- is_current:
+- creation_date:
 ```
 
 ## Permissions
 Permissions structure stored in various asset types.
 ```text
-- process: Permission
+- process:
 ```
 
 ## InModel
 In model of a task
 ```text
-- checksum: str
-- storage_address: Union[FilePath, AnyUrl, str]
+- checksum:
+- storage_address: typing.Union[typing.Annotated[pathlib.Path, PathType(path_type='file')], pydantic_core._pydantic_core.Url, str]
 ```
 
 ## OutModel
 Out model of a task
 ```text
-- key: str
-- compute_task_key: str
-- address: Optional[InModel]
-- permissions: Permissions
-- owner: str
-- creation_date: datetime
+- key:
+- compute_task_key:
+- address: typing.Optional[substra.sdk.models.InModel]
+- permissions:
+- owner:
+- creation_date:
 ```
 
 ## _File
 File as stored in the models
 ```text
-- checksum: str
-- storage_address: Union[FilePath, AnyUrl, str]
+- checksum:
+- storage_address: typing.Union[typing.Annotated[pathlib.Path, PathType(path_type='file')], pydantic_core._pydantic_core.Url, str]
 ```
diff --git a/references/sdk_schemas.md b/references/sdk_schemas.md
index dd1e4680..016b1c7b 100644
--- a/references/sdk_schemas.md
+++ b/references/sdk_schemas.md
@@ -24,9 +24,9 @@ Specification to create one or many data samples
 To create one data sample, use the 'path' field, otherwise use
 the 'paths' field.
 ```text
-- path: Optional[Path]
-- paths: Optional[List[Path]]
-- data_manager_keys: List[str]
+- path: typing.Optional[pathlib.Path]
+- paths: typing.Optional[typing.List[pathlib.Path]]
+- data_manager_keys: typing.List[str]
 ```
 
 ## DatasetSpec
@@ -34,19 +34,19 @@ Specification for creating a dataset
 
 note : metadata field does not accept strings containing '__' as dict key
 ```text
-- name: str
-- data_opener: Path
-- type: str
-- description: Path
-- permissions: Permissions
-- metadata: Optional[Mapping[str, str]]
-- logs_permission: Permissions
+- name:
+- data_opener:
+- type:
+- description:
+- permissions:
+- metadata: typing.Optional[typing.Dict[str, str]]
+- logs_permission:
 ```
 
 ## UpdateDatasetSpec
 Specification for updating a dataset
 ```text
-- name: str
+- name:
 ```
 
 ## FunctionSpec
@@ -54,57 +54,57 @@ Specification for creating an function
 
 note : metadata field does not accept strings containing '__' as dict key
 ```text
-- name: str
-- description: Path
-- file: Path
-- permissions: Permissions
-- metadata: Optional[Mapping[str, str]]
-- inputs: Optional[List[FunctionInputSpec]]
-- outputs: Optional[List[FunctionOutputSpec]]
+- name:
+- description:
+- file:
+- permissions:
+- metadata: typing.Optional[typing.Dict[str, str]]
+- inputs: typing.Optional[typing.List[substra.sdk.schemas.FunctionInputSpec]]
+- outputs: typing.Optional[typing.List[substra.sdk.schemas.FunctionOutputSpec]]
 ```
 
 ## FunctionInputSpec
 Asset creation specification base class.
 ```text
-- identifier: str
-- multiple: bool
-- optional: bool
-- kind: AssetKind
+- identifier:
+- multiple:
+- optional:
+- kind:
 ```
 
 ## FunctionOutputSpec
 Asset creation specification base class.
 ```text
-- identifier: str
-- kind: AssetKind
-- multiple: bool
+- identifier:
+- kind:
+- multiple:
 ```
 
 ## TaskSpec
 Asset creation specification base class.
 ```text
-- key: str
-- tag: Optional[str]
-- compute_plan_key: Optional[str]
-- metadata: Optional[Mapping[str, str]]
-- function_key: str
-- worker: str
-- rank: Optional[int]
-- inputs: Optional[List[InputRef]]
-- outputs: Optional[Mapping[str, ComputeTaskOutputSpec]]
+- key:
+- tag: typing.Optional[str]
+- compute_plan_key: typing.Optional[str]
+- metadata: typing.Union[typing.Dict[str, str], typing.Dict[str, int], NoneType]
+- function_key:
+- worker:
+- rank: typing.Optional[int]
+- inputs: typing.Optional[typing.List[substra.sdk.schemas.InputRef]]
+- outputs: typing.Optional[typing.Dict[str, substra.sdk.schemas.ComputeTaskOutputSpec]]
 ```
 
 ## ComputeTaskOutputSpec
 Specification of a compute task output
 ```text
-- permissions: Permissions
-- is_transient: Optional[bool]
+- permissions:
+- is_transient: typing.Optional[bool]
 ```
 
 ## UpdateFunctionSpec
 Specification for updating an function
 ```text
-- name: str
+- name:
 ```
 
 ## ComputePlanSpec
@@ -112,24 +112,24 @@ Specification for creating a compute plan
 
 note : metadata field does not accept strings containing '__' as dict key
 ```text
-- key: str
-- tasks: Optional[List[ComputePlanTaskSpec]]
-- tag: Optional[str]
-- name: str
-- metadata: Optional[Mapping[str, str]]
+- key:
+- tasks: typing.Optional[typing.List[substra.sdk.schemas.ComputePlanTaskSpec]]
+- tag: typing.Optional[str]
+- name:
+- metadata: typing.Optional[typing.Dict[str, str]]
 ```
 
 ## UpdateComputePlanSpec
 Specification for updating a compute plan
 ```text
-- name: str
+- name:
 ```
 
 ## UpdateComputePlanTasksSpec
 Specification for updating a compute plan's tasks
 ```text
-- key: str
-- tasks: Optional[List[ComputePlanTaskSpec]]
+- key:
+- tasks: typing.Optional[typing.List[substra.sdk.schemas.ComputePlanTaskSpec]]
 ```
 
 ## ComputePlanTaskSpec
@@ -137,27 +137,27 @@ Specification of a compute task inside a compute plan specification
 
 note : metadata field does not accept strings containing '__' as dict key
 ```text
-- task_id: str
-- function_key: str
-- worker: str
-- tag: Optional[str]
-- metadata: Optional[Mapping[str, str]]
-- inputs: Optional[List[InputRef]]
-- outputs: Optional[Mapping[str, ComputeTaskOutputSpec]]
+- task_id:
+- function_key:
+- worker:
+- tag: typing.Optional[str]
+- metadata: typing.Union[typing.Dict[str, str], typing.Dict[str, int], NoneType]
+- inputs: typing.Optional[typing.List[substra.sdk.schemas.InputRef]]
+- outputs: typing.Optional[typing.Dict[str, substra.sdk.schemas.ComputeTaskOutputSpec]]
 ```
 
 ## Permissions
 Specification for permissions. If public is False,
 give the list of authorized ids.
 ```text
-- public: bool
-- authorized_ids: List[str]
+- public:
+- authorized_ids: typing.List[str]
 ```
 
 ## PrivatePermissions
 Specification for private permissions. Only the organizations
 whose ids are in authorized_ids can access the asset.
 ```text
-- authorized_ids: List[str]
+- authorized_ids: typing.List[str]
 ```
diff --git a/setup.py b/setup.py
index 735cad08..e7756987 100644
--- a/setup.py
+++ b/setup.py
@@ -43,7 +43,7 @@
         "urllib3<2",
         "docker",
         "pyyaml",
-        "pydantic>=1.9.0, <2.0.0",
+        "pydantic>=2.3.0,<3.0.0",
         "tqdm",
         "python-slugify",
     ],
diff --git a/substra/sdk/backends/local/backend.py b/substra/sdk/backends/local/backend.py
index e32e53f8..aedaac35 100644
--- a/substra/sdk/backends/local/backend.py
+++ b/substra/sdk/backends/local/backend.py
@@ -5,6 +5,7 @@
 import warnings
 from datetime import datetime
 from pathlib import Path
+from typing import Any
 from typing import Dict
 from typing import List
 from typing import NoReturn
@@ -138,7 +139,7 @@ def _check_metadata(metadata: Optional[Dict[str, str]]):
     if metadata is not None:
         if any([len(key) > _MAX_LEN_KEY_METADATA for key in metadata]):
             raise exceptions.InvalidRequest("The key in metadata cannot be more than 50 characters", 400)
-        if any([len(value) > _MAX_LEN_VALUE_METADATA or len(value) == 0 for value in metadata.values()]):
+        if any([len(str(value)) > _MAX_LEN_VALUE_METADATA or len(str(value)) == 0 for value in metadata.values()]):
             raise exceptions.InvalidRequest("Values in metadata cannot be empty or more than 100 characters", 400)
         if any("__" in key for key in metadata):
             raise exceptions.InvalidRequest(
@@ -298,8 +299,8 @@ def _add_function(self, key, spec, spec_options=None):
                 "storage_address": function_description_path,
             },
             metadata=spec.metadata if spec.metadata else dict(),
-            inputs=spec.inputs or [],
-            outputs=spec.outputs or [],
+            inputs=_schemas_list_to_models_list(spec.inputs, models.FunctionInput),
+            outputs=_schemas_list_to_models_list(spec.outputs, models.FunctionOutput),
         )
         return self._db.add(function)
@@ -330,7 +331,7 @@ def _add_dataset(self, key, spec, spec_options=None):
                 "storage_address": dataset_description_path,
             },
             metadata=spec.metadata if spec.metadata else dict(),
-            logs_permission=logs_permission.dict(),
+            logs_permission=logs_permission.model_dump(),
         )
         return self._db.add(asset)
@@ -422,7 +423,7 @@ def _add_task(self, key, spec, spec_options=None):
             worker=spec.worker,
             compute_plan_key=compute_plan_key,
             rank=rank,
-            inputs=spec.inputs,
+            inputs=_schemas_list_to_models_list(spec.inputs, models.InputRef),
             outputs=_output_from_spec(spec.outputs),
             tag=spec.tag or "",
             status=models.Status.waiting,
@@ -557,8 +558,8 @@ def cancel_compute_plan(self, key):
     def update(self, key, spec, spec_options=None):
         asset_type = spec.__class__.type_
         asset = self.get(asset_type, key)
-        data = asset.dict()
-        data.update(spec.dict())
+        data = asset.model_dump()
+        data.update(spec.model_dump())
         updated_asset = models.SCHEMA_TO_MODEL[asset_type](**data)
         self._db.update(updated_asset)
         return
@@ -591,13 +592,21 @@ def _output_from_spec(outputs: Dict[str, schemas.ComputeTaskOutputSpec]) -> Dict
     """Convert a list of schemas.ComputeTaskOutput to a list of models.ComputeTaskOutput"""
     return {
         identifier: models.ComputeTaskOutput(
-            permissions=models.Permissions(process=output.permissions), transient=output.is_transient, value=None
+            permissions=models.Permissions(process=output.permissions.model_dump()),
+            transient=output.is_transient,
+            value=None,
         )  # default is None (= outputs are not computed yet)
         for identifier, output in outputs.items()
     }
 
 
+def _schemas_list_to_models_list(inputs: Any, model: Any) -> Any:
+    if not inputs:
+        return []
+    return [model.model_validate(input_schema.model_dump()) for input_schema in inputs]
+
+
 def _warn_on_transient_outputs(outputs: typing.Dict[str, schemas.ComputeTaskOutputSpec]):
     for _, output in outputs.items():
         if output.is_transient:
diff --git a/substra/sdk/backends/local/compute/worker.py b/substra/sdk/backends/local/compute/worker.py
index e30c526e..1c02427f 100644
--- a/substra/sdk/backends/local/compute/worker.py
+++ b/substra/sdk/backends/local/compute/worker.py
@@ -84,7 +84,7 @@ def _save_cp_performances_as_json(self, compute_plan_key: str, path: Path) -> No
         performances = self._db.get_performances(compute_plan_key)
 
         with (path).open("w", encoding="UTF-8") as json_file:
-            json.dump(performances.dict(), json_file, default=str)
+            json.dump(performances.model_dump(), json_file, default=str)
 
     def _get_asset_unknown_type(self, asset_key, possible_types: List[schemas.Type]) -> Tuple[Any, schemas.Type]:
         for asset_type in possible_types:
diff --git a/substra/sdk/backends/remote/backend.py b/substra/sdk/backends/remote/backend.py
index f1fca6ec..8ea83b2e 100644
--- a/substra/sdk/backends/remote/backend.py
+++ b/substra/sdk/backends/remote/backend.py
@@ -200,7 +200,7 @@ def add(self, spec, spec_options=None):
 
     def _add_compute_plan(self, spec, spec_options):
         """Register compute plan info (without tasks)."""
-        cp_spec = spec.copy()
+        cp_spec = spec.model_copy()
         del cp_spec.tasks
 
         with spec.build_request_kwargs(**spec_options) as (data, _):
diff --git a/substra/sdk/client.py b/substra/sdk/client.py
index 57f850bb..77be1967 100644
--- a/substra/sdk/client.py
+++ b/substra/sdk/client.py
@@ -565,7 +565,7 @@ def get_performances(self, key: str, *, wait_completion: bool = False) -> models
         Example:
             ```python
             perf = client.get_performances(cp_key)
-            df = pd.DataFrame(perf.dict())
+            df = pd.DataFrame(perf.model_dump())
             print(df)
             ```
         """
diff --git a/substra/sdk/models.py b/substra/sdk/models.py
index cebc29fb..c45bedd0 100644
--- a/substra/sdk/models.py
+++ b/substra/sdk/models.py
@@ -11,6 +11,7 @@
 import pydantic
 from pydantic import AnyUrl
+from pydantic import ConfigDict
 from pydantic import DirectoryPath
 from pydantic import FilePath
 from pydantic.fields import Field
@@ -97,10 +98,10 @@ class _Model(schemas._PydanticConfig, abc.ABC):
 
     # pretty print
     def __str__(self):
-        return self.json(indent=4)
+        return self.model_dump_json(indent=4)
 
     def __repr__(self):
-        return self.json(indent=4)
+        return self.model_dump_json(indent=4)
 
     @staticmethod
     def allowed_filters() -> List[str]:
@@ -113,8 +114,8 @@ class DataSample(_Model):
 
     key: str
     owner: str
-    data_manager_keys: Optional[List[str]]
-    path: Optional[DirectoryPath]
+    data_manager_keys: Optional[List[str]] = None
+    path: Optional[DirectoryPath] = None
     creation_date: datetime
 
     type_: ClassVar[str] = schemas.Type.DataSample
@@ -185,8 +186,8 @@ class Function(_Model):
     def allowed_filters() -> List[str]:
         return ["key", "name", "owner", "permissions", "compute_plan_key", "dataset_key", "data_sample_key"]
 
-    @pydantic.validator("inputs", pre=True)
-    def dict_input_to_list(cls, v):
+    @pydantic.field_validator("inputs", mode="before")
+    def dict_input_to_list(cls, v):  # noqa: N805
         if isinstance(v, dict):
             # Transform the inputs dict to a list
             return [
@@ -201,8 +202,8 @@ def dict_input_to_list(cls, v):
         else:
             return v
 
-    @pydantic.validator("outputs", pre=True)
-    def dict_output_to_list(cls, v):
+    @pydantic.field_validator("outputs", mode="before")
+    def dict_output_to_list(cls, v):  # noqa: N805
         if isinstance(v, dict):
             # Transform the outputs dict to a list
             return [
@@ -227,7 +228,7 @@ class OutModel(schemas._PydanticConfig):
 
     key: str
     compute_task_key: str
-    address: Optional[InModel]
+    address: Optional[InModel] = None
     permissions: Permissions
     owner: str
     creation_date: datetime
@@ -241,12 +242,12 @@ def allowed_filters() -> List[str]:
 
 class InputRef(schemas._PydanticConfig):
     identifier: str
-    asset_key: Optional[str]
-    parent_task_key: Optional[str]
-    parent_task_output_identifier: Optional[str]
+    asset_key: Optional[str] = None
+    parent_task_key: Optional[str] = None
+    parent_task_output_identifier: Optional[str] = None
 
     # either (asset_key) or (parent_task_key, parent_task_output_identifier) must be specified
-    _check_asset_key_or_parent_ref = pydantic.root_validator(allow_reuse=True)(schemas.check_asset_key_or_parent_ref)
+    _check_asset_key_or_parent_ref = pydantic.model_validator(mode="before")(schemas.check_asset_key_or_parent_ref)
 
 
 class ComputeTaskOutput(schemas._PydanticConfig):
@@ -254,9 +255,7 @@ class ComputeTaskOutput(schemas._PydanticConfig):
 
     permissions: Permissions
     is_transient: bool = Field(False, alias="transient")
-
-    class Config:
-        allow_population_by_field_name = True
+    model_config = ConfigDict(populate_by_name=True)
 
 
 class Task(_Model):
@@ -267,11 +266,11 @@ class Task(_Model):
     metadata: Dict[str, str]
     status: Status
     worker: str
-    rank: Optional[int]
+    rank: Optional[int] = None
     tag: str
     creation_date: datetime
-    start_date: Optional[datetime]
-    end_date: Optional[datetime]
+    start_date: Optional[datetime] = None
+    end_date: Optional[datetime] = None
     error_type: Optional[TaskErrorType] = None
     inputs: List[InputRef]
     outputs: Dict[str, ComputeTaskOutput]
@@ -292,7 +291,7 @@ def allowed_filters() -> List[str]:
         ]
 
 
-Task.update_forward_refs()
+Task.model_rebuild()
 
 
 class ComputePlan(_Model):
@@ -310,14 +309,14 @@ class ComputePlan(_Model):
     canceled_count: int = 0
     failed_count: int = 0
     done_count: int = 0
-    failed_task_key: Optional[str]
+    failed_task_key: Optional[str] = None
     status: ComputePlanStatus
     creation_date: datetime
-    start_date: Optional[datetime]
-    end_date: Optional[datetime]
-    estimated_end_date: Optional[datetime]
-    duration: Optional[int]
-    creator: Optional[str]
+    start_date: Optional[datetime] = None
+    end_date: Optional[datetime] = None
+    estimated_end_date: Optional[datetime] = None
+    duration: Optional[int] = None
+    creator: Optional[str] = None
 
     type_: ClassVar[str] = schemas.Type.ComputePlan
@@ -363,7 +362,8 @@ class Organization(schemas._PydanticConfig):
     type_: ClassVar[str] = schemas.Type.Organization
 
 
-class OrganizationInfoConfig(schemas._PydanticConfig):
+class OrganizationInfoConfig(schemas._PydanticConfig, extra="allow"):
+    model_config = ConfigDict(protected_namespaces=())
     model_export_enabled: bool
@@ -396,9 +396,9 @@ class OutputAsset(_TaskAsset):
     type_: ClassVar[str] = schemas.Type.OutputAsset
 
     # Deal with remote returning the actual performance object
-    @pydantic.validator("asset", pre=True)
-    def convert_remote_performance(cls, value, values):
-        if values.get("kind") == schemas.AssetKind.performance and isinstance(value, dict):
+    @pydantic.field_validator("asset", mode="before")
+    def convert_remote_performance(cls, value, values):  # noqa: N805
+        if values.data.get("kind") == schemas.AssetKind.performance and isinstance(value, dict):
             return value.get("performance_value")
         return value
@@ -414,4 +414,6 @@ def convert_remote_performance(cls, value, values):
     schemas.Type.Model: OutModel,
     schemas.Type.InputAsset: InputAsset,
     schemas.Type.OutputAsset: OutputAsset,
+    schemas.Type.FunctionOutput: FunctionOutput,
+    schemas.Type.FunctionInput: FunctionInput,
 }
diff --git a/substra/sdk/schemas.py b/substra/sdk/schemas.py
index 43a79d9e..b5a715f9 100644
--- a/substra/sdk/schemas.py
+++ b/substra/sdk/schemas.py
@@ -9,6 +9,7 @@
 from typing import Optional
 
 import pydantic
+from pydantic import ConfigDict
 from pydantic.fields import Field
 
 from substra.sdk import utils
@@ -48,6 +49,8 @@ class AssetKind(str, enum.Enum):
 
 class Type(enum.Enum):
     Function = "function"
+    FunctionOutput = "function_output"
+    FunctionInput = "function_input"
     DataSample = "data_sample"
     Dataset = "dataset"
     Model = "model"
@@ -76,9 +79,7 @@ def __str__(self):
 class _PydanticConfig(pydantic.BaseModel):
     """Shared configuration for all schemas here"""
 
-    class Config:
-        # Ignore extra fields, leave them unexposed
-        extra = "ignore"
+    model_config = ConfigDict(extra="ignore")
 
 
 class _Spec(_PydanticConfig):
@@ -86,10 +87,10 @@ class _Spec(_PydanticConfig):
 
     # pretty print
     def __str__(self):
-        return self.json(indent=4)
+        return self.model_dump_json(indent=4)
 
     def __repr__(self):
-        return self.json(indent=4)
+        return self.model_dump_json(indent=4)
 
     class Meta:
         file_attributes = None
@@ -101,7 +102,7 @@ def is_many(self):
     def build_request_kwargs(self):
         # TODO should be located in the backends/remote module
         # Serialize and deserialize to prevent errors eg with pathlib.Path
-        data = json.loads(self.json(exclude_unset=True))
+        data = json.loads(self.model_dump_json(exclude_unset=True))
         if self.Meta.file_attributes:
             with utils.extract_files(data, self.Meta.file_attributes) as (data, files):
                 yield (data, files)
@@ -136,8 +137,8 @@ class DataSampleSpec(_Spec):
     the 'paths' field.
     """
 
-    path: Optional[pathlib.Path]  # Path to the data sample if only one
-    paths: Optional[List[pathlib.Path]]  # Path to the data samples if several
+    path: Optional[pathlib.Path] = None  # Path to the data sample if only one
+    paths: Optional[List[pathlib.Path]] = None  # Path to the data samples if several
     data_manager_keys: typing.List[str]
 
     type_: typing.ClassVar[Type] = Type.DataSample
@@ -145,8 +146,8 @@ def is_many(self):
         return self.paths and len(self.paths) > 0
 
-    @pydantic.root_validator(pre=True)
-    def exclusive_paths(cls, values):
+    @pydantic.model_validator(mode="before")
+    def exclusive_paths(cls, values):  # noqa: N805
         """Check that one and only one path(s) field is defined."""
         if "paths" in values and "path" in values:
             raise ValueError("'path' and 'paths' fields are exclusive.")
@@ -158,7 +159,7 @@ def exclusive_paths(cls, values):
     def build_request_kwargs(self, local):
         # redefine kwargs builder to handle the local paths
         # Serialize and deserialize to prevent errors eg with pathlib.Path
-        data = json.loads(self.json(exclude_unset=True))
+        data = json.loads(self.model_dump_json(exclude_unset=True))
         if local:
             with utils.extract_data_sample_files(data) as (data, files):
                 yield (data, files)
@@ -169,8 +170,8 @@ def build_request_kwargs(self, local):
 def check_asset_key_or_parent_ref(cls, values):
     """Check that either (asset key) or (parent_task_key, parent_task_output_identifier) are set, but not both."""
-    has_asset_key = bool(values.get("asset_key"))
-    has_parent = bool(values.get("parent_task_key")) and bool(values.get("parent_task_output_identifier"))
+    has_asset_key = bool(dict(values).get("asset_key"))
+    has_parent = bool(dict(values).get("parent_task_key")) and bool(dict(values).get("parent_task_output_identifier"))
 
     if has_asset_key != has_parent:  # xor
         return values
@@ -182,12 +183,12 @@ class InputRef(_PydanticConfig):
     """Specification of a compute task input"""
 
     identifier: str
-    asset_key: Optional[str]
-    parent_task_key: Optional[str]
-    parent_task_output_identifier: Optional[str]
+    asset_key: Optional[str] = None
+    parent_task_key: Optional[str] = None
+    parent_task_output_identifier: Optional[str] = None
 
     # either (asset_key) or (parent_task_key, parent_task_output_identifier) must be specified
-    _check_asset_key_or_parent_ref = pydantic.root_validator(allow_reuse=True)(check_asset_key_or_parent_ref)
+    _check_asset_key_or_parent_ref = pydantic.model_validator(mode="before")(check_asset_key_or_parent_ref)
 
 
 class ComputeTaskOutputSpec(_PydanticConfig):
@@ -195,9 +196,7 @@ class ComputeTaskOutputSpec(_PydanticConfig):
 
     permissions: Permissions
     is_transient: Optional[bool] = Field(False, alias="transient")
-
-    class Config:
-        allow_population_by_field_name = True
+    model_config = ConfigDict(populate_by_name=True)
 
 
 class ComputePlanTaskSpec(_Spec):
@@ -209,15 +208,15 @@ class ComputePlanTaskSpec(_Spec):
     task_id: str
     function_key: str
     worker: str
-    tag: Optional[str]
-    metadata: Optional[Dict[str, str]]
-    inputs: Optional[List[InputRef]]
-    outputs: Optional[Dict[str, ComputeTaskOutputSpec]]
+    tag: Optional[str] = None
+    metadata: Optional[Dict[str, str]] = None
+    inputs: Optional[List[InputRef]] = None
+    outputs: Optional[Dict[str, ComputeTaskOutputSpec]] = None
 
 
 class _BaseComputePlanSpec(_Spec):
     key: str
-    tasks: Optional[List[ComputePlanTaskSpec]]
+    tasks: Optional[List[ComputePlanTaskSpec]] = None
 
 
 class ComputePlanSpec(_BaseComputePlanSpec):
@@ -227,9 +226,9 @@ class ComputePlanSpec(_BaseComputePlanSpec):
     """
 
     key: str = pydantic.Field(default_factory=lambda: str(uuid.uuid4()))
-    tag: Optional[str]
+    tag: Optional[str] = None
     name: str
-    metadata: Optional[Dict[str, str]]
+    metadata: Optional[Dict[str, str]] = None
 
     type_: typing.ClassVar[Type] = Type.ComputePlan
@@ -237,7 +236,7 @@ class ComputePlanSpec(_BaseComputePlanSpec):
     def build_request_kwargs(self):
         # default values are not dumped when `exclude_unset` flag is enabled,
         # this is why we need to reimplement this custom function.
-        data = json.loads(self.json(exclude_unset=True))
+        data = json.loads(self.model_dump_json(exclude_unset=True))
         data["key"] = self.key
         yield data, None
@@ -267,7 +266,7 @@ class DatasetSpec(_Spec):
     type: str
     description: pathlib.Path  # Path to the description file
     permissions: Permissions
-    metadata: Optional[Dict[str, str]]
+    metadata: Optional[Dict[str, str]] = None
     logs_permission: Permissions
 
     type_: typing.ClassVar[Type] = Type.Dataset
@@ -293,8 +292,8 @@ class FunctionInputSpec(_Spec):
     optional: bool
     kind: AssetKind
 
-    @pydantic.root_validator
-    def _check_identifiers(cls, values):
+    @pydantic.model_validator(mode="before")
+    def _check_identifiers(cls, values):  # noqa: N805
         """Checks that the multiplicity and the optionality of a data manager is always set to False"""
         if values["kind"] == AssetKind.data_manager:
             if values["multiple"]:
@@ -327,10 +326,10 @@ class FunctionOutputSpec(_Spec):
     kind: AssetKind
     multiple: bool
 
-    @pydantic.root_validator
-    def _check_performance(cls, values):
+    @pydantic.model_validator(mode="before")
+    def _check_performance(cls, values):  # noqa: N805
         """Checks that the performance is always set to False"""
         if values.get("kind") == AssetKind.performance and values.get("multiple"):
             raise ValueError("Performance can't be multiple.")
         return values
@@ -346,22 +345,22 @@ class FunctionSpec(_Spec):
     description: pathlib.Path
     file: pathlib.Path
     permissions: Permissions
-    metadata: Optional[Dict[str, str]]
+    metadata: Optional[Dict[str, str]] = None
     inputs: Optional[List[FunctionInputSpec]] = None
     outputs: Optional[List[FunctionOutputSpec]] = None
 
     type_: typing.ClassVar[Type] = Type.Function
 
-    @pydantic.validator("inputs")
-    def _check_inputs(cls, v):
+    @pydantic.field_validator("inputs")
+    def _check_inputs(cls, v):  # noqa: N805
         inputs = v or []
         identifiers = {value.identifier for value in inputs}
         if len(identifiers) != len(inputs):
             raise ValueError("Several function inputs cannot have the same identifier.")
         return v
 
-    @pydantic.validator("outputs")
-    def _check_outputs(cls, v):
+    @pydantic.field_validator("outputs")
+    def _check_outputs(cls, v):  # noqa: N805
         outputs = v or []
         identifiers = {value.identifier for value in outputs}
         if len(identifiers) != len(outputs):
@@ -372,15 +371,19 @@ def _check_outputs(cls, v):
     def build_request_kwargs(self):
         # TODO should be located in the backends/remote module
         # Serialize and deserialize to prevent errors eg with pathlib.Path
-        data = json.loads(self.json(exclude_unset=True))
+        data = json.loads(self.model_dump_json(exclude_unset=True))
 
         # Computed fields using `@property` are not dumped when `exclude_unset` flag is enabled,
         # this is why we need to reimplement this custom function.
data["inputs"] = ( - {input.identifier: input.dict(exclude={"identifier"}) for input in self.inputs} if self.inputs else {} + {input.identifier: input.model_dump(exclude={"identifier"}) for input in self.inputs} + if self.inputs + else dict() ) data["outputs"] = ( - {output.identifier: output.dict(exclude={"identifier"}) for output in self.outputs} if self.outputs else {} + {output.identifier: output.model_dump(exclude={"identifier"}) for output in self.outputs} + if self.outputs + else dict() ) if self.Meta.file_attributes: @@ -406,14 +409,14 @@ class UpdateFunctionSpec(_Spec): class TaskSpec(_Spec): key: str = pydantic.Field(default_factory=lambda: str(uuid.uuid4())) - tag: Optional[str] - compute_plan_key: Optional[str] - metadata: Optional[Dict[str, str]] + tag: Optional[str] = None + compute_plan_key: Optional[str] = None + metadata: Optional[Dict[str, str]] = None function_key: str worker: str rank: Optional[int] = None - inputs: Optional[List[InputRef]] - outputs: Optional[Dict[str, ComputeTaskOutputSpec]] + inputs: Optional[List[InputRef]] = None + outputs: Optional[Dict[str, ComputeTaskOutputSpec]] = None type_: typing.ClassVar[Type] = Type.Task @@ -421,10 +424,10 @@ class TaskSpec(_Spec): def build_request_kwargs(self): # default values are not dumped when `exclude_unset` flag is enabled, # this is why we need to reimplement this custom function. - data = json.loads(self.json(exclude_unset=True)) + data = json.loads(self.model_dump_json(exclude_unset=True)) data["key"] = self.key - data["inputs"] = [input.dict() for input in self.inputs] if self.inputs else [] - data["outputs"] = {k: v.dict(by_alias=True) for k, v in self.outputs.items()} if self.outputs else {} + data["inputs"] = [input.model_dump() for input in self.inputs] if self.inputs else [] + data["outputs"] = {k: v.model_dump(by_alias=True) for k, v in self.outputs.items()} if self.outputs else {} yield data, None @classmethod diff --git a/tests/sdk/test_add.py b/tests/sdk/test_add.py index 20d98e60..7ea8c39a 100644 --- a/tests/sdk/test_add.py +++ b/tests/sdk/test_add.py @@ -24,7 +24,7 @@ def test_add_dataset_invalid_args(client, dataset_query, mocker): mock_requests(mocker, "post", response=datastore.DATASET) del dataset_query["data_opener"] - with pytest.raises(pydantic.error_wrappers.ValidationError): + with pytest.raises(pydantic.ValidationError): client.add_dataset(dataset_query) diff --git a/tests/sdk/test_get.py b/tests/sdk/test_get.py index 09d81793..bf2bebfe 100644 --- a/tests/sdk/test_get.py +++ b/tests/sdk/test_get.py @@ -128,7 +128,7 @@ def test_get_performances(client, mocker): m = mock_requests_responses(mocker, "get", [mock_response(cp_item), mock_response(perf_item)]) response = client.get_performances("magic-key") - results = response.dict() + results = response.model_dump() df = pd.DataFrame(results) assert list(df.columns) == list(results.keys())