From dfd69c1a425689c49f7d166d654b5573a7b73268 Mon Sep 17 00:00:00 2001 From: tfx-team Date: Mon, 15 Jul 2024 17:08:12 -0700 Subject: [PATCH] Prohibit execution parameters that serialize to more than 262144 bytes in size. PiperOrigin-RevId: 652641352 --- RELEASE.md | 6 ++++-- tfx/dsl/compiler/compiler.py | 11 +++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 7eabd06f88..e40951fb5f 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -9,6 +9,8 @@ most likely you discovered a bug and should not use an f-string in the first place. If it is truly your intention to print the placeholder (not its resolved value) for debugging purposes, use `repr()` or `!r` instead. +* Execution parameters that serialize to larger than 262144 bytes are now + prohibited, and will cause an error during DSL compilation. ### For Pipeline Authors @@ -224,7 +226,7 @@ ## Bug Fixes and Other Changes -* Support to task type "workerpool1" of CLUSTER_SPEC in Vertex AI training's +* Support to task type "workerpool1" of CLUSTER_SPEC in Vertex AI training's service according to the changes of task type in Tuner component. * Propagates unexpected import failures in the public v1 module. @@ -2887,4 +2889,4 @@ the 1.1.x release for TFX library. ### For component authors -* N/A \ No newline at end of file +* N/A diff --git a/tfx/dsl/compiler/compiler.py b/tfx/dsl/compiler/compiler.py index e798b6930d..1fb5e5036c 100644 --- a/tfx/dsl/compiler/compiler.py +++ b/tfx/dsl/compiler/compiler.py @@ -37,6 +37,9 @@ from tfx.utils import deprecation_utils from tfx.utils import name_utils +# Maximum size of serialized parameter Value proto. 
+_PARAMETER_VALUE_LIMIT = 2**18 + class Compiler: """Compiles a TFX pipeline or a component into a uDSL IR proto.""" @@ -485,6 +488,14 @@ def _set_node_parameters(node: pipeline_pb2.PipelineNode, raise ValueError( "Component {} got unsupported parameter {} with type {}.".format( tfx_node.id, key, type(value))) from e + size = len(parameter_value.SerializeToString()) + if size > _PARAMETER_VALUE_LIMIT: + raise ValueError( + "Component {} got parameter {} which is too big: it serializes to " + "{} bytes, which exceeds the limit of {}".format( + tfx_node.id, key, size, _PARAMETER_VALUE_LIMIT + ) + ) def _set_node_execution_options(