From d46b3b94a76fae77d3825670fc2e0c5ffc67db2f Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Tue, 26 Sep 2023 17:22:37 -0700 Subject: [PATCH] add unit tests (#1351) * add unit tests Signed-off-by: Niels Bantilan * update Signed-off-by: Niels Bantilan --------- Signed-off-by: Niels Bantilan --- pandera/engines/pandas_engine.py | 11 +++++-- tests/core/test_dtypes.py | 50 ++++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/pandera/engines/pandas_engine.py b/pandera/engines/pandas_engine.py index e55f617ce..f78c2c46f 100644 --- a/pandera/engines/pandas_engine.py +++ b/pandera/engines/pandas_engine.py @@ -30,6 +30,7 @@ import pandas as pd import typeguard from pydantic import BaseModel, ValidationError, create_model +from typeguard import CollectionCheckStrategy from pandera import dtypes, errors from pandera.dtypes import immutable @@ -1157,7 +1158,7 @@ def _check_type(self, element: Any) -> bool: # if the element is None or pd.NA, this function should return True: # the schema should only fail if nullable=False is specifed at the # schema/schema component level. - if element is None or pd.isna(element): + if element is None or element is pd.NA: return True try: @@ -1177,7 +1178,13 @@ def _check_type(self, element: Any) -> bool: from typing import TypedDict as _TypedDict _type = _TypedDict(_type.__name__, _type.__annotations__) # type: ignore - typeguard.check_type(element, _type) + + typeguard.check_type( + element, + _type, + # This may be worth making configurable at the global level. 
+ collection_check_strategy=CollectionCheckStrategy.ALL_ITEMS, + ) return True except typeguard.TypeCheckError: return False diff --git a/tests/core/test_dtypes.py b/tests/core/test_dtypes.py index bc3a9a25b..d38e76af9 100644 --- a/tests/core/test_dtypes.py +++ b/tests/core/test_dtypes.py @@ -790,12 +790,12 @@ def test_python_std_list_dict_generics(): "data_dict", [ { - "dict_column": [{"foo": 1, "bar": 2}, {}, None], - "list_column": [[1.0], [], None], + "dict_column": [{"foo": 1}, {"foo": 1, "bar": 2}, {}, None], + "list_column": [[1.0], [1.0, 2.0], [], None], }, { - "dict_column": [{"foo": "1", "bar": "2"}, {}, None], - "list_column": [["1.0"], [], None], + "dict_column": [{"foo": "1"}, {"foo": "1", "bar": "2"}, {}, None], + "list_column": [["1.0"], ["1.0", "2.0"], [], None], }, ], ) @@ -816,8 +816,8 @@ def test_python_typing_handle_empty_list_dict_and_none(nullable, data_dict): expected = pd.DataFrame( { - "dict_column": [{"foo": 1, "bar": 2}, {}, pd.NA], - "list_column": [[1.0], [], pd.NA], + "dict_column": [{"foo": 1}, {"foo": 1, "bar": 2}, {}, pd.NA], + "list_column": [[1.0], [1.0, 2.0], [], pd.NA], } ) @@ -838,12 +838,12 @@ def test_python_typing_handle_empty_list_dict_and_none(nullable, data_dict): "data_dict", [ { - "dict_column": [{"foo": 1, "bar": 2}, {}, None], - "list_column": [[1.0], [], None], + "dict_column": [{"foo": 1}, {"foo": 1, "bar": 2}, {}, None], + "list_column": [[1.0], [1.0, 2.0], [], None], }, { - "dict_column": [{"foo": "1", "bar": "2"}, {}, None], - "list_column": [["1.0"], [], None], + "dict_column": [{"foo": "1"}, {"foo": "1", "bar": "2"}, {}, None], + "list_column": [["1.0"], ["1.0", "2.0"], [], None], }, ], ) @@ -865,8 +865,8 @@ def test_python_std_list_dict_empty_and_none(nullable, data_dict): expected = pd.DataFrame( { - "dict_column": [{"foo": 1, "bar": 2}, {}, pd.NA], - "list_column": [[1.0], [], pd.NA], + "dict_column": [{"foo": 1}, {"foo": 1, "bar": 2}, {}, pd.NA], + "list_column": [[1.0], [1.0, 2.0], [], pd.NA], } ) @@ 
-876,3 +876,29 @@ def test_python_std_list_dict_empty_and_none(nullable, data_dict):
     else:
         with pytest.raises(pa.errors.SchemaError):
             schema.validate(data)
+
+
+def test_python_std_list_dict_error():
+    """Test that invalid dict/list elements are reported as failure cases."""
+    schema = pa.DataFrameSchema(
+        {
+            "dict_column": pa.Column(Dict[str, int]),
+            "list_column": pa.Column(List[float]),
+        },
+    )
+
+    data = pd.DataFrame(
+        {
+            "dict_column": [{"foo": 1}, {"foo": 1, "bar": "2"}, {}],
+            "list_column": [[1.0], ["1.0", 2.0], []],
+        }
+    )
+
+    # pytest.raises fails the test when validation unexpectedly succeeds;
+    # a bare try/except would let that case pass silently.
+    with pytest.raises(pa.errors.SchemaErrors) as exc_info:
+        schema.validate(data, lazy=True)
+
+    failure_cases = exc_info.value.failure_cases["failure_case"]
+    assert failure_cases.iloc[0] == {"foo": 1, "bar": "2"}
+    assert failure_cases.iloc[1] == ["1.0", 2.0]