From 1b978602e3b3aa906f7bfff409b656fd18258c15 Mon Sep 17 00:00:00 2001 From: karlma821 <54348512+karlma821@users.noreply.github.com> Date: Thu, 16 Nov 2023 23:18:43 +0800 Subject: [PATCH 1/2] fix: always cast series to bool dtype Signed-off-by: karlma821 <54348512+karlma821@users.noreply.github.com> --- pandera/engines/pandas_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandera/engines/pandas_engine.py b/pandera/engines/pandas_engine.py index 4d3d63f5c..c72b058b7 100644 --- a/pandera/engines/pandas_engine.py +++ b/pandera/engines/pandas_engine.py @@ -718,7 +718,7 @@ def check( ) else: is_python_string = data_container.map(lambda x: isinstance(x, str)) # type: ignore[operator] - return is_python_string | data_container.isna() + return is_python_string.astype(bool) | data_container.isna() Engine.register_dtype( From d2ae52b5e827a93740d840121976b3212147b0cc Mon Sep 17 00:00:00 2001 From: karlma821 <54348512+karlma821@users.noreply.github.com> Date: Thu, 16 Nov 2023 23:18:51 +0800 Subject: [PATCH 2/2] test: add test_pandas_data_type_check Signed-off-by: karlma821 <54348512+karlma821@users.noreply.github.com> --- tests/core/test_pandas_engine.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/core/test_pandas_engine.py b/tests/core/test_pandas_engine.py index 125b79995..3d6c93aa9 100644 --- a/tests/core/test_pandas_engine.py +++ b/tests/core/test_pandas_engine.py @@ -5,6 +5,7 @@ import hypothesis import hypothesis.extra.pandas as pd_st import hypothesis.strategies as st +import numpy as np import pandas as pd import pytest import pytz @@ -58,6 +59,35 @@ def test_pandas_data_type_coerce(data_type_cls): assert exc.failure_cases.shape[0] > 0 +@pytest.mark.parametrize( + "data_type_cls", list(pandas_engine.Engine.get_registered_dtypes()) +) +def test_pandas_data_type_check(data_type_cls): + """ + Test that pandas data type check results can be reduced. + """ + try: + data_type = data_type_cls() + except TypeError: + # don't test data types that require parameters + return + + try: + data_container = pd.Series([], dtype=data_type.type) + except TypeError: + # don't test complex data types, e.g. PythonDict, PythonTuple, etc + return + + check_result = data_type.check( + pandas_engine.Engine.dtype(data_container.dtype), + data_container, + ) + assert isinstance(check_result, bool) or isinstance( + check_result.all(), + (bool, np.bool_), + ) + + CATEGORIES = ["A", "B", "C"]