Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feature: support string column validation for pandas 2.1.3 #1425

Merged
merged 2 commits into from
Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandera/engines/pandas_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ def check(
)
else:
is_python_string = data_container.map(lambda x: isinstance(x, str)) # type: ignore[operator]
return is_python_string | data_container.isna()
return is_python_string.astype(bool) | data_container.isna()


Engine.register_dtype(
Expand Down
30 changes: 30 additions & 0 deletions tests/core/test_pandas_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import hypothesis
import hypothesis.extra.pandas as pd_st
import hypothesis.strategies as st
import numpy as np
import pandas as pd
import pytest
import pytz
Expand Down Expand Up @@ -58,6 +59,35 @@ def test_pandas_data_type_coerce(data_type_cls):
assert exc.failure_cases.shape[0] > 0


@pytest.mark.parametrize(
    "data_type_cls", list(pandas_engine.Engine.get_registered_dtypes())
)
def test_pandas_data_type_check(data_type_cls):
    """
    Test that pandas data type check results can be reduced.

    For each registered dtype that can be built without arguments, an empty
    Series of that dtype is checked against its own engine dtype. The result
    must be either a plain ``bool`` or an object whose ``.all()`` reduction
    yields a ``bool`` / ``np.bool_``.
    """
    try:
        data_type = data_type_cls()
    except TypeError:
        # don't test data types that require parameters
        return

    try:
        empty_series = pd.Series([], dtype=data_type.type)
    except TypeError:
        # don't test complex data types, e.g. PythonDict, PythonTuple, etc
        return

    container_dtype = pandas_engine.Engine.dtype(empty_series.dtype)
    outcome = data_type.check(container_dtype, empty_series)

    # A scalar boolean result is already reduced; nothing more to verify.
    if isinstance(outcome, bool):
        return

    # Otherwise the result must reduce to a boolean scalar via ``.all()``.
    assert isinstance(outcome.all(), (bool, np.bool_))


CATEGORIES = ["A", "B", "C"]


Expand Down