Skip to content

Commit

Permalink
improve pydantic model efficiency (#1358)
Browse files Browse the repository at this point in the history
* improve pydantic model efficiency

Signed-off-by: Niels Bantilan <[email protected]>

* mypy lint

Signed-off-by: Niels Bantilan <[email protected]>

---------

Signed-off-by: Niels Bantilan <[email protected]>
  • Loading branch information
cosmicBboy authored Sep 30, 2023
1 parent d46b3b9 commit ceeae10
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions pandera/engines/pandas_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,17 +476,17 @@ def _check_decimal(
decimals = pandas_obj[is_decimal]
# fix for modin: an unnamed series raises KeyError
# https://github.com/modin-project/modin/issues/4317
decimals.name = "decimals"
decimals.name = "decimals" # type: ignore

splitted = decimals.astype("string").str.split(".", n=1, expand=True)
splitted = decimals.astype("string").str.split(".", n=1, expand=True) # type: ignore
if splitted.shape[1] < 2:
splitted[1] = ""
len_left = splitted[0].str.len().fillna(0)
len_right = splitted[1].str.len().fillna(0)
precisions = len_left + len_right

scales = series_cls(
np.full_like(decimals, np.nan), dtype=np.object_, index=decimals.index
np.full_like(decimals, np.nan), dtype=np.object_, index=decimals.index # type: ignore
)
pos_left = len_left > 0
scales[pos_left] = len_right[pos_left]
Expand Down Expand Up @@ -1110,7 +1110,10 @@ def _coerce_row(row):
"""
try:
# pylint: disable=not-callable
row = pd.Series(self.type(**row).dict())
if PYDANTIC_V2:
row = self.type(**row).model_dump()
else:
row = self.type(**row).dict()
row["failure_cases"] = np.nan
except ValidationError as exc:
row["failure_cases"] = {
Expand All @@ -1119,7 +1122,10 @@ def _coerce_row(row):

return row

coerced_df = data_container.apply(_coerce_row, axis="columns")
records = data_container.to_dict(orient="records") # type: ignore
coerced_df = type(data_container).from_records( # type: ignore
[_coerce_row(row) for row in records]
)

# raise a ParserError with failure cases where each case is a
# dictionary containing the failed elements in the pydantic record
Expand Down

0 comments on commit ceeae10

Please sign in to comment.