
Commit

move code from PR #1376
Signed-off-by: Filipe Oliveira <[email protected]>
filipeo2-mck committed Oct 18, 2023
1 parent 4425ad8 commit b7756a8
Showing 2 changed files with 46 additions and 8 deletions.
pandera/backends/pyspark/container.py (2 changes: 1 addition & 1 deletion)
@@ -283,7 +283,7 @@ def collect_schema_components(
         schema_components = []
         for col_name, column in schema.columns.items():
             if (
-                column.required or col_name in check_obj
+                column.required or col_name in check_obj.columns
             ) and col_name not in column_info.lazy_exclude_column_names:
                 column = copy.deepcopy(column)
                 if schema.dtype is not None:
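The fix above is a single membership check. As a quick illustration (not part of the commit, and assuming a locally built SparkSession), a PySpark DataFrame exposes its column names as the plain list `df.columns`, which is where a name lookup like `col_name in ...` belongs:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("5", "b")], ["a", "b"])

# df.columns is a list of column-name strings, e.g. ["a", "b"],
# so `in` performs the intended name lookup.
assert "a" in df.columns
assert "c" not in df.columns  # an absent optional column is simply not listed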
tests/pyspark/test_pyspark_model.py (52 changes: 45 additions & 7 deletions)
@@ -300,18 +300,56 @@ def function_expected():
     )
 
 
-def test_optional_column() -> None:
-    """Test that optional columns are not required."""
+# Define a fixture for the Schema
+@pytest.fixture(scope="module", name="test_schema_optional_columns")
+def test_schema():
+    """Fixture containing DataFrameModel with optional columns."""
 
-    class Schema(DataFrameModel):  # pylint:disable=missing-class-docstring
+    class Schema(pa.DataFrameModel):
+        """Simple DataFrameModel containing optional columns."""
+
         a: Optional[str]
         b: Optional[str] = pa.Field(eq="b")
         c: Optional[str]  # test pandera.typing alias
 
-    schema = Schema.to_schema()
-    assert not schema.columns["a"].required
-    assert not schema.columns["b"].required
-    assert not schema.columns["c"].required
+    return Schema
+
+
+def test_optional_column(test_schema_optional_columns) -> None:
+    """Test that optional columns are not required."""
+
+    schema = test_schema_optional_columns.to_schema()
+    assert not schema.columns[
+        "a"
+    ].required, "Optional column 'a' shouldn't be required"
+    assert not schema.columns[
+        "b"
+    ].required, "Optional column 'b' shouldn't be required"
+    assert not schema.columns[
+        "c"
+    ].required, "Optional column 'c' shouldn't be required"
+
+
+def test_validation_succeeds_with_missing_optional_column(
+    spark, test_schema_optional_columns
+) -> None:
+    """Test that validation succeeds even when an optional column is missing."""
+
+    data = [("5", "b"), ("15", "b")]
+    spark_schema = T.StructType(
+        [
+            T.StructField("a", T.StringType(), False),
+            T.StructField("b", T.StringType(), False),
+            # 'c' column is missing, but it's optional
+        ],
+    )
+    df = spark_df(spark, data, spark_schema)
+    df_out = test_schema_optional_columns.validate(check_obj=df)
+
+    # `df_out.pandera.errors` should be empty if validation is successful.
+    assert (
+        df_out.pandera.errors == {}
+    ), "No error should be raised in case of a missing optional column."
 
 
 def test_invalid_field() -> None:
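For context, a rough standalone sketch of the scenario the new test exercises, outside pytest. This is not part of the commit: the imports (`pandera.pyspark as pa`, `pyspark.sql.types as T`) and the locally built SparkSession are assumptions standing in for the repository's `spark` and `spark_df` test fixtures, while `Schema.validate(check_obj=...)` and the `.pandera.errors` accessor mirror their use in the diff above.

from typing import Optional

import pandera.pyspark as pa
import pyspark.sql.types as T
from pyspark.sql import SparkSession


class Schema(pa.DataFrameModel):
    """Model with optional columns; 'c' will be omitted from the data."""

    a: Optional[str]
    b: Optional[str] = pa.Field(eq="b")
    c: Optional[str]


spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [("5", "b"), ("15", "b")],
    schema=T.StructType(
        [
            T.StructField("a", T.StringType(), False),
            T.StructField("b", T.StringType(), False),
            # 'c' is intentionally missing; it is optional in the model.
        ]
    ),
)

df_out = Schema.validate(check_obj=df)
# With the container.py fix applied, the missing optional column produces no errors.
print(df_out.pandera.errors)  # expected: {}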
