avoid running nullable checks if nullable=true
Signed-off-by: Filipe Oliveira <[email protected]>
filipeo2-mck committed Nov 2, 2023
Parent: 37c24d9 · Commit: 7675ae8
Showing 1 changed file with 10 additions and 4 deletions.
pandera/backends/pyspark/column.py (10 additions, 4 deletions)

@@ -125,10 +125,16 @@ def coerce_dtype(

     @validate_scope(scope=ValidationScope.SCHEMA)
     def check_nullable(self, check_obj: DataFrame, schema):
-        isna = (
-            check_obj.filter(col(schema.name).isNull()).limit(1).count() == 0
-        )
-        passed = schema.nullable or isna
+        # If True, ignore this `nullable` check
+        passed = schema.nullable
+
+        # If False, execute the costly validation
+        if not schema.nullable:
+            passed = (
+                check_obj.filter(col(schema.name).isNull()).limit(1).count()
+                == 0
+            )

         return CoreCheckResult(
             check="not_nullable",
             reason_code=SchemaErrorReason.SERIES_CONTAINS_NULLS,

(Codecov / codecov/patch: added lines 129 and 132-133 in pandera/backends/pyspark/column.py were not covered by tests.)
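The change above is a short-circuit optimization: the Spark null scan is expensive (it triggers a job against the data), so it should only run when the column is actually declared non-nullable. A minimal sketch of the same control flow, without Spark, using a hypothetical `FakeColumn` class (not part of pandera) that counts how often the costly scan executes:

```python
class FakeColumn:
    """Hypothetical stand-in for a Spark-backed column; counts costly scans."""

    def __init__(self, values, nullable):
        self.values = values
        self.nullable = nullable
        self.scan_count = 0  # how many times the costly check ran

    def _has_no_nulls(self):
        # Stand-in for the expensive Spark action:
        # check_obj.filter(col(name).isNull()).limit(1).count() == 0
        self.scan_count += 1
        return all(v is not None for v in self.values)

    def check_nullable(self):
        # If True, ignore this `nullable` check entirely
        passed = self.nullable
        # If False, execute the costly validation
        if not self.nullable:
            passed = self._has_no_nulls()
        return passed


# Nullable column: check passes without ever touching the data
col_a = FakeColumn([1, None, 3], nullable=True)
assert col_a.check_nullable() is True
assert col_a.scan_count == 0  # costly scan skipped

# Non-nullable column containing a null: scan runs and the check fails
col_b = FakeColumn([1, None, 3], nullable=False)
assert col_b.check_nullable() is False
assert col_b.scan_count == 1
```

Note that the real implementation also chains `.limit(1)` before `.count()`, presumably so Spark can stop scanning after the first null row rather than counting every null in the column.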
