TST (string dtype): resolve xfails for frame methods

pandas-dev · Nov 16, 2024 · a2e8dc3
1 parent fae3e80
commit a2e8dc3
Show file tree

Hide file tree

Showing 10 changed files with 12 additions and 41 deletions.
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -6273,6 +6273,10 @@ class    max    type
             else:
                 to_insert = ((self.index, None),)
 
+            if len(new_obj.columns) == 0 and names:
+                target_dtype = Index(names).dtype
+                new_obj.columns = new_obj.columns.astype(target_dtype)
+
             multi_col = isinstance(self.columns, MultiIndex)
             for j, (lev, lab) in enumerate(to_insert, start=1):
                 i = self.index.nlevels - j

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -2362,5 +2362,6 @@ def external_values(values: ArrayLike) -> ArrayLike:
         values.flags.writeable = False
 
     # TODO(CoW) we should also mark our ExtensionArrays as read-only
-
+    if isinstance(values, ExtensionArray):
+        ...  # this is why test_to_dict_of_blocks_item_cache fails
     return values
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -745,7 +743,6 @@ def test_astype_tz_object_conversion(self, tz):
         result = result.astype({"tz": "datetime64[ns, Europe/London]"})
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_astype_dt64_to_string(
         self, frame_or_series, tz_naive_fixture, using_infer_string
     ):
@@ -767,13 +764,9 @@ def test_astype_dt64_to_string(
         if frame_or_series is DataFrame:
             item = item.iloc[0]
         if using_infer_string:
-            assert item is np.nan
-        else:
             assert item is pd.NA
-
-        # For non-NA values, we should match what we get for non-EA str
-        alt = obj.astype(str)
-        assert np.all(alt.iloc[1:] == result.iloc[1:])
+        else:
+            assert item is np.nan
 
     def test_astype_td64_to_string(self, frame_or_series):
         # GH#41409

diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.core.dtypes.cast import find_common_type
 from pandas.core.dtypes.common import is_dtype_equal
 
@@ -32,7 +30,6 @@ def test_combine_first_mixed(self):
         combined = f.combine_first(g)
         tm.assert_frame_equal(combined, exp)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_combine_first(self, float_frame, using_infer_string):
         # disjoint
         head, tail = float_frame[:5], float_frame[5:]
@@ -79,9 +76,7 @@ def test_combine_first(self, float_frame, using_infer_string):
         tm.assert_series_equal(combined["A"].reindex(g.index), g["A"])
 
         # corner cases
-        warning = FutureWarning if using_infer_string else None
-        with tm.assert_produces_warning(warning, match="empty entries"):
-            comb = float_frame.combine_first(DataFrame())
+        comb = float_frame.combine_first(DataFrame())
         tm.assert_frame_equal(comb, float_frame)
 
         comb = DataFrame().combine_first(float_frame)

diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -320,7 +318,6 @@ def test_corrwith_non_timeseries_data(self):
         for row in index[:4]:
             tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row]))
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_corrwith_with_objects(self, using_infer_string):
         df1 = DataFrame(
             np.random.default_rng(2).standard_normal((10, 4)),
@@ -334,9 +331,7 @@ def test_corrwith_with_objects(self, using_infer_string):
         df2["obj"] = "bar"
 
         if using_infer_string:
-            import pyarrow as pa
-
-            with pytest.raises(pa.lib.ArrowNotImplementedError, match="has no kernel"):
+            with pytest.raises(TypeError, match="Cannot perform reduction"):
                 df1.corrwith(df2)
         else:
             with pytest.raises(TypeError, match="Could not convert"):

diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py
@@ -4,8 +4,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -184,13 +182,11 @@ def test_dropna_multiple_axes(self):
         with pytest.raises(TypeError, match="supplying multiple axes"):
             inp.dropna(how="all", axis=(0, 1), inplace=True)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_dropna_tz_aware_datetime(self):
         # GH13407
-        df = DataFrame()
         dt1 = datetime.datetime(2015, 1, 1, tzinfo=dateutil.tz.tzutc())
         dt2 = datetime.datetime(2015, 2, 2, tzinfo=dateutil.tz.tzutc())
-        df["Time"] = [dt1]
+        df = DataFrame({"Time": [dt1]})
         result = df.dropna(axis=0)
         expected = DataFrame({"Time": [dt1]})
         tm.assert_frame_equal(result, expected)

diff --git a/pandas/tests/frame/methods/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 
 import pandas as pd
@@ -135,13 +133,9 @@ def test_dtypes_timedeltas(self):
         )
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_frame_apply_np_array_return_type(self, using_infer_string):
         # GH 35517
         df = DataFrame([["foo"]])
         result = df.apply(lambda col: np.array("bar"))
-        if using_infer_string:
-            expected = Series([np.array(["bar"])])
-        else:
-            expected = Series(["bar"])
+        expected = Series(np.array(["bar"]), dtype=object)
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py
@@ -64,7 +64,6 @@ def test_interpolate_inplace(self, frame_or_series, request):
         assert np.shares_memory(orig, obj.values)
         assert orig.squeeze()[1] == 1.5
 
-    # TODO(infer_string) raise proper TypeError in case of string dtype
     @pytest.mark.xfail(
         using_string_dtype(), reason="interpolate doesn't work for string"
     )

diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py
@@ -4,8 +4,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.core.dtypes.common import (
     is_float_dtype,
     is_integer_dtype,
@@ -644,7 +642,6 @@ def test_rest_index_multiindex_categorical_with_missing_values(self, codes):
         tm.assert_frame_equal(res, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 @pytest.mark.parametrize(
     "array, dtype",
     [

diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas import (
     DataFrame,
     MultiIndex,
@@ -27,7 +25,6 @@ def test_no_copy_blocks(self, float_frame):
         assert _last_df is not None and not _last_df[column].equals(df[column])
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 def test_to_dict_of_blocks_item_cache():
     # Calling to_dict_of_blocks should not poison item_cache
     df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]})