From 085caa1bc02bacaad4818ad8e9859ecade6cc783 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Fri, 25 Jul 2025 04:01:40 +0530 Subject: [PATCH 01/13] BUG: Fix TypeError in assert_index_equal when comparing CategoricalIndex with check_categorical=True and exact=False --- pandas/_testing/asserters.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index daa5187cdb636..11f0613601c8c 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -322,7 +322,10 @@ def _check_types(left, right, obj: str = "Index") -> None: # skip exact index checking when `check_categorical` is False elif check_exact and check_categorical: if not left.equals(right): - mismatch = left._values != right._values + try: + mismatch = left._values != right._values + except TypeError as e: + raise AssertionError(f"{obj} cannot be compared due to incompatible categorical types.\n{e}") from e if not isinstance(mismatch, np.ndarray): mismatch = cast("ExtensionArray", mismatch).fillna(True) From c0427e6a359e0f250e9b88c52a7d0acddd3a92a6 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Fri, 25 Jul 2025 05:20:13 +0530 Subject: [PATCH 02/13] STYLE: Fix E501 line too long in assert_index_equal error message --- pandas/_testing/asserters.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 11f0613601c8c..e0f698cf8ac6d 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -325,7 +325,9 @@ def _check_types(left, right, obj: str = "Index") -> None: try: mismatch = left._values != right._values except TypeError as e: - raise AssertionError(f"{obj} cannot be compared due to incompatible categorical types.\n{e}") from e + raise AssertionError( + f"{obj} cannot be compared due to incompatible categorical types.\n{e}" + ) from e if not isinstance(mismatch, np.ndarray): mismatch = cast("ExtensionArray", mismatch).fillna(True) From db54fad1ecea88cc806beb79f8f9a7f7500b2759 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Fri, 25 Jul 2025 05:27:12 +0530 Subject: [PATCH 03/13] TST: Add test case for categorical index assertion fix --- pandas/_testing/asserters.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index e0f698cf8ac6d..f36d6c2b586a8 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -326,7 +326,8 @@ def _check_types(left, right, obj: str = "Index") -> None: mismatch = left._values != right._values except TypeError as e: raise AssertionError( - f"{obj} cannot be compared due to incompatible categorical types.\n{e}" + f"{obj} cannot be compared due to incompatible" + f"categorical types.\n{e}" ) from e if not isinstance(mismatch, np.ndarray): From 5edf8cee7392a862cf359f474336969ec46a712a Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Fri, 25 Jul 2025 23:23:04 +0530 Subject: [PATCH 04/13] BUG: Fix (GH#61941) and unit test --- pandas/_testing/asserters.py | 4 +++- pandas/tests/util/test_assert_index_equal.py | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index f36d6c2b586a8..d5d41e591ff81 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -323,7 +323,9 @@ def _check_types(left, right, obj: str = "Index") -> None: elif check_exact and check_categorical: if not left.equals(right): try: - mismatch = left._values != right._values + mismatch = ( + left._internal_get_values() != right._internal_get_values() + ) except TypeError as e: raise AssertionError( f"{obj} cannot be compared due to incompatible" diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index ab52d6c8e9f39..9786a2f552e00 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -317,3 +317,11 @@ def test_assert_multi_index_dtype_check_categorical(check_categorical): tm.assert_index_equal(idx1, idx2, check_categorical=check_categorical) else: tm.assert_index_equal(idx1, idx2, check_categorical=check_categorical) + +def test_assert_index_equal_categorical_mismatch_categories(): + # GH#61941 + left = CategoricalIndex(["a", "b"], categories=["a", "b"]) + right = CategoricalIndex(["a", "b"], categories=["b", "a"]) + + with pytest.raises(AssertionError, match="cannot be compared due to incompatible"): + tm.assert_index_equal(left, right, check_exact=True, check_categorical=True) From 2b6d3a0d5d95415750b2fe014d3311bf57dd5891 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Sat, 26 Jul 2025 14:28:15 +0530 Subject: [PATCH 05/13] Add unit test and (GH#61941) --- pandas/_testing/asserters.py | 11 +++-------- pandas/tests/util/test_assert_index_equal.py | 8 ++++---- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index d5d41e591ff81..4733e80b07e74 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -323,14 +323,9 @@ def _check_types(left, right, obj: str = "Index") -> None: elif check_exact and check_categorical: if not left.equals(right): try: - mismatch = ( - left._internal_get_values() != right._internal_get_values() - ) - except TypeError as e: - raise AssertionError( - f"{obj} cannot be compared due to incompatible" - f"categorical types.\n{e}" - ) from e + mismatch = left._values != right._values + except TypeError : + mismatch = left._internal_get_values() != right._internal_get_values() if not isinstance(mismatch, np.ndarray): mismatch = cast("ExtensionArray", mismatch).fillna(True) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 9786a2f552e00..3af14f58a7999 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -320,8 +320,8 @@ def test_assert_multi_index_dtype_check_categorical(check_categorical): def test_assert_index_equal_categorical_mismatch_categories(): # GH#61941 - left = CategoricalIndex(["a", "b"], categories=["a", "b"]) - right = CategoricalIndex(["a", "b"], categories=["b", "a"]) + ci1 = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"], ordered=False) + ci2 = CategoricalIndex(["a", "x", "c"], categories=["a", "b", "c"], ordered=False) - with pytest.raises(AssertionError, match="cannot be compared due to incompatible"): - tm.assert_index_equal(left, right, check_exact=True, check_categorical=True) + with pytest.raises(AssertionError, match="Index are different"): + tm.assert_index_equal(ci1, ci2, check_exact=False, check_categorical=True) \ No newline at end of file From 08739f2e6f8045a188df99a236080a47a6840474 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Sat, 26 Jul 2025 15:42:08 +0530 Subject: [PATCH 06/13] Add unit test and (GH#61941) --- pandas/tests/util/test_assert_index_equal.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 3af14f58a7999..38bce762080a0 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -322,6 +322,5 @@ def test_assert_index_equal_categorical_mismatch_categories(): # GH#61941 ci1 = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"], ordered=False) ci2 = CategoricalIndex(["a", "x", "c"], categories=["a", "b", "c"], ordered=False) - with pytest.raises(AssertionError, match="Index are different"): - tm.assert_index_equal(ci1, ci2, check_exact=False, check_categorical=True) \ No newline at end of file + tm.assert_index_equal(ci1, ci2, check_exact=False, check_categorical=True) From ddf650ebfb4c0055eb966d27cfead2c8d773801c Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Sat, 26 Jul 2025 16:38:26 +0530 Subject: [PATCH 07/13] Add unit test and (GH#61941) --- pandas/tests/util/test_assert_index_equal.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 38bce762080a0..db7c6287bcfff 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -322,5 +322,7 @@ def test_assert_index_equal_categorical_mismatch_categories(): # GH#61941 ci1 = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"], ordered=False) ci2 = CategoricalIndex(["a", "x", "c"], categories=["a", "b", "c"], ordered=False) + with pytest.raises(AssertionError, match="Index are different"): tm.assert_index_equal(ci1, ci2, check_exact=False, check_categorical=True) + \ No newline at end of file From 3f998d473503fd4c2a5c702680cbae500e3ea61c Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Sat, 26 Jul 2025 17:17:21 +0530 Subject: [PATCH 08/13] TST: Fix formatting in test_assert_index_equal (via pre-commit) --- pandas/tests/util/test_assert_index_equal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index db7c6287bcfff..bbda55e86ca30 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -318,11 +318,11 @@ def test_assert_multi_index_dtype_check_categorical(check_categorical): else: tm.assert_index_equal(idx1, idx2, check_categorical=check_categorical) + def test_assert_index_equal_categorical_mismatch_categories(): # GH#61941 ci1 = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"], ordered=False) ci2 = CategoricalIndex(["a", "x", "c"], categories=["a", "b", "c"], ordered=False) - + with pytest.raises(AssertionError, match="Index are different"): tm.assert_index_equal(ci1, ci2, check_exact=False, check_categorical=True) - \ No newline at end of file From 4cc4e77a7cfbf7b84b33df395361546a5743142a Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Sat, 26 Jul 2025 20:03:09 +0530 Subject: [PATCH 09/13] style: fix formatting with pre-commit (Ruff) --- pandas/_testing/asserters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 4733e80b07e74..3a948b67f71eb 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -324,8 +324,8 @@ def _check_types(left, right, obj: str = "Index") -> None: if not left.equals(right): try: mismatch = left._values != right._values - except TypeError : - mismatch = left._internal_get_values() != right._internal_get_values() + except TypeError: + mismatch = left._internal_get_values() != right._internal_get_values() if not isinstance(mismatch, np.ndarray): mismatch = cast("ExtensionArray", mismatch).fillna(True) From f9f62a3016725d652f176b9fa81adb576d7fd845 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Sat, 26 Jul 2025 20:52:07 +0530 Subject: [PATCH 10/13] Fix: Check fail --- pandas/_testing/asserters.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 3a948b67f71eb..9b3d998607fd0 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -325,7 +325,14 @@ def _check_types(left, right, obj: str = "Index") -> None: try: mismatch = left._values != right._values except TypeError: - mismatch = left._internal_get_values() != right._internal_get_values() + if hasattr(left, "_internal_get_values") and hasattr( + right, "_internal_get_values" + ): + mismatch = ( + left._internal_get_values() != right._internal_get_values() + ) + else: + mismatch = left.values != right.values if not isinstance(mismatch, np.ndarray): mismatch = cast("ExtensionArray", mismatch).fillna(True) From 4f1551cb48b57f61d664f593cb44c8e6d9280cb3 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Mon, 28 Jul 2025 19:55:15 +0530 Subject: [PATCH 11/13] BUG: Fix boolean column indexing (#61980) --- pandas/core/frame.py | 9 +++++++++ pandas/tests/frame/indexing/test_getitem.py | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 48a5596e00061..0b9b85d3f09e6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3995,6 +3995,15 @@ def __getitem__(self, key): key = lib.item_from_zerodim(key) key = com.apply_if_callable(key, self) + if ( + isinstance(key, (list, np.ndarray)) + and len(key) > 0 + and any(isinstance(k, bool) for k in key) + and all(isinstance(k, (bool, str)) for k in key) + and not (len(key) == len((self.index) and all(isinstance(k, bool) for k in key))) + ): + return self.reindex_columns(key) + if is_hashable(key) and not is_iterator(key) and not isinstance(key, slice): # is_iterator to exclude generator e.g. test_getitem_listlike # As of Python 3.12, slice is hashable which breaks MultiIndex (GH#57500) diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 25d6e06a4c675..b03763e5ae503 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -101,6 +101,14 @@ def test_getitem_list_duplicates(self): expected = df.iloc[:, 2:] tm.assert_frame_equal(result, expected) + def test_getitem_bool_column_name(self): + # GH#61980 + data = {"A": [1, 2, 3], "B": [4, 5, 6], True: [7, 8, 9]} + df = DataFrame(data) + result = df[[True]] + expected = DataFrame({True: [7, 8, 9]}) + tm.assert_frame_equal(result, expected) + def test_getitem_dupe_cols(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) msg = "\"None of [Index(['baf'], dtype=" From 0a5fc216c80c18d5250d22f58f62593e3f09f5d4 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Mon, 28 Jul 2025 22:55:41 +0530 Subject: [PATCH 12/13] BUG: Fix boolean column indexing (#61980) --- pandas/core/frame.py | 10 ++++++---- pandas/tests/frame/indexing/test_getitem.py | 9 ++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0b9b85d3f09e6..ccae46f1e3dde 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4000,10 +4000,12 @@ def __getitem__(self, key): and len(key) > 0 and any(isinstance(k, bool) for k in key) and all(isinstance(k, (bool, str)) for k in key) - and not (len(key) == len((self.index) and all(isinstance(k, bool) for k in key))) - ): - return self.reindex_columns(key) - + and not ( + len(key) == len(self.index) and all(isinstance(k, bool) for k in key) + ) + ): + return self.reindex_columns(key) + if is_hashable(key) and not is_iterator(key) and not isinstance(key, slice): # is_iterator to exclude generator e.g. test_getitem_listlike # As of Python 3.12, slice is hashable which breaks MultiIndex (GH#57500) diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index b03763e5ae503..5d1744725f114 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -101,12 +101,11 @@ def test_getitem_list_duplicates(self): expected = df.iloc[:, 2:] tm.assert_frame_equal(result, expected) - def test_getitem_bool_column_name(self): - # GH#61980 - data = {"A": [1, 2, 3], "B": [4, 5, 6], True: [7, 8, 9]} - df = DataFrame(data) + def test_getitem_single_bool_column(self): + # GH#61980 + df = DataFrame({True: [10, 20, 30]}) result = df[[True]] - expected = DataFrame({True: [7, 8, 9]}) + expected = DataFrame({True: [10, 20, 30]}) tm.assert_frame_equal(result, expected) def test_getitem_dupe_cols(self): From c1d129a20a5ebcfaf4c99517b0d97b4276881e9f Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Mon, 28 Jul 2025 23:35:42 +0530 Subject: [PATCH 13/13] Revert unrelated changes to asserters.py and test_assert_index_equal.py --- pandas/_testing/asserters.py | 12 +----------- pandas/tests/util/test_assert_index_equal.py | 9 --------- 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 9b3d998607fd0..daa5187cdb636 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -322,17 +322,7 @@ def _check_types(left, right, obj: str = "Index") -> None: # skip exact index checking when `check_categorical` is False elif check_exact and check_categorical: if not left.equals(right): - try: - mismatch = left._values != right._values - except TypeError: - if hasattr(left, "_internal_get_values") and hasattr( - right, "_internal_get_values" - ): - mismatch = ( - left._internal_get_values() != right._internal_get_values() - ) - else: - mismatch = left.values != right.values + mismatch = left._values != right._values if not isinstance(mismatch, np.ndarray): mismatch = cast("ExtensionArray", mismatch).fillna(True) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index bbda55e86ca30..ab52d6c8e9f39 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -317,12 +317,3 @@ def test_assert_multi_index_dtype_check_categorical(check_categorical): tm.assert_index_equal(idx1, idx2, check_categorical=check_categorical) else: tm.assert_index_equal(idx1, idx2, check_categorical=check_categorical) - - -def test_assert_index_equal_categorical_mismatch_categories(): - # GH#61941 - ci1 = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"], ordered=False) - ci2 = CategoricalIndex(["a", "x", "c"], categories=["a", "b", "c"], ordered=False) - - with pytest.raises(AssertionError, match="Index are different"): - tm.assert_index_equal(ci1, ci2, check_exact=False, check_categorical=True)