From 085caa1bc02bacaad4818ad8e9859ecade6cc783 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Fri, 25 Jul 2025 04:01:40 +0530 Subject: [PATCH 01/13] BUG: Fix TypeError in assert_index_equal when comparing CategoricalIndex with check_categorical=True and exact=False --- pandas/_testing/asserters.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index daa5187cdb636..11f0613601c8c 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -322,7 +322,10 @@ def _check_types(left, right, obj: str = "Index") -> None: # skip exact index checking when `check_categorical` is False elif check_exact and check_categorical: if not left.equals(right): - mismatch = left._values != right._values + try: + mismatch = left._values != right._values + except TypeError as e: + raise AssertionError(f"{obj} cannot be compared due to incompatible categorical types.\n{e}") from e if not isinstance(mismatch, np.ndarray): mismatch = cast("ExtensionArray", mismatch).fillna(True) From c0427e6a359e0f250e9b88c52a7d0acddd3a92a6 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Fri, 25 Jul 2025 05:20:13 +0530 Subject: [PATCH 02/13] STYLE: Fix E501 line too long in assert_index_equal error message --- pandas/_testing/asserters.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 11f0613601c8c..e0f698cf8ac6d 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -325,7 +325,9 @@ def _check_types(left, right, obj: str = "Index") -> None: try: mismatch = left._values != right._values except TypeError as e: - raise AssertionError(f"{obj} cannot be compared due to incompatible categorical types.\n{e}") from e + raise AssertionError( + f"{obj} cannot be compared due to incompatible categorical types.\n{e}" + ) from e if not isinstance(mismatch, np.ndarray): mismatch = cast("ExtensionArray", mismatch).fillna(True) From db54fad1ecea88cc806beb79f8f9a7f7500b2759 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Fri, 25 Jul 2025 05:27:12 +0530 Subject: [PATCH 03/13] TST: Add test case for categorical index assertion fix --- pandas/_testing/asserters.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index e0f698cf8ac6d..f36d6c2b586a8 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -326,7 +326,8 @@ def _check_types(left, right, obj: str = "Index") -> None: mismatch = left._values != right._values except TypeError as e: raise AssertionError( - f"{obj} cannot be compared due to incompatible categorical types.\n{e}" + f"{obj} cannot be compared due to incompatible" + f"categorical types.\n{e}" ) from e if not isinstance(mismatch, np.ndarray): From 5edf8cee7392a862cf359f474336969ec46a712a Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Fri, 25 Jul 2025 23:23:04 +0530 Subject: [PATCH 04/13] BUG: Fix (GH#61941) and unit test --- pandas/_testing/asserters.py | 4 +++- pandas/tests/util/test_assert_index_equal.py | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index f36d6c2b586a8..d5d41e591ff81 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -323,7 +323,9 @@ def _check_types(left, right, obj: str = "Index") -> None: elif check_exact and check_categorical: if not left.equals(right): try: - mismatch = left._values != right._values + mismatch = ( + left._internal_get_values() != right._internal_get_values() + ) except TypeError as e: raise AssertionError( f"{obj} cannot be compared due to incompatible" diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index ab52d6c8e9f39..9786a2f552e00 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -317,3 +317,11 @@ def test_assert_multi_index_dtype_check_categorical(check_categorical): tm.assert_index_equal(idx1, idx2, check_categorical=check_categorical) else: tm.assert_index_equal(idx1, idx2, check_categorical=check_categorical) + +def test_assert_index_equal_categorical_mismatch_categories(): + # GH#61941 + left = CategoricalIndex(["a", "b"], categories=["a", "b"]) + right = CategoricalIndex(["a", "b"], categories=["b", "a"]) + + with pytest.raises(AssertionError, match="cannot be compared due to incompatible"): + tm.assert_index_equal(left, right, check_exact=True, check_categorical=True) From 2b6d3a0d5d95415750b2fe014d3311bf57dd5891 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Sat, 26 Jul 2025 14:28:15 +0530 Subject: [PATCH 05/13] Add unit test and (GH#61941) --- pandas/_testing/asserters.py | 11 +++-------- pandas/tests/util/test_assert_index_equal.py | 8 ++++---- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index d5d41e591ff81..4733e80b07e74 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -323,14 +323,9 @@ def _check_types(left, right, obj: str = "Index") -> None: elif check_exact and check_categorical: if not left.equals(right): try: - mismatch = ( - left._internal_get_values() != right._internal_get_values() - ) - except TypeError as e: - raise AssertionError( - f"{obj} cannot be compared due to incompatible" - f"categorical types.\n{e}" - ) from e + mismatch = left._values != right._values + except TypeError : + mismatch = left._internal_get_values() != right._internal_get_values() if not isinstance(mismatch, np.ndarray): mismatch = cast("ExtensionArray", mismatch).fillna(True) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 9786a2f552e00..3af14f58a7999 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -320,8 +320,8 @@ def test_assert_multi_index_dtype_check_categorical(check_categorical): def test_assert_index_equal_categorical_mismatch_categories(): # GH#61941 - left = CategoricalIndex(["a", "b"], categories=["a", "b"]) - right = CategoricalIndex(["a", "b"], categories=["b", "a"]) + ci1 = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"], ordered=False) + ci2 = CategoricalIndex(["a", "x", "c"], categories=["a", "b", "c"], ordered=False) - with pytest.raises(AssertionError, match="cannot be compared due to incompatible"): - tm.assert_index_equal(left, right, check_exact=True, check_categorical=True) + with pytest.raises(AssertionError, match="Index are different"): + tm.assert_index_equal(ci1, ci2, check_exact=False, check_categorical=True) \ No newline at end of file From 08739f2e6f8045a188df99a236080a47a6840474 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Sat, 26 Jul 2025 15:42:08 +0530 Subject: [PATCH 06/13] Add unit test and (GH#61941) --- pandas/tests/util/test_assert_index_equal.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 3af14f58a7999..38bce762080a0 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -322,6 +322,5 @@ def test_assert_index_equal_categorical_mismatch_categories(): # GH#61941 ci1 = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"], ordered=False) ci2 = CategoricalIndex(["a", "x", "c"], categories=["a", "b", "c"], ordered=False) - with pytest.raises(AssertionError, match="Index are different"): - tm.assert_index_equal(ci1, ci2, check_exact=False, check_categorical=True) \ No newline at end of file + tm.assert_index_equal(ci1, ci2, check_exact=False, check_categorical=True) From ddf650ebfb4c0055eb966d27cfead2c8d773801c Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Sat, 26 Jul 2025 16:38:26 +0530 Subject: [PATCH 07/13] Add unit test and (GH#61941) --- pandas/tests/util/test_assert_index_equal.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 38bce762080a0..db7c6287bcfff 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -322,5 +322,7 @@ def test_assert_index_equal_categorical_mismatch_categories(): # GH#61941 ci1 = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"], ordered=False) ci2 = CategoricalIndex(["a", "x", "c"], categories=["a", "b", "c"], ordered=False) + with pytest.raises(AssertionError, match="Index are different"): tm.assert_index_equal(ci1, ci2, check_exact=False, check_categorical=True) + \ No newline at end of file From 3f998d473503fd4c2a5c702680cbae500e3ea61c Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Sat, 26 Jul 2025 17:17:21 +0530 Subject: [PATCH 08/13] TST: Fix formatting in test_assert_index_equal (via pre-commit) --- pandas/tests/util/test_assert_index_equal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index db7c6287bcfff..bbda55e86ca30 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -318,11 +318,11 @@ def test_assert_multi_index_dtype_check_categorical(check_categorical): else: tm.assert_index_equal(idx1, idx2, check_categorical=check_categorical) + def test_assert_index_equal_categorical_mismatch_categories(): # GH#61941 ci1 = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"], ordered=False) ci2 = CategoricalIndex(["a", "x", "c"], categories=["a", "b", "c"], ordered=False) - + with pytest.raises(AssertionError, match="Index are different"): tm.assert_index_equal(ci1, ci2, check_exact=False, check_categorical=True) - \ No newline at end of file From 4cc4e77a7cfbf7b84b33df395361546a5743142a Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Sat, 26 Jul 2025 20:03:09 +0530 Subject: [PATCH 09/13] style: fix formatting with pre-commit (Ruff) --- pandas/_testing/asserters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 4733e80b07e74..3a948b67f71eb 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -324,8 +324,8 @@ def _check_types(left, right, obj: str = "Index") -> None: if not left.equals(right): try: mismatch = left._values != right._values - except TypeError : - mismatch = left._internal_get_values() != right._internal_get_values() + except TypeError: + mismatch = left._internal_get_values() != right._internal_get_values() if not isinstance(mismatch, np.ndarray): mismatch = cast("ExtensionArray", mismatch).fillna(True) From f9f62a3016725d652f176b9fa81adb576d7fd845 Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Sat, 26 Jul 2025 20:52:07 +0530 Subject: [PATCH 10/13] Fix: Check fail --- pandas/_testing/asserters.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 3a948b67f71eb..9b3d998607fd0 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -325,7 +325,14 @@ def _check_types(left, right, obj: str = "Index") -> None: try: mismatch = left._values != right._values except TypeError: - mismatch = left._internal_get_values() != right._internal_get_values() + if hasattr(left, "_internal_get_values") and hasattr( + right, "_internal_get_values" + ): + mismatch = ( + left._internal_get_values() != right._internal_get_values() + ) + else: + mismatch = left.values != right.values if not isinstance(mismatch, np.ndarray): mismatch = cast("ExtensionArray", mismatch).fillna(True) From 5d5d6682a58246b0d2bd8d6250c3ba91a095dbbd Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Mon, 28 Jul 2025 23:12:11 +0530 Subject: [PATCH 11/13] TST: Add test for CategoricalIndex vs Index in assert_index_equal --- pandas/tests/util/test_assert_index_equal.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index bbda55e86ca30..caf33f16638f1 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -321,8 +321,11 @@ def test_assert_multi_index_dtype_check_categorical(check_categorical): def test_assert_index_equal_categorical_mismatch_categories(): # GH#61941 - ci1 = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"], ordered=False) - ci2 = CategoricalIndex(["a", "x", "c"], categories=["a", "b", "c"], ordered=False) + ci = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"], ordered=False) + idx = Index(["a", "b", "c"]) with pytest.raises(AssertionError, match="Index are different"): - tm.assert_index_equal(ci1, ci2, check_exact=False, check_categorical=True) + tm.assert_index_equal( + ci, + idx, + ) From 459298d812a97e24f5c1948eb2a838f4c170089c Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Tue, 29 Jul 2025 00:48:07 +0530 Subject: [PATCH 12/13] TST: Use isinstance for CategoricalIndex in assert_index_equal --- pandas/_testing/asserters.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 9b3d998607fd0..fe445ffb15f49 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -35,6 +35,7 @@ import pandas as pd from pandas import ( Categorical, + CategoricalIndex, DataFrame, DatetimeIndex, Index, @@ -325,8 +326,8 @@ def _check_types(left, right, obj: str = "Index") -> None: try: mismatch = left._values != right._values except TypeError: - if hasattr(left, "_internal_get_values") and hasattr( - right, "_internal_get_values" + if isinstance(left, CategoricalIndex) and isinstance( + right, CategoricalIndex ): mismatch = ( left._internal_get_values() != right._internal_get_values() From ecaeb64b846bc4595bc8a2d64da0d758dcd966bd Mon Sep 17 00:00:00 2001 From: Aniket Singh Yadav Date: Tue, 29 Jul 2025 23:37:10 +0530 Subject: [PATCH 13/13] Use the public .codes property for internal comparison of CategoricalIndex --- pandas/_testing/asserters.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index fe445ffb15f49..8ecc114293f5f 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -329,9 +329,7 @@ def _check_types(left, right, obj: str = "Index") -> None: if isinstance(left, CategoricalIndex) and isinstance( right, CategoricalIndex ): - mismatch = ( - left._internal_get_values() != right._internal_get_values() - ) + mismatch = left.codes != right.codes else: mismatch = left.values != right.values