Skip to content

BUG: Fix Series.reindex losing values when reindexing to MultiIndex #61969

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 7, 2025
Merged
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,8 @@ MultiIndex
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
- Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`)
- Bug in :meth:`DataFrame.reindex` where reindexing a DataFrame with a named Index to a MultiIndex would incorrectly set all values to ``NaN``. Now correctly preserves values when the source index name matches a target level name (:issue:`60923`)
- Bug in :meth:`Series.reindex` where reindexing a Series with a named Index to a MultiIndex would incorrectly set all values to ``NaN``. Now correctly preserves values when the source index name matches a target level name (:issue:`60923`)

I/O
^^^
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5355,6 +5355,18 @@ def reindex(
limit: int | None = None,
tolerance=None,
) -> DataFrame:
# Automatically detect matching level when reindexing from Index to MultiIndex.
# This prevents values from being incorrectly set to NaN when the source index
# name matches a level name in the target MultiIndex. Only applies when source
# is not already a MultiIndex.
if (
level is None
and index is not None
and isinstance(index, MultiIndex)
and not isinstance(self.index, MultiIndex)
and self.index.name in index.names
):
level = self.index.name
return super().reindex(
labels=labels,
index=index,
Expand Down
9 changes: 9 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4886,6 +4886,15 @@ def reindex( # type: ignore[override]
limit: int | None = None,
tolerance=None,
) -> Series:
# Automatically detect matching level when reindexing from Index to MultiIndex.
# This prevents values from being incorrectly set to NaN when the source index
# name matches a level name in the target MultiIndex
if (
level is None
and isinstance(index, MultiIndex)
and self.index.name in index.names
):
level = self.index.name
return super().reindex(
index=index,
method=method,
Expand Down
52 changes: 52 additions & 0 deletions pandas/tests/frame/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -1258,3 +1258,55 @@ def test_invalid_method(self):
msg = "Invalid fill method"
with pytest.raises(ValueError, match=msg):
df.reindex([1, 0, 2], method="asfreq")

def test_reindex_index_name_matches_multiindex_level(self):
"""
Test automatic level detection when reindexing from Index to MultiIndex.
When source index name matches a level name in target MultiIndex and level
is not specified, should behave same as if level was explicitly set.
"""
# Create source DataFrame with named Index
df = DataFrame(
{"value": [1, 2], "other": ["A", "B"]},
index=Index([10, 20], name="a"),
)

# Create target MultiIndex with matching level name
target = MultiIndex.from_product(
[[10, 20], ["x", "y"]],
names=["a", "b"], # 'a' matches source index name
)

result = df.reindex(index=target)
expected = df.reindex(index=target, level="a")

tm.assert_frame_equal(result, expected)

# Verify values are propagated correctly
expected_values = {
(10, "x"): {"value": 1, "other": "A"},
(10, "y"): {"value": 1, "other": "A"},
(20, "x"): {"value": 2, "other": "B"},
(20, "y"): {"value": 2, "other": "B"},
}
for idx, expected_row in expected_values.items():
for col, val in expected_row.items():
assert result.loc[idx, col] == val

def test_reindex_index_name_no_match_multiindex_level(self):
"""
Test reindexing behavior when source index name doesn't match any level
in target MultiIndex. Should fill with NaN since there's no level match.
"""
df = DataFrame({"value": [1, 2]}, index=Index([10, 20], name="different_name"))

target = MultiIndex.from_product([[10, 20], ["x", "y"]], names=["a", "b"])

result = df.reindex(index=target)

# Should fill with NaN since no level match
assert result.isna().all().all()

# Verify shape is correct
assert result.index.equals(target)
assert result.columns.equals(df.columns)
46 changes: 46 additions & 0 deletions pandas/tests/series/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,3 +434,49 @@ def test_reindex_expand_nonnano_nat(dtype):
np.array([1, getattr(np, dtype)("nat", "s")], dtype=f"{dtype}[s]")
)
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "name, expected_match_level_a",
    [
        # Name equals target level "a": values are expected to carry over.
        ("a", True),
        # Unnamed source index: no level is expected to match.
        (None, False),
        # Name absent from the target's level names: no match expected.
        ("x", False),
    ],
)
def test_reindex_multiindex_automatic_level(name, expected_match_level_a):
    """
    Reindex a Series with a flat Index onto a three-level MultiIndex.

    ``name`` is the name given to the source index. When
    ``expected_match_level_a`` is true the result is compared against an
    explicit ``level="a"`` reindex; otherwise against an all-NaN Series on
    the target index.
    """
    source_index = Index([81, 82], name=name)
    series = Series([26.73, 24.255], index=source_index)
    level_values = [[81, 82], [np.nan], ["2018-06-01", "2018-07-01"]]
    target = MultiIndex.from_product(level_values, names=["a", "b", "c"])

    result = series.reindex(target)

    expected = (
        series.reindex(target, level="a")
        if expected_match_level_a
        else Series(np.nan, index=target, dtype=series.dtype)
    )

    tm.assert_series_equal(result, expected)


def test_reindex_multiindex_explicit_level_overrides():
    """
    Reindex with an explicitly supplied ``level`` on a name-matching source.

    The source index is named "a", which is also the name of the target's
    first level. Positional ``level=0`` and label ``level="a"`` therefore
    refer to the same level here, and the two results must be identical.
    """
    source_index = Index([81, 82], name="a")
    series = Series([26.73, 24.255], index=source_index)
    level_values = [[81, 82], [np.nan], ["2018-06-01", "2018-07-01"]]
    target = MultiIndex.from_product(level_values, names=["a", "b", "c"])

    by_position = series.reindex(target, level=0)
    by_label = series.reindex(target, level="a")

    tm.assert_series_equal(by_position, by_label)
Loading