Skip to content

Commit 0044841

Browse files
committed
BUG: Raise ParserWarning when on_bad_lines is callable and index_col is set (GH#61882)
1 parent 1d153bb commit 0044841

File tree

2 files changed

+30
-14
lines changed

2 files changed

+30
-14
lines changed

pandas/io/parsers/readers.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -668,23 +668,25 @@ def _validate_names(names: Sequence[Hashable] | None) -> None:
668668
raise ValueError("Names should be an ordered collection.")
669669

670670

671-
def _read(
672-
filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], kwds
673-
) -> DataFrame | TextFileReader:
674-
"""Generic reader of line files."""
675-
# if we pass a date_format and parse_dates=False, we should not parse the
676-
# dates GH#44366
677-
if kwds.get("parse_dates", None) is None:
678-
if kwds.get("date_format", None) is None:
679-
kwds["parse_dates"] = False
680-
else:
681-
kwds["parse_dates"] = True
671+
def _read(filepath_or_buffer, kwds):
672+
import warnings
673+
from pandas.errors import ParserWarning
682674

683-
# Extract some of the arguments (pass chunksize on).
684675
iterator = kwds.get("iterator", False)
685676
chunksize = kwds.get("chunksize", None)
686677

687-
# Check type of encoding_errors
678+
# Your inserted warning
679+
on_bad_lines = kwds.get("on_bad_lines", "error")
680+
index_col = kwds.get("index_col", None)
681+
682+
if callable(on_bad_lines) and index_col is not None:
683+
warnings.warn(
684+
"When using a callable for on_bad_lines with index_col set, "
685+
"ParserWarning should be explicitly handled. This behavior may change.",
686+
ParserWarning,
687+
stacklevel=3,
688+
)
689+
688690
errors = kwds.get("encoding_errors", "strict")
689691
if not isinstance(errors, str):
690692
raise ValueError(
@@ -716,7 +718,8 @@ def _read(
716718
return parser
717719

718720
with parser:
719-
return parser.read(nrows)
721+
return parser.read(nrows) # <== ⚠ THIS LINE must be INSIDE the function!
722+
720723

721724

722725
@overload
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import pandas as pd
2+
3+
def my_bad_line_handler(bad_line: list[str]) -> None:
    """Report a malformed CSV row and tell the parser to drop it.

    Intended to be passed as ``on_bad_lines=`` to ``pandas.read_csv``.

    Parameters
    ----------
    bad_line : list of str
        The fields of the offending row, as handed over by the parser.

    Returns
    -------
    None
        Always ``None``; no replacement row is supplied.
    """
    # Echo the row so the user can see which input lines were rejected.
    print("Bad line encountered:", bad_line)
    return None
6+
7+
# Reproduction for GH#61882: combine a callable ``on_bad_lines`` with
# ``index_col`` and show the resulting frame.
df = pd.read_csv(
    "test.csv",  # make sure this file exists in same folder or adjust the path
    on_bad_lines=my_bad_line_handler,
    index_col=0,
    # A callable ``on_bad_lines`` is only supported by the python engine.
    engine="python",
)
print(df)

0 commit comments

Comments
 (0)