diff --git a/doc/make.py b/doc/make.py index 9542563dc037b..a22bdeb1c6296 100755 --- a/doc/make.py +++ b/doc/make.py @@ -130,14 +130,14 @@ def _sphinx_build(self, kind: str): Examples -------- - >>> DocBuilder(num_jobs=4)._sphinx_build("html") + >>> DocBuilder(num_jobs=1)._sphinx_build("html") """ if kind not in ("html", "latex", "linkcheck"): raise ValueError(f"kind must be html, latex or linkcheck, not {kind}") cmd = ["sphinx-build", "-b", kind] if self.num_jobs: - cmd += ["-j", self.num_jobs] + cmd += ["-j", "1"] if self.warnings_are_errors: cmd += ["-W", "--keep-going"] if self.verbosity: diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index be7a07dface0a..91bb463d6ac11 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -817,6 +817,7 @@ I/O - Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`) - Bug in :meth:`to_csv` where ``quotechar``` is not escaped when ``escapechar`` is not None (:issue:`61407`) - Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`) +- Bug in :func:`read_csv` with ``engine="python"`` and callable ``on_bad_lines`` where a ``ParserWarning`` for extra fields returned by the callable was only raised when ``index_col`` was ``None``. Now the warning is consistently raised regardless of ``index_col`` (:issue:`61837`) Period ^^^^^^ diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 23efc9c87e07c..acb458efa71b2 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -614,16 +614,10 @@ def _check_data_length( columns: list of column names data: list of array-likes containing the data column-wise. 
""" - if not self.index_col and len(columns) != len(data) and columns: - empty_str = is_object_dtype(data[-1]) and data[-1] == "" - # error: No overload variant of "__ror__" of "ndarray" matches - # argument type "ExtensionArray" - empty_str_or_na = empty_str | isna(data[-1]) # type: ignore[operator] - if len(columns) == len(data) - 1 and np.all(empty_str_or_na): - return + if columns and len(data) != len(columns): warnings.warn( - "Length of header or names does not match length of data. This leads " - "to a loss of data with index_col=False.", + f"Length of header or names ({len(columns)}) does not match number of " + f"fields in line ({len(data)}). Extra field will be dropped.", ParserWarning, stacklevel=find_stack_level(), ) diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py index a5bb151e84f47..c5524b1f89ee1 100644 --- a/pandas/tests/io/parser/test_python_parser_only.py +++ b/pandas/tests/io/parser/test_python_parser_only.py @@ -322,6 +322,29 @@ def test_malformed_skipfooter(python_parser_only): parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1) +def test_on_bad_lines_extra_fields_warns(python_parser_only): + parser = python_parser_only + data = """id,field_1,field_2 +101,A,B +102,C,D, E +103,F,G +""" + + def line_fixer(_line): + return ["1", "2", "3", "4", "5"] + + expected_warning = ( + r"Length of header or names \(3\) does not match number of fields in " + r"line \(5\)\. Extra field will be dropped\." + ) + + for index_col in [None, 0]: + with tm.assert_produces_warning(ParserWarning, match=expected_warning): + parser.read_csv( + StringIO(data), on_bad_lines=line_fixer, index_col=index_col + ) + + def test_python_engine_file_no_next(python_parser_only): parser = python_parser_only