diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -672,6 +672,44 @@ def _read(
     filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], kwds
 ) -> DataFrame | TextFileReader:
     """Generic reader of line files."""
+    # Only the opt-in polars engine is intercepted here; every other engine
+    # name falls through unchanged to the existing parser machinery.
+    # NOTE(review): presumably internal engine names also reach _read, so a
+    # strict allow-list at this point could reject valid calls -- confirm
+    # before adding one.
+    if kwds.get("engine") == "polars":
+        try:
+            import polars as pl  # type: ignore[import-untyped]
+        except ImportError as err:
+            raise ImportError(
+                "Polars is not installed. "
+                "Please install it with 'pip install polars'."
+            ) from err
+
+        # kwds carries pandas option names, so translate the ones that map
+        # one-to-one onto polars.read_csv. Options left as None keep the
+        # polars default; options without a direct equivalent are not
+        # forwarded.
+        pandas_to_polars = {
+            "delimiter": "separator",
+            "quotechar": "quote_char",
+            "encoding": "encoding",
+        }
+        polars_kwargs = {
+            polars_name: kwds[pandas_name]
+            for pandas_name, polars_name in pandas_to_polars.items()
+            if kwds.get(pandas_name) is not None
+        }
+
+        # File-like buffers are handed to polars untouched (str(buffer) would
+        # yield its repr, not a path); path-like objects are converted because
+        # not every polars version accepts them directly.
+        source = filepath_or_buffer
+        if not hasattr(source, "read"):
+            source = str(source)
+
+        return pl.read_csv(source, **polars_kwargs).to_pandas()
+
     # if we pass a date_format and parse_dates=False, we should not parse the
     # dates GH#44366
     if kwds.get("parse_dates", None) is None:
diff --git a/pandas/tests/io/parser/test_read_csv_polars.py b/pandas/tests/io/parser/test_read_csv_polars.py
new file mode 100644
--- /dev/null
+++ b/pandas/tests/io/parser/test_read_csv_polars.py
@@ -0,0 +1,42 @@
+import io
+
+import pytest
+
+import pandas as pd
+
+pytest.importorskip("polars")
+# DataFrame.to_pandas() in polars needs pyarrow for the conversion
+pytest.importorskip("pyarrow")
+
+
+def test_read_csv_with_polars(tmp_path):
+    # Create a simple CSV file
+    file = tmp_path / "sample.csv"
+    file.write_text("a,b\n1,2\n3,4")
+
+    # Read using engine='polars'
+    df = pd.read_csv(file, engine="polars")
+
+    assert df.shape == (2, 2)
+    assert list(df.columns) == ["a", "b"]
+    assert df.iloc[0, 0] == 1
+    assert df.iloc[1, 1] == 4
+
+
+def test_read_csv_with_polars_custom_separator(tmp_path):
+    # pandas option names must be translated, not silently dropped
+    file = tmp_path / "sample.csv"
+    file.write_text("a;b\n1;2\n3;4")
+
+    df = pd.read_csv(file, sep=";", engine="polars")
+
+    assert df.shape == (2, 2)
+    assert list(df.columns) == ["a", "b"]
+
+
+def test_read_csv_with_polars_buffer():
+    # file-like objects must be read, not stringified into a bogus path
+    df = pd.read_csv(io.StringIO("a,b\n1,2\n3,4"), engine="polars")
+
+    assert df.shape == (2, 2)
+    assert list(df.columns) == ["a", "b"]