Skip to content

[ENH] Add polars Engine Support to pd.read_csv() #61988

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@
__version__ = v.get("closest-tag", v["version"])
__git_version__ = v.get("full-revisionid")
del get_versions, v

__version__ = "2.3.3.dev0"

# module level doc-string
__doc__ = """
Expand Down
29 changes: 26 additions & 3 deletions pandas/io/parsers/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,8 +672,31 @@ def _read(
filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], kwds
) -> DataFrame | TextFileReader:
"""Generic reader of line files."""
# if we pass a date_format and parse_dates=False, we should not parse the
# dates GH#44366
engine = kwds.get("engine", "c")

if engine not in ("c", "python", "pyarrow", "polars"):
raise ValueError(f"Unknown engine: {engine}")

if engine == "polars":
try:
import polars as pl # type: ignore[import-untyped]
except ImportError:
raise ImportError("Polars is not installed. Please install it with 'pip install polars'.")

# Filter kwargs that are not supported by Polars
allowed_polars_args = {
"has_header", "columns", "new_columns", "skip_rows", "n_rows",
"encoding", "separator", "quote_char", "comment_char", "null_values"
}
polars_kwargs = {k: v for k, v in kwds.items() if k in allowed_polars_args}

# Polars doesn't accept Path-like objects directly in all versions, convert to string
path = str(filepath_or_buffer)

df = pl.read_csv(path, **polars_kwargs).to_pandas()
return df

# Default pandas behavior
if kwds.get("parse_dates", None) is None:
if kwds.get("date_format", None) is None:
kwds["parse_dates"] = False
Expand Down Expand Up @@ -1802,7 +1825,7 @@ def _refine_defaults_read(
kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.WARN
elif on_bad_lines == "skip":
kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.SKIP
elif callable(on_bad_lines):
elif callable(on_bad_lines):
if engine not in ["python", "pyarrow"]:
raise ValueError(
"on_bad_line can only be a callable function "
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/io/parser/test_read_csv_polars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pytest

def test_read_csv_with_polars(tmp_path):
    """Check that ``read_csv(engine="polars")`` parses a small CSV file.

    Parameters
    ----------
    tmp_path : pathlib.Path
        Per-test temporary directory supplied by the pytest fixture.

    Notes
    -----
    The test is skipped when polars (or pandas) cannot be imported.
    """
    # Only the availability of polars matters here; the module object is
    # never used directly, so it is not bound to a local name.
    pytest.importorskip("polars")
    pd = pytest.importorskip("pandas")

    # Create a simple CSV file (explicit encoding so the bytes on disk do
    # not depend on the platform default).
    csv_path = tmp_path / "sample.csv"
    csv_path.write_text("a,b\n1,2\n3,4", encoding="utf-8")

    # Read using engine='polars'
    df = pd.read_csv(csv_path, engine="polars")

    assert df.shape == (2, 2)
    assert list(df.columns) == ["a", "b"]
    # Verify every cell, not just two corners, so a transposed or
    # partially wrong parse cannot pass unnoticed.
    assert df.to_dict(orient="list") == {"a": [1, 3], "b": [2, 4]}
5 changes: 2 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@ build-backend = "mesonpy"

[project]
name = 'pandas'
dynamic = [
'version'
]
version = "2.3.3.dev0"

description = 'Powerful data structures for data analysis, time series, and statistics'
readme = 'README.md'
authors = [
Expand Down
Loading