From cd8b49d9b560215b653b533728713fd5591f4402 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 26 Jul 2025 19:00:04 +0200 Subject: [PATCH 01/45] Eager mode --- narwhals/__init__.py | 2 + narwhals/_arrow/series.py | 27 +++++++++- narwhals/_compliant/series.py | 11 +++- narwhals/_pandas_like/series.py | 16 +++++- narwhals/_polars/series.py | 14 ++++- narwhals/functions.py | 68 ++++++++++++++++++++++++- tests/expr_and_series/int_range_test.py | 54 ++++++++++++++++++++ 7 files changed, 186 insertions(+), 6 deletions(-) create mode 100644 tests/expr_and_series/int_range_test.py diff --git a/narwhals/__init__.py b/narwhals/__init__.py index 1c6dfbc0e4..6b6b971112 100644 --- a/narwhals/__init__.py +++ b/narwhals/__init__.py @@ -58,6 +58,7 @@ from_dict, from_numpy, get_level, + int_range, len_ as len, lit, max, @@ -143,6 +144,7 @@ "generate_temporary_column_name", "get_level", "get_native_namespace", + "int_range", "is_ordered_categorical", "len", "lit", diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 6c2b551ea9..6eb413a8f2 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -59,7 +59,7 @@ ) from narwhals._compliant.series import HistData from narwhals._utils import Version, _LimitedContext - from narwhals.dtypes import DType + from narwhals.dtypes import DType, IntegerType from narwhals.typing import ( ClosedInterval, FillNullStrategy, @@ -161,6 +161,31 @@ def from_iterable( chunked_array([data], dtype_pa), name=name, context=context ) + @classmethod + def _int_range( + cls, + start: int, + end: int, + step: int, + dtype: IntegerType | type[IntegerType], + context: _LimitedContext, + ) -> Self: + version = context._version + dtype_pa = narwhals_to_native_dtype(dtype, version) + name = "literal" + if cls._implementation._backend_version() < (21, 0, 0): # pragma: no cover + import numpy as np # ignore-banned-import + + data = np.arange(start=start, stop=end, step=step) + else: + data = pc.cast( + pa.arange(start=start, 
stop=end, step=step), # type: ignore[attr-defined] + dtype_pa, + ) + return cls.from_native( + chunked_array([data], dtype_pa), name=name, context=context + ) + def _from_scalar(self, value: Any) -> Self: if hasattr(value, "as_py"): value = value.as_py() diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index 788ec0bc9f..be8a12cfb4 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -39,7 +39,7 @@ from narwhals._compliant.expr import CompliantExpr, EagerExpr from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace from narwhals._utils import Implementation, Version, _LimitedContext - from narwhals.dtypes import DType + from narwhals.dtypes import DType, IntegerType from narwhals.series import Series from narwhals.typing import ( ClosedInterval, @@ -122,6 +122,15 @@ def from_iterable( name: str = "", dtype: IntoDType | None = None, ) -> Self: ... + @classmethod + def _int_range( + cls, + start: int, + end: int, + step: int, + dtype: IntegerType | type[IntegerType], + context: _LimitedContext, + ) -> Self: ... 
def to_narwhals(self) -> Series[NativeSeriesT]: return self._version.series(self, level="full") diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 9d985353bc..d1b8a6bd9e 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -41,7 +41,7 @@ from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.namespace import PandasLikeNamespace from narwhals._utils import Version, _LimitedContext - from narwhals.dtypes import DType + from narwhals.dtypes import DType, IntegerType from narwhals.typing import ( ClosedInterval, FillNullStrategy, @@ -182,6 +182,20 @@ def from_iterable( kwds["index"] = index return cls.from_native(ns.Series(data, name=name, **kwds), context=context) + @classmethod + def _int_range( + cls, + start: int, + end: int, + step: int, + dtype: IntegerType | type[IntegerType], + context: _LimitedContext, + ) -> Self: + impl = context._implementation + array_funcs = import_array_module(impl) + data = array_funcs.arange(start, end, step) + return cls.from_iterable(data, context=context, name="literal", dtype=dtype) + @staticmethod def _is_native(obj: Any) -> TypeIs[Any]: return is_pandas_like_series(obj) # pragma: no cover diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 170c6e5150..b17141def9 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -22,13 +22,14 @@ import pandas as pd import pyarrow as pa + from polars.datatypes import IntegerType as PlIntegerType from typing_extensions import Self, TypeAlias, TypeIs from narwhals._polars.dataframe import Method, PolarsDataFrame from narwhals._polars.expr import PolarsExpr from narwhals._polars.namespace import PolarsNamespace from narwhals._utils import Version, _LimitedContext - from narwhals.dtypes import DType + from narwhals.dtypes import DType, IntegerType from narwhals.series import Series from narwhals.typing import Into1DArray, IntoDType, MultiIndexSelector, 
_1DArray @@ -164,6 +165,17 @@ def from_iterable( native = pl.Series(name=name, values=cast("Sequence[Any]", data), dtype=dtype_pl) return cls.from_native(native, context=context) + @classmethod + def _int_range( + cls, start: int, end: int, step: int, dtype: IntegerType, context: _LimitedContext + ) -> Self: + version = context._version + dtype_pl: PlIntegerType = narwhals_to_native_dtype(dtype, version) # type: ignore[assignment] + return cls.from_native( + pl.int_range(start=start, end=end, step=step, dtype=dtype_pl, eager=True), + context=context, + ) + @staticmethod def _is_native(obj: pl.Series | Any) -> TypeIs[pl.Series]: return isinstance(obj, pl.Series) diff --git a/narwhals/functions.py b/narwhals/functions.py index 9bc7501b9f..635ac29af8 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -4,7 +4,7 @@ import sys from collections.abc import Iterable, Mapping, Sequence from functools import partial -from typing import TYPE_CHECKING, Any, Literal, cast +from typing import TYPE_CHECKING, Any, Literal, cast, overload from narwhals._expression_parsing import ( ExprKind, @@ -33,6 +33,7 @@ is_numpy_array_2d, is_pyarrow_table, ) +from narwhals.dtypes import Int64 from narwhals.exceptions import InvalidOperationError from narwhals.expr import Expr from narwhals.series import Series @@ -46,7 +47,7 @@ from narwhals._compliant import CompliantExpr, CompliantNamespace from narwhals._translate import IntoArrowTable from narwhals.dataframe import DataFrame, LazyFrame - from narwhals.dtypes import DType + from narwhals.dtypes import DType, IntegerType from narwhals.schema import Schema from narwhals.typing import ( ConcatMethod, @@ -1918,3 +1919,66 @@ def coalesce( ), ExprMetadata.from_horizontal_op(*flat_exprs), ) + + +@overload +def int_range( + start: int | IntoExpr, + end: int | IntoExpr | None = None, + step: int = 1, + *, + dtype: type[IntegerType] | IntegerType = Int64, + eager: None = None, +) -> Expr: ... 
+ + +@overload +def int_range( + start: int | IntoExpr, + end: int | IntoExpr | None = None, + step: int = 1, + *, + dtype: type[IntegerType] | IntegerType = Int64, + eager: ModuleType | Implementation | str, +) -> Series[Any]: ... + + +def int_range( + start: int | IntoExpr, + end: int | IntoExpr | None = None, + step: int = 1, + *, + dtype: type[IntegerType] | IntegerType = Int64, + eager: ModuleType | Implementation | str | None = None, +) -> Expr | Series[Any]: + from narwhals._utils import isinstance_or_issubclass + from narwhals.dtypes import IntegerType + + if not isinstance_or_issubclass(dtype, IntegerType): + from narwhals.exceptions import ComputeError + + msg = f"non-integer `dtype` passed to `int_range`: {dtype}" + raise ComputeError(msg) + + if end is None: + end = start + start = 0 + + if not eager: + return Expr( + lambda plx: plx.int_range(start=start, end=end, step=step, dtype=dtype), + ExprMetadata.selector_single(), + ) + + impl = Implementation.from_backend(eager) + if is_eager_allowed(impl): + assert isinstance(start, int) # noqa: S101, help mypy + assert isinstance(end, int) # noqa: S101, help mypy + ns = Version.MAIN.namespace.from_backend(impl).compliant + series = ns._series._int_range( + start=start, end=end, step=step, dtype=dtype, context=ns + ) + return series.to_narwhals() + + msg = f"Cannot create a Series from a lazy backend. 
Found: {impl}" + raise ValueError(msg) diff --git a/tests/expr_and_series/int_range_test.py b/tests/expr_and_series/int_range_test.py new file mode 100644 index 0000000000..8b56348e9c --- /dev/null +++ b/tests/expr_and_series/int_range_test.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +import narwhals as nw +from narwhals import Implementation +from tests.utils import assert_equal_data +from narwhals.exceptions import ComputeError + +if TYPE_CHECKING: + from narwhals.dtypes import DType, IntegerType + +EAGER_BACKENDS = ( + Implementation.PANDAS, + Implementation.PYARROW, + Implementation.POLARS, +) + +@pytest.mark.parametrize("impl", EAGER_BACKENDS) +@pytest.mark.parametrize( + ("start", "end", "step", "dtype"), + [ + (0, 0, 1, nw.UInt8()), + (0, 3, 1, nw.UInt16), + (-3, 0, -1, nw.Int16()), + (0, 3, 2, nw.Int64), + (3, None, 1, nw.UInt32), + (3, None, 2, nw.Int8()), + ], +) +def test_int_range_eager( + start: int, + end: int | None, + step: int, + dtype: type[IntegerType] | IntegerType, + impl: nw.Implementation, +) -> None: + series = nw.int_range(start=start, end=end, step=step, dtype=dtype, eager=impl) + + assert series.dtype == dtype + if end is None: + end = start + start = 0 + assert_equal_data({"a": series}, {"a": list(range(start, end, step))}) + + +@pytest.mark.parametrize("dtype", [nw.List, nw.Float64(), nw.Float32, nw.Decimal, nw.String()]) +def test_int_range_non_int_dtype(dtype: DType) -> None: + + msg = f"non-integer `dtype` passed to `int_range`: {dtype}" + with pytest.raises(ComputeError, match=msg): + nw.int_range(start=0, end=3, dtype=dtype, eager=None) # type: ignore[arg-type] From 74b94c18d020cf94671af5a58aa5ca8740f89da3 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 26 Jul 2025 20:11:06 +0200 Subject: [PATCH 02/45] lazy WIP --- narwhals/_arrow/namespace.py | 27 ++++++++++++++ narwhals/_compliant/namespace.py | 10 +++++- narwhals/_pandas_like/namespace.py | 29 +++++++++++++++ 
narwhals/_pandas_like/series.py | 2 +- narwhals/_polars/namespace.py | 21 +++++++++++ narwhals/_polars/series.py | 7 +++- narwhals/functions.py | 48 ++++++++++++++++++------- tests/expr_and_series/int_range_test.py | 40 ++++++++++++++++----- 8 files changed, 161 insertions(+), 23 deletions(-) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index 2d6e3096af..9bc7e50ca5 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -26,6 +26,7 @@ from narwhals._arrow.typing import ArrayOrScalar, ChunkedArrayAny, Incomplete from narwhals._compliant.typing import ScalarKwargs from narwhals._utils import Version + from narwhals.dtypes import IntegerType from narwhals.typing import IntoDType, NonNestedLiteral @@ -284,6 +285,32 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: context=self, ) + def int_range( + self, + start: int | ArrowExpr, + end: int | ArrowExpr, + step: int, + *, + dtype: IntegerType | type[IntegerType], + ) -> ArrowExpr: + def func(df: ArrowDataFrame) -> list[ArrowSeries]: + start_value = start(df)[0].item() if isinstance(start, ArrowExpr) else start + end_value = end(df)[0].item() if isinstance(end, ArrowExpr) else end + return [ + ArrowSeries._int_range( + start=start_value, end=end_value, step=step, dtype=dtype, context=self + ) + ] + + return self._expr._from_callable( + func=func, + depth=0, + function_name="int_range", + evaluate_output_names=lambda _df: ["literal"], + alias_output_names=None, + context=self, + ) + class ArrowWhen(EagerWhen[ArrowDataFrame, ArrowSeries, ArrowExpr, "ChunkedArrayAny"]): @property diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py index bb998cc815..f6ab8d96fe 100644 --- a/narwhals/_compliant/namespace.py +++ b/narwhals/_compliant/namespace.py @@ -31,7 +31,7 @@ from narwhals._compliant.selectors import CompliantSelectorNamespace from narwhals._compliant.when_then import CompliantWhen, EagerWhen from narwhals._utils import Implementation, 
Version - from narwhals.dtypes import DType + from narwhals.dtypes import DType, IntegerType from narwhals.schema import Schema from narwhals.typing import ( ConcatMethod, @@ -92,6 +92,14 @@ def when( def concat_str( self, *exprs: CompliantExprT, separator: str, ignore_nulls: bool ) -> CompliantExprT: ... + def int_range( + self, + start: int | CompliantExprT, + end: int | CompliantExprT, + step: int, + *, + dtype: IntegerType | type[IntegerType], + ) -> CompliantExprT: ... @property def selectors(self) -> CompliantSelectorNamespace[Any, Any]: ... @property diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 637c2ffae9..3017512ce7 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -25,6 +25,7 @@ from narwhals._compliant.typing import ScalarKwargs from narwhals._utils import Implementation, Version + from narwhals.dtypes import IntegerType from narwhals.typing import IntoDType, NonNestedLiteral @@ -370,6 +371,34 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: context=self, ) + def int_range( + self, + start: int | PandasLikeExpr, + end: int | PandasLikeExpr, + step: int, + *, + dtype: IntegerType | type[IntegerType], + ) -> PandasLikeExpr: + def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: + start_value = ( + start(df)[0].item() if isinstance(start, PandasLikeExpr) else start + ) + end_value = end(df)[0].item() if isinstance(end, PandasLikeExpr) else end + return [ + PandasLikeSeries._int_range( + start=start_value, end=end_value, step=step, dtype=dtype, context=self + ) + ] + + return self._expr._from_callable( + func=func, + depth=0, + function_name="int_range", + evaluate_output_names=lambda _df: ["literal"], + alias_output_names=None, + context=self, + ) + class _NativeConcat(Protocol[NativeDataFrameT, NativeSeriesT]): @overload diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index d1b8a6bd9e..5a07d3721d 100644 --- 
a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -325,7 +325,7 @@ def cast(self, dtype: IntoDType) -> Self: ) return self._with_native(self.native.astype(pd_dtype), preserve_broadcast=True) - def item(self, index: int | None) -> Any: + def item(self, index: int | None = None) -> Any: # cuDF doesn't have Series.item(). if index is None: if len(self) != 1: diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py index 0e61f4f68e..11d708c1f2 100644 --- a/narwhals/_polars/namespace.py +++ b/narwhals/_polars/namespace.py @@ -16,10 +16,13 @@ from collections.abc import Iterable, Mapping, Sequence from datetime import timezone + from polars.datatypes import IntegerType as PlIntegerType + from narwhals._compliant import CompliantSelectorNamespace, CompliantWhen from narwhals._polars.dataframe import Method, PolarsDataFrame, PolarsLazyFrame from narwhals._polars.typing import FrameT from narwhals._utils import Version, _LimitedContext + from narwhals.dtypes import IntegerType from narwhals.schema import Schema from narwhals.typing import Into1DArray, IntoDType, TimeUnit, _2DArray @@ -201,6 +204,24 @@ def concat_str( version=self._version, ) + def int_range( + self, + start: int | PolarsExpr, + end: int | PolarsExpr, + step: int, + *, + dtype: IntegerType | type[IntegerType], + ) -> PolarsExpr: + start_ = start if isinstance(start, int) else start.native + end_ = end if isinstance(end, int) else end.native + pl_dtype: PlIntegerType = narwhals_to_native_dtype( + dtype=dtype, version=self._version + ) # type: ignore[assignment] + return self._expr( + pl.int_range(start=start_, end=end_, step=step, dtype=pl_dtype, eager=False), + version=self._version, + ) + # NOTE: Implementation is too different to annotate correctly (vs other `*SelectorNamespace`) # 1. Others have lots of private stuff for code reuse # i. 
None of that is useful here diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index b17141def9..0537eb8b11 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -167,7 +167,12 @@ def from_iterable( @classmethod def _int_range( - cls, start: int, end: int, step: int, dtype: IntegerType, context: _LimitedContext + cls, + start: int, + end: int, + step: int, + dtype: IntegerType | type[IntegerType], + context: _LimitedContext, ) -> Self: version = context._version dtype_pl: PlIntegerType = narwhals_to_native_dtype(dtype, version) # type: ignore[assignment] diff --git a/narwhals/functions.py b/narwhals/functions.py index 635ac29af8..d3c2d3d9b6 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1923,40 +1923,39 @@ def coalesce( @overload def int_range( - start: int | IntoExpr, - end: int | IntoExpr | None = None, + start: int | Expr, + end: int | Expr | None = None, step: int = 1, *, - dtype: type[IntegerType] | IntegerType = Int64, + dtype: IntegerType | type[IntegerType], eager: None = None, ) -> Expr: ... @overload def int_range( - start: int | IntoExpr, - end: int | IntoExpr | None = None, + start: int | Expr, + end: int | Expr | None = None, step: int = 1, *, - dtype: type[IntegerType] | IntegerType = Int64, + dtype: IntegerType | type[IntegerType], eager: ModuleType | Implementation | str, ) -> Series[Any]: ... 
def int_range( - start: int | IntoExpr, - end: int | IntoExpr | None = None, + start: int | Expr, + end: int | Expr | None = None, step: int = 1, *, - dtype: type[IntegerType] | IntegerType = Int64, + dtype: IntegerType | type[IntegerType] = Int64, eager: ModuleType | Implementation | str | None = None, ) -> Expr | Series[Any]: from narwhals._utils import isinstance_or_issubclass from narwhals.dtypes import IntegerType + from narwhals.exceptions import ComputeError if not isinstance_or_issubclass(dtype, IntegerType): - from narwhals.exceptions import ComputeError - msg = f"non-integer `dtype` passed to `int_range`: {dtype}" raise ComputeError(msg) @@ -1965,11 +1964,36 @@ def int_range( start = 0 if not eager: + assert start is not None # noqa: S101, help mypy + assert end is not None # noqa: S101, help mypy + + start = start if isinstance(start, Expr) else lit(start, dtype=dtype) + end = end if isinstance(end, Expr) else lit(end, dtype=dtype) + + if start._metadata.expansion_kind.is_multi_output(): + msg = "`start` must contain exactly one value, got multiple values" + raise ComputeError(msg) + + if end._metadata.expansion_kind.is_multi_output(): + msg = "`end` must contain exactly one value, got multiple values" + raise ComputeError(msg) + return Expr( - lambda plx: plx.int_range(start=start, end=end, step=step, dtype=dtype), + lambda plx: apply_n_ary_operation( + plx, + lambda *args: plx.int_range(*args, dtype=dtype), + start, + end, + step, + str_as_lit=False, + ), ExprMetadata.selector_single(), ) + return Expr( + lambda plx: plx.int_range(start=start, end=end, step=step, dtype=dtype) + ) + impl = Implementation.from_backend(eager) if is_eager_allowed(impl): assert isinstance(start, int) # noqa: S101, help mypy diff --git a/tests/expr_and_series/int_range_test.py b/tests/expr_and_series/int_range_test.py index 8b56348e9c..d0eda97625 100644 --- a/tests/expr_and_series/int_range_test.py +++ b/tests/expr_and_series/int_range_test.py @@ -6,17 +6,14 @@ import 
narwhals as nw from narwhals import Implementation -from tests.utils import assert_equal_data from narwhals.exceptions import ComputeError +from tests.utils import Constructor, assert_equal_data if TYPE_CHECKING: from narwhals.dtypes import DType, IntegerType -EAGER_BACKENDS = ( - Implementation.PANDAS, - Implementation.PYARROW, - Implementation.POLARS, -) +EAGER_BACKENDS = (Implementation.PANDAS, Implementation.PYARROW, Implementation.POLARS) + @pytest.mark.parametrize("impl", EAGER_BACKENDS) @pytest.mark.parametrize( @@ -46,9 +43,36 @@ def test_int_range_eager( assert_equal_data({"a": series}, {"a": list(range(start, end, step))}) -@pytest.mark.parametrize("dtype", [nw.List, nw.Float64(), nw.Float32, nw.Decimal, nw.String()]) -def test_int_range_non_int_dtype(dtype: DType) -> None: +@pytest.mark.parametrize( + ("start", "end", "step", "dtype", "expected"), + [ + (0, nw.len(), 1, nw.UInt8(), [0, 1, 2]), + (0, 3, 1, nw.UInt16, [0, 1, 2]), + (-3, nw.len() - 3, 1, nw.Int16(), [-3, -2, -1]), + (nw.len(), 0, -1, nw.Int64, [3, 2, 1]), + (nw.len(), None, 1, nw.UInt32, [0, 1, 2]), + ], +) +def test_int_range_lazy( + constructor: Constructor, + start: int, + end: int | None, + step: int, + dtype: type[IntegerType] | IntegerType, + expected: list[int], +) -> None: + data = {"a": ["foo", "bar", "baz"]} + int_range = nw.int_range(start=start, end=end, step=step, dtype=dtype, eager=None) + result = nw.from_native(constructor(data)).select(int_range) + + assert_equal_data(result, {"literal": expected}) + assert result.collect_schema()["literal"] == dtype + +@pytest.mark.parametrize( + "dtype", [nw.List, nw.Float64(), nw.Float32, nw.Decimal, nw.String()] +) +def test_int_range_non_int_dtype(dtype: DType) -> None: msg = f"non-integer `dtype` passed to `int_range`: {dtype}" with pytest.raises(ComputeError, match=msg): nw.int_range(start=0, end=3, dtype=dtype, eager=None) # type: ignore[arg-type] From c775ccd0c93db19d535e0567c34d29b30cd7685a Mon Sep 17 00:00:00 2001 From: FBruzzesi 
Date: Sat, 26 Jul 2025 21:06:20 +0200 Subject: [PATCH 03/45] fixed eager --- narwhals/_arrow/namespace.py | 23 ++++++++++++++++++++--- narwhals/_arrow/series.py | 2 +- narwhals/_compliant/series.py | 1 + narwhals/_pandas_like/namespace.py | 24 +++++++++++++++++++----- narwhals/_pandas_like/series.py | 3 ++- narwhals/_polars/series.py | 3 ++- narwhals/functions.py | 6 +----- tests/expr_and_series/int_range_test.py | 10 ++++++++-- 8 files changed, 54 insertions(+), 18 deletions(-) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index 9bc7e50ca5..f4b5e7b46e 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -294,19 +294,36 @@ def int_range( dtype: IntegerType | type[IntegerType], ) -> ArrowExpr: def func(df: ArrowDataFrame) -> list[ArrowSeries]: - start_value = start(df)[0].item() if isinstance(start, ArrowExpr) else start + if isinstance(start, ArrowExpr): + start_eval = start(df)[0] + name = start_eval.name + start_value = start_eval.item() + else: + name = "literal" + start_value = start + end_value = end(df)[0].item() if isinstance(end, ArrowExpr) else end return [ ArrowSeries._int_range( - start=start_value, end=end_value, step=step, dtype=dtype, context=self + start=start_value, + end=end_value, + step=step, + dtype=dtype, + context=self, + name=name, ) ] + evaluate_output_names = ( + combine_evaluate_output_names(start) + if isinstance(start, ArrowExpr) + else lambda _df: ["literal"] + ) return self._expr._from_callable( func=func, depth=0, function_name="int_range", - evaluate_output_names=lambda _df: ["literal"], + evaluate_output_names=evaluate_output_names, alias_output_names=None, context=self, ) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 6eb413a8f2..d92a625f0a 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -169,10 +169,10 @@ def _int_range( step: int, dtype: IntegerType | type[IntegerType], context: _LimitedContext, + name: str, ) -> Self: 
version = context._version dtype_pa = narwhals_to_native_dtype(dtype, version) - name = "literal" if cls._implementation._backend_version() < (21, 0, 0): # pragma: no cover import numpy as np # ignore-banned-import diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index be8a12cfb4..13110f705e 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -130,6 +130,7 @@ def _int_range( step: int, dtype: IntegerType | type[IntegerType], context: _LimitedContext, + name: str, ) -> Self: ... def to_narwhals(self) -> Series[NativeSeriesT]: return self._version.series(self, level="full") diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 3017512ce7..9e65976e0a 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -380,21 +380,35 @@ def int_range( dtype: IntegerType | type[IntegerType], ) -> PandasLikeExpr: def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: - start_value = ( - start(df)[0].item() if isinstance(start, PandasLikeExpr) else start - ) + if isinstance(start, PandasLikeExpr): + start_eval = start(df)[0] + name = start_eval.name + start_value = start_eval.item() + else: + name = "literal" + start_value = start end_value = end(df)[0].item() if isinstance(end, PandasLikeExpr) else end return [ PandasLikeSeries._int_range( - start=start_value, end=end_value, step=step, dtype=dtype, context=self + start=start_value, + end=end_value, + step=step, + dtype=dtype, + context=self, + name=name, ) ] + evaluate_output_names = ( + combine_evaluate_output_names(start) + if isinstance(start, PandasLikeExpr) + else lambda _df: ["literal"] + ) return self._expr._from_callable( func=func, depth=0, function_name="int_range", - evaluate_output_names=lambda _df: ["literal"], + evaluate_output_names=evaluate_output_names, alias_output_names=None, context=self, ) diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 
5a07d3721d..e0dddd9e41 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -190,11 +190,12 @@ def _int_range( step: int, dtype: IntegerType | type[IntegerType], context: _LimitedContext, + name: str, ) -> Self: impl = context._implementation array_funcs = import_array_module(impl) data = array_funcs.arange(start, end, step) - return cls.from_iterable(data, context=context, name="literal", dtype=dtype) + return cls.from_iterable(data, context=context, name=name, dtype=dtype) @staticmethod def _is_native(obj: Any) -> TypeIs[Any]: diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 0537eb8b11..3b7f066ee6 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -173,13 +173,14 @@ def _int_range( step: int, dtype: IntegerType | type[IntegerType], context: _LimitedContext, + name: str, ) -> Self: version = context._version dtype_pl: PlIntegerType = narwhals_to_native_dtype(dtype, version) # type: ignore[assignment] return cls.from_native( pl.int_range(start=start, end=end, step=step, dtype=dtype_pl, eager=True), context=context, - ) + ).alias(name) @staticmethod def _is_native(obj: pl.Series | Any) -> TypeIs[pl.Series]: diff --git a/narwhals/functions.py b/narwhals/functions.py index d3c2d3d9b6..b0c3e1ab08 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1990,17 +1990,13 @@ def int_range( ExprMetadata.selector_single(), ) - return Expr( - lambda plx: plx.int_range(start=start, end=end, step=step, dtype=dtype) - ) - impl = Implementation.from_backend(eager) if is_eager_allowed(impl): assert isinstance(start, int) # noqa: S101, help mypy assert isinstance(end, int) # noqa: S101, help mypy ns = Version.MAIN.namespace.from_backend(impl).compliant series = ns._series._int_range( - start=start, end=end, step=step, dtype=dtype, context=ns + start=start, end=end, step=step, dtype=dtype, context=ns, name="literal" ) return series.to_narwhals() diff --git 
a/tests/expr_and_series/int_range_test.py b/tests/expr_and_series/int_range_test.py index d0eda97625..cecc70dce7 100644 --- a/tests/expr_and_series/int_range_test.py +++ b/tests/expr_and_series/int_range_test.py @@ -54,6 +54,7 @@ def test_int_range_eager( ], ) def test_int_range_lazy( + request: pytest.FixtureRequest, constructor: Constructor, start: int, end: int | None, @@ -61,12 +62,17 @@ def test_int_range_lazy( dtype: type[IntegerType] | IntegerType, expected: list[int], ) -> None: + if any(x in str(constructor) for x in ("dask", "duckdb", "ibis", "spark")): + reason = "not implemented yet" + request.applymarker(pytest.mark.xfail(reason=reason)) + data = {"a": ["foo", "bar", "baz"]} int_range = nw.int_range(start=start, end=end, step=step, dtype=dtype, eager=None) result = nw.from_native(constructor(data)).select(int_range) - assert_equal_data(result, {"literal": expected}) - assert result.collect_schema()["literal"] == dtype + output_name = "len" if isinstance(start, nw.Expr) and end is not None else "literal" + assert_equal_data(result, {output_name: expected}) + assert result.collect_schema()[output_name] == dtype @pytest.mark.parametrize( From b3ba8106529f2d6ffadfb655fdfcf739996ab111 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 26 Jul 2025 22:33:43 +0200 Subject: [PATCH 04/45] add docs, cleanse a bit --- docs/api-reference/narwhals.md | 1 + narwhals/_arrow/dataframe.py | 11 ++-- narwhals/_pandas_like/dataframe.py | 3 +- narwhals/_pandas_like/namespace.py | 2 +- narwhals/_pandas_like/series.py | 3 +- narwhals/functions.py | 72 +++++++++++++++++++++++-- narwhals/stable/v1/__init__.py | 54 ++++++++++++++++++- tests/expr_and_series/int_range_test.py | 21 +++++++- tests/v1_test.py | 12 +++++ 9 files changed, 161 insertions(+), 18 deletions(-) diff --git a/docs/api-reference/narwhals.md b/docs/api-reference/narwhals.md index c1c9fe2e64..41c456b02d 100644 --- a/docs/api-reference/narwhals.md +++ b/docs/api-reference/narwhals.md @@ -21,6 +21,7 @@ Here are 
the top-level functions available in Narwhals. - generate_temporary_column_name - get_level - get_native_namespace + - int_range - is_ordered_categorical - len - lit diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index becec1d051..886a4c27ba 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -472,16 +472,13 @@ def to_dict( def with_row_index(self, name: str, order_by: Sequence[str] | None) -> Self: plx = self.__narwhals_namespace__() if order_by is None: - import numpy as np # ignore-banned-import - - data = pa.array(np.arange(len(self), dtype=np.int64)) - row_index = plx._expr._from_series( - plx._series.from_iterable(data, context=self, name=name) + row_index = plx.int_range( + start=0, end=len(self), step=1, dtype=self._version.dtypes.Int64() ) else: rank = plx.col(order_by[0]).rank("ordinal", descending=False) - row_index = (rank.over(partition_by=[], order_by=order_by) - 1).alias(name) - return self.select(row_index, plx.all()) + row_index = rank.over(partition_by=[], order_by=order_by) - 1 + return self.select(row_index.alias(name), plx.all()) def filter(self, predicate: ArrowExpr | list[bool | None]) -> Self: if isinstance(predicate, list): diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py index badafde27a..fbe1e96d61 100644 --- a/narwhals/_pandas_like/dataframe.py +++ b/narwhals/_pandas_like/dataframe.py @@ -413,8 +413,7 @@ def estimated_size(self, unit: SizeUnit) -> int | float: def with_row_index(self, name: str, order_by: Sequence[str] | None) -> Self: plx = self.__narwhals_namespace__() if order_by is None: - size = len(self) - data = self._array_funcs.arange(size) + data = self._array_funcs.arange(len(self)) row_index = plx._expr._from_series( plx._series.from_iterable( diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 9e65976e0a..3d7652807c 100644 --- a/narwhals/_pandas_like/namespace.py +++ 
b/narwhals/_pandas_like/namespace.py @@ -384,7 +384,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: start_eval = start(df)[0] name = start_eval.name start_value = start_eval.item() - else: + else: # pragma: no cover name = "literal" start_value = start end_value = end(df)[0].item() if isinstance(end, PandasLikeExpr) else end diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index e0dddd9e41..39599a096d 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -192,8 +192,7 @@ def _int_range( context: _LimitedContext, name: str, ) -> Self: - impl = context._implementation - array_funcs = import_array_module(impl) + array_funcs = import_array_module(context._implementation) data = array_funcs.arange(start, end, step) return cls.from_iterable(data, context=context, name=name, dtype=dtype) diff --git a/narwhals/functions.py b/narwhals/functions.py index b0c3e1ab08..7df4552c93 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1928,7 +1928,7 @@ def int_range( step: int = 1, *, dtype: IntegerType | type[IntegerType], - eager: None = None, + eager: Literal[False] | None = False, ) -> Expr: ... @@ -1949,7 +1949,71 @@ def int_range( step: int = 1, *, dtype: IntegerType | type[IntegerType] = Int64, - eager: ModuleType | Implementation | str | None = None, + eager: ModuleType | Implementation | str | Literal[False] | None = False, +) -> Expr | Series[Any]: + """Generate a range of integers. + + Arguments: + start: Start of the range (inclusive). Defaults to 0. + end: End of the range (exclusive). If set to `None` (default), + the value of `start` is used and `start` is set to `0`. + step: Step size of the range. + dtype: Data type of the range (must be an integer data type). + eager: If set to `False` (default) or `None`, then an expression is returned. + If set to an (eager) implementation ("pandas", "polars" or "pyarrow"), then + a `Series` is returned. 
+ + Returns: + Expr or Series: Column of integer data type `dtype`. + + Examples: + >>> import narwhals as nw + >>> nw.int_range(0, 5, step=2, eager="pandas") + ┌───────────────────────────┐ + | Narwhals Series | + |---------------------------| + |0 0 | + |1 2 | + |2 4 | + |Name: literal, dtype: int64| + └───────────────────────────┘ + + `end` can be omitted for a shorter syntax. + + >>> nw.int_range(5, step=2, eager="pandas") + ┌───────────────────────────┐ + | Narwhals Series | + |---------------------------| + |0 0 | + |1 2 | + |2 4 | + |Name: literal, dtype: int64| + └───────────────────────────┘ + + Generate an index column by using `int_range` in conjunction with :func:`len`. + + >>> import pandas as pd + >>> df = nw.from_native(pd.DataFrame({"a": [1, 3, 5], "b": [2, 4, 6]})) + >>> df.select(nw.int_range(nw.len(), dtype=nw.UInt32).alias("index"), nw.all()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | index a b | + | 0 0 1 2 | + | 1 1 3 4 | + | 2 2 5 6 | + └──────────────────┘ + """ + return _int_range_impl(start, end, step, dtype=dtype, eager=eager) + + +def _int_range_impl( + start: int | Expr, + end: int | Expr | None, + step: int, + *, + dtype: IntegerType | type[IntegerType], + eager: ModuleType | Implementation | str | Literal[False] | None, ) -> Expr | Series[Any]: from narwhals._utils import isinstance_or_issubclass from narwhals.dtypes import IntegerType @@ -1971,11 +2035,11 @@ def int_range( end = end if isinstance(end, Expr) else lit(end, dtype=dtype) if start._metadata.expansion_kind.is_multi_output(): - msg = "`start` must contain exactly one value, got multiple values" + msg = "`start` must contain exactly one value, got expression returning multiple values" raise ComputeError(msg) if end._metadata.expansion_kind.is_multi_output(): - msg = "`end` must contain exactly one value, got multiple values" + msg = "`end` must contain exactly one value, got expression returning multiple values" raise ComputeError(msg) return 
Expr( diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 22d2cf796d..fd0352ff77 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -26,7 +26,7 @@ from narwhals.dependencies import get_polars from narwhals.exceptions import InvalidIntoExprError from narwhals.expr import Expr as NwExpr -from narwhals.functions import _new_series_impl, concat, show_versions +from narwhals.functions import _int_range_impl, _new_series_impl, concat, show_versions from narwhals.schema import Schema as NwSchema from narwhals.series import Series as NwSeries from narwhals.stable.v1 import dependencies, dtypes, selectors @@ -48,6 +48,7 @@ Int32, Int64, Int128, + IntegerType, List, Object, String, @@ -1889,6 +1890,56 @@ def scan_parquet( return _stableify(nw_f.scan_parquet(source, backend=backend, **kwargs)) +@overload +def int_range( + start: int | Expr, + end: int | Expr | None = None, + step: int = 1, + *, + dtype: IntegerType | type[IntegerType], + eager: Literal[False] | None = False, +) -> Expr: ... + + +@overload +def int_range( + start: int | Expr, + end: int | Expr | None = None, + step: int = 1, + *, + dtype: IntegerType | type[IntegerType], + eager: ModuleType | Implementation | str, +) -> Series[Any]: ... + + +def int_range( + start: int | Expr, + end: int | Expr | None = None, + step: int = 1, + *, + dtype: IntegerType | type[IntegerType] = Int64, + eager: ModuleType | Implementation | str | Literal[False] | None = False, +) -> Expr | Series[Any]: + """Generate a range of integers. + + Arguments: + start: Start of the range (inclusive). Defaults to 0. + end: End of the range (exclusive). If set to `None` (default), + the value of `start` is used and `start` is set to `0`. + step: Step size of the range. + dtype: Data type of the range (must be an integer data type). + eager: If set to `False` (default) or `None`, then an expression is returned. 
+ If set to an (eager) implementation ("pandas", "polars" or "pyarrow"), then + a `Series` is returned. + + Returns: + Expr or Series: Column of integer data type `dtype`. + """ + return _stableify( + _int_range_impl(start=start, end=end, step=step, dtype=dtype, eager=eager) + ) + + __all__ = [ "Array", "Binary", @@ -1942,6 +1993,7 @@ def scan_parquet( "generate_temporary_column_name", "get_level", "get_native_namespace", + "int_range", "is_ordered_categorical", "len", "lit", diff --git a/tests/expr_and_series/int_range_test.py b/tests/expr_and_series/int_range_test.py index cecc70dce7..7e43540fc9 100644 --- a/tests/expr_and_series/int_range_test.py +++ b/tests/expr_and_series/int_range_test.py @@ -81,4 +81,23 @@ def test_int_range_lazy( def test_int_range_non_int_dtype(dtype: DType) -> None: msg = f"non-integer `dtype` passed to `int_range`: {dtype}" with pytest.raises(ComputeError, match=msg): - nw.int_range(start=0, end=3, dtype=dtype, eager=None) # type: ignore[arg-type] + nw.int_range(start=0, end=3, dtype=dtype) # type: ignore[arg-type] + + +@pytest.mark.parametrize( + ("start", "end"), + [ + (nw.col("foo", "bar").sum(), nw.col("foo", "bar").sum()), + (1, nw.col("foo", "bar").sum()), + ], +) +def test_int_range_multi_named(start: int | nw.Expr, end: int | nw.Expr) -> None: + prefix = "`start`" if isinstance(start, nw.Expr) else "`end`" + msg = f"{prefix} must contain exactly one value, got expression returning multiple values" + with pytest.raises(ComputeError, match=msg): + nw.int_range(start=start, end=end) # type: ignore[arg-type] + + +def test_int_range_eager_set_to_lazy_backend() -> None: + with pytest.raises(ValueError, match="Cannot create a Series from a lazy backend"): + nw.int_range(start=123, eager=Implementation.DUCKDB) diff --git a/tests/v1_test.py b/tests/v1_test.py index a420508b9b..72805e8b08 100644 --- a/tests/v1_test.py +++ b/tests/v1_test.py @@ -1037,3 +1037,15 @@ def test_dataframe_from_arrow(eager_backend: EagerAllowed) -> None: assert 
isinstance(result.to_native(), pa.Table) else: assert not isinstance(result.to_native(), pa.Table) + + +def test_int_range() -> None: + pytest.importorskip("pandas") + + def minimal_function(data: nw_v1.Series[Any]) -> None: + data.is_null() + + col = nw_v1.int_range(0, 3, eager="pandas") + # check this doesn't raise type-checking errors + minimal_function(col) + assert isinstance(col, nw_v1.Series) From 9ee6209e2619e47fa375471d11584e1422d72b88 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 26 Jul 2025 23:02:22 +0200 Subject: [PATCH 05/45] fix or ignore typing issues --- narwhals/_arrow/series.py | 2 +- narwhals/_compliant/namespace.py | 3 +++ narwhals/functions.py | 4 ++-- narwhals/stable/v1/__init__.py | 4 ++-- tests/expr_and_series/int_range_test.py | 4 ++-- tests/v1_test.py | 2 +- 6 files changed, 11 insertions(+), 8 deletions(-) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index d92a625f0a..f9de6dd76a 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -178,7 +178,7 @@ def _int_range( data = np.arange(start=start, stop=end, step=step) else: - data = pc.cast( + data = pc.cast( # type: ignore[assignment] pa.arange(start=start, stop=end, step=step), # type: ignore[attr-defined] dtype_pa, ) diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py index f6ab8d96fe..e5fed7d5d9 100644 --- a/narwhals/_compliant/namespace.py +++ b/narwhals/_compliant/namespace.py @@ -19,6 +19,7 @@ from narwhals._utils import ( exclude_column_names, get_column_names, + not_implemented, passthrough_column_names, ) from narwhals.dependencies import is_numpy_array_2d @@ -147,6 +148,8 @@ def from_native(self, data: NativeFrameT_co | Any, /) -> CompliantLazyFrameT: msg = f"Unsupported type: {type(data).__name__!r}" raise TypeError(msg) + int_range: not_implemented = not_implemented() + class EagerNamespace( DepthTrackingNamespace[EagerDataFrameT, EagerExprT], diff --git a/narwhals/functions.py b/narwhals/functions.py 
index 7df4552c93..6b35ab6bd5 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1928,7 +1928,7 @@ def int_range( step: int = 1, *, dtype: IntegerType | type[IntegerType], - eager: Literal[False] | None = False, + eager: Literal[False] = False, ) -> Expr: ... @@ -1949,7 +1949,7 @@ def int_range( step: int = 1, *, dtype: IntegerType | type[IntegerType] = Int64, - eager: ModuleType | Implementation | str | Literal[False] | None = False, + eager: ModuleType | Implementation | str | Literal[False] = False, ) -> Expr | Series[Any]: """Generate a range of integers. diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index fd0352ff77..79862bcae0 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -1897,7 +1897,7 @@ def int_range( step: int = 1, *, dtype: IntegerType | type[IntegerType], - eager: Literal[False] | None = False, + eager: Literal[False] = False, ) -> Expr: ... @@ -1918,7 +1918,7 @@ def int_range( step: int = 1, *, dtype: IntegerType | type[IntegerType] = Int64, - eager: ModuleType | Implementation | str | Literal[False] | None = False, + eager: ModuleType | Implementation | str | Literal[False] = False, ) -> Expr | Series[Any]: """Generate a range of integers. 
diff --git a/tests/expr_and_series/int_range_test.py b/tests/expr_and_series/int_range_test.py index 7e43540fc9..5a39efea28 100644 --- a/tests/expr_and_series/int_range_test.py +++ b/tests/expr_and_series/int_range_test.py @@ -67,7 +67,7 @@ def test_int_range_lazy( request.applymarker(pytest.mark.xfail(reason=reason)) data = {"a": ["foo", "bar", "baz"]} - int_range = nw.int_range(start=start, end=end, step=step, dtype=dtype, eager=None) + int_range = nw.int_range(start=start, end=end, step=step, dtype=dtype, eager=False) result = nw.from_native(constructor(data)).select(int_range) output_name = "len" if isinstance(start, nw.Expr) and end is not None else "literal" @@ -100,4 +100,4 @@ def test_int_range_multi_named(start: int | nw.Expr, end: int | nw.Expr) -> None def test_int_range_eager_set_to_lazy_backend() -> None: with pytest.raises(ValueError, match="Cannot create a Series from a lazy backend"): - nw.int_range(start=123, eager=Implementation.DUCKDB) + nw.int_range(123, eager=Implementation.DUCKDB) # type: ignore[call-overload] diff --git a/tests/v1_test.py b/tests/v1_test.py index 72805e8b08..d36ae32035 100644 --- a/tests/v1_test.py +++ b/tests/v1_test.py @@ -1045,7 +1045,7 @@ def test_int_range() -> None: def minimal_function(data: nw_v1.Series[Any]) -> None: data.is_null() - col = nw_v1.int_range(0, 3, eager="pandas") + col = nw_v1.int_range(0, 3, eager="pandas") # type: ignore[call-overload] # check this doesn't raise type-checking errors minimal_function(col) assert isinstance(col, nw_v1.Series) From a3496eda2bc264f126a57f716f037680c2a38c22 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 26 Jul 2025 23:03:21 +0200 Subject: [PATCH 06/45] skip if impl not installed --- tests/expr_and_series/int_range_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/expr_and_series/int_range_test.py b/tests/expr_and_series/int_range_test.py index 5a39efea28..82dbec0515 100644 --- a/tests/expr_and_series/int_range_test.py +++ 
b/tests/expr_and_series/int_range_test.py @@ -34,6 +34,7 @@ def test_int_range_eager( dtype: type[IntegerType] | IntegerType, impl: nw.Implementation, ) -> None: + pytest.importorskip(impl.value) series = nw.int_range(start=start, end=end, step=step, dtype=dtype, eager=impl) assert series.dtype == dtype From dc263c8be6a1e4d2149a0b1b93f8d03d388445d9 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 26 Jul 2025 23:30:33 +0200 Subject: [PATCH 07/45] overloads? --- narwhals/functions.py | 46 ++++++++++++++++++++++++- narwhals/stable/v1/__init__.py | 21 ++++++++--- tests/expr_and_series/int_range_test.py | 6 ++-- 3 files changed, 64 insertions(+), 9 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 6b35ab6bd5..bb5b3381f7 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1943,6 +1943,17 @@ def int_range( ) -> Series[Any]: ... +@overload +def int_range( + start: int | Expr, + end: int | Expr | None = None, + step: int = 1, + *, + dtype: IntegerType | type[IntegerType] = Int64, + eager: ModuleType | Implementation | str | Literal[False] = False, +) -> Expr | Series[Any]: ... + + def int_range( start: int | Expr, end: int | Expr | None = None, @@ -2007,13 +2018,46 @@ def int_range( return _int_range_impl(start, end, step, dtype=dtype, eager=eager) +@overload +def _int_range_impl( + start: int | Expr, + end: int | Expr | None, + step: int, + *, + dtype: IntegerType | type[IntegerType], + eager: Literal[False], +) -> Expr: ... + + +@overload +def _int_range_impl( + start: int | Expr, + end: int | Expr | None, + step: int, + *, + dtype: IntegerType | type[IntegerType], + eager: ModuleType | Implementation | str, +) -> Series[Any]: ... + + +@overload +def _int_range_impl( + start: int | Expr, + end: int | Expr | None, + step: int, + *, + dtype: IntegerType | type[IntegerType], + eager: ModuleType | Implementation | str | Literal[False], +) -> Expr | Series[Any]: ... 
+ + def _int_range_impl( start: int | Expr, end: int | Expr | None, step: int, *, dtype: IntegerType | type[IntegerType], - eager: ModuleType | Implementation | str | Literal[False] | None, + eager: ModuleType | Implementation | str | Literal[False], ) -> Expr | Series[Any]: from narwhals._utils import isinstance_or_issubclass from narwhals.dtypes import IntegerType diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 79862bcae0..7e6be58c47 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -1893,25 +1893,36 @@ def scan_parquet( @overload def int_range( start: int | Expr, - end: int | Expr | None = None, - step: int = 1, + end: int | Expr | None, + step: int, *, dtype: IntegerType | type[IntegerType], - eager: Literal[False] = False, + eager: Literal[False], ) -> Expr: ... @overload def int_range( start: int | Expr, - end: int | Expr | None = None, - step: int = 1, + end: int | Expr | None, + step: int, *, dtype: IntegerType | type[IntegerType], eager: ModuleType | Implementation | str, ) -> Series[Any]: ... +@overload +def int_range( + start: int | Expr, + end: int | Expr | None, + step: int, + *, + dtype: IntegerType | type[IntegerType] = Int64, + eager: ModuleType | Implementation | str | Literal[False], +) -> Expr | Series[Any]: ... 
+ + def int_range( start: int | Expr, end: int | Expr | None = None, diff --git a/tests/expr_and_series/int_range_test.py b/tests/expr_and_series/int_range_test.py index 82dbec0515..44c85cb3f6 100644 --- a/tests/expr_and_series/int_range_test.py +++ b/tests/expr_and_series/int_range_test.py @@ -92,13 +92,13 @@ def test_int_range_non_int_dtype(dtype: DType) -> None: (1, nw.col("foo", "bar").sum()), ], ) -def test_int_range_multi_named(start: int | nw.Expr, end: int | nw.Expr) -> None: +def test_int_range_multi_named(start: int | nw.Expr, end: int | nw.Expr | None) -> None: prefix = "`start`" if isinstance(start, nw.Expr) else "`end`" msg = f"{prefix} must contain exactly one value, got expression returning multiple values" with pytest.raises(ComputeError, match=msg): - nw.int_range(start=start, end=end) # type: ignore[arg-type] + nw.int_range(start=start, end=end) def test_int_range_eager_set_to_lazy_backend() -> None: with pytest.raises(ValueError, match="Cannot create a Series from a lazy backend"): - nw.int_range(123, eager=Implementation.DUCKDB) # type: ignore[call-overload] + nw.int_range(123, eager=Implementation.DUCKDB) From f612646fff3ea4f82d241e25e58f526512eca30a Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sun, 27 Jul 2025 11:07:53 +0200 Subject: [PATCH 08/45] fix overloads --- narwhals/stable/v1/__init__.py | 18 +++++++++--------- tests/expr_and_series/int_range_test.py | 2 +- tests/v1_test.py | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 7e6be58c47..0415816c7d 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -1893,21 +1893,21 @@ def scan_parquet( @overload def int_range( start: int | Expr, - end: int | Expr | None, - step: int, + end: int | Expr | None = None, + step: int = 1, *, - dtype: IntegerType | type[IntegerType], - eager: Literal[False], + dtype: IntegerType | type[IntegerType] = Int64, + eager: Literal[False] = 
False, ) -> Expr: ... @overload def int_range( start: int | Expr, - end: int | Expr | None, - step: int, + end: int | Expr | None = None, + step: int = 1, *, - dtype: IntegerType | type[IntegerType], + dtype: IntegerType | type[IntegerType] = Int64, eager: ModuleType | Implementation | str, ) -> Series[Any]: ... @@ -1915,8 +1915,8 @@ def int_range( @overload def int_range( start: int | Expr, - end: int | Expr | None, - step: int, + end: int | Expr | None = None, + step: int = 1, *, dtype: IntegerType | type[IntegerType] = Int64, eager: ModuleType | Implementation | str | Literal[False], diff --git a/tests/expr_and_series/int_range_test.py b/tests/expr_and_series/int_range_test.py index 44c85cb3f6..9a3af7f759 100644 --- a/tests/expr_and_series/int_range_test.py +++ b/tests/expr_and_series/int_range_test.py @@ -82,7 +82,7 @@ def test_int_range_lazy( def test_int_range_non_int_dtype(dtype: DType) -> None: msg = f"non-integer `dtype` passed to `int_range`: {dtype}" with pytest.raises(ComputeError, match=msg): - nw.int_range(start=0, end=3, dtype=dtype) # type: ignore[arg-type] + nw.int_range(start=0, end=3, dtype=dtype) # type: ignore[call-overload] # pyright: ignore[reportArgumentType] @pytest.mark.parametrize( diff --git a/tests/v1_test.py b/tests/v1_test.py index d36ae32035..72805e8b08 100644 --- a/tests/v1_test.py +++ b/tests/v1_test.py @@ -1045,7 +1045,7 @@ def test_int_range() -> None: def minimal_function(data: nw_v1.Series[Any]) -> None: data.is_null() - col = nw_v1.int_range(0, 3, eager="pandas") # type: ignore[call-overload] + col = nw_v1.int_range(0, 3, eager="pandas") # check this doesn't raise type-checking errors minimal_function(col) assert isinstance(col, nw_v1.Series) From b8b6ae20830a5a83ed25371640d454a26ad3dd75 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Mon, 28 Jul 2025 16:20:40 +0200 Subject: [PATCH 09/45] add in v2.__all__ --- narwhals/stable/v2/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/narwhals/stable/v2/__init__.py 
b/narwhals/stable/v2/__init__.py index 432544d2cf..0564b5ce32 100644 --- a/narwhals/stable/v2/__init__.py +++ b/narwhals/stable/v2/__init__.py @@ -1286,6 +1286,7 @@ def int_range( "from_numpy", "generate_temporary_column_name", "get_native_namespace", + "int_range", "is_ordered_categorical", "len", "lit", From 22c52eb8495ddb964c6acfcf37203a4dafdc8007 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Mon, 28 Jul 2025 16:53:48 +0200 Subject: [PATCH 10/45] factor out _native_int_range into utils --- narwhals/_arrow/dataframe.py | 13 ++++++------- narwhals/_arrow/series.py | 37 ++++++++++++++++++------------------ narwhals/_arrow/utils.py | 19 ++++++++++++++++++ 3 files changed, 44 insertions(+), 25 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 886a4c27ba..d7d1ad5de9 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -8,7 +8,7 @@ import pyarrow.compute as pc from narwhals._arrow.series import ArrowSeries -from narwhals._arrow.utils import native_to_narwhals_dtype +from narwhals._arrow.utils import _native_int_range, native_to_narwhals_dtype from narwhals._compliant import EagerDataFrame from narwhals._expression_parsing import ExprKind from narwhals._utils import ( @@ -643,10 +643,8 @@ def write_csv(self, file: str | Path | BytesIO | None) -> str | None: return None def is_unique(self) -> ArrowSeries: - import numpy as np # ignore-banned-import - col_token = generate_temporary_column_name(n_bytes=8, columns=self.columns) - row_index = pa.array(np.arange(len(self))) + row_index = _native_int_range(0, len(self), backend_version=self._backend_version) keep_idx = ( self.native.append_column(col_token, row_index) .group_by(self.columns) @@ -669,8 +667,6 @@ def unique( ) -> Self: # The param `maintain_order` is only here for compatibility with the Polars API # and has no effect on the output. 
- import numpy as np # ignore-banned-import - if subset and (error := self._check_columns_exist(subset)): raise error subset = list(subset or self.columns) @@ -680,8 +676,11 @@ def unique( agg_func = ArrowGroupBy._REMAP_UNIQUE[keep] col_token = generate_temporary_column_name(n_bytes=8, columns=self.columns) + col_value = _native_int_range( + 0, len(self), backend_version=self._backend_version + ) keep_idx_native = ( - self.native.append_column(col_token, pa.array(np.arange(len(self)))) + self.native.append_column(col_token, col_value) .group_by(subset) .aggregate([(col_token, agg_func)]) .column(f"{col_token}_{agg_func}") diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index f9de6dd76a..d5c372181d 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -11,6 +11,7 @@ from narwhals._arrow.series_str import ArrowSeriesStringNamespace from narwhals._arrow.series_struct import ArrowSeriesStructNamespace from narwhals._arrow.utils import ( + _native_int_range, cast_for_truediv, chunked_array, extract_native, @@ -171,17 +172,15 @@ def _int_range( context: _LimitedContext, name: str, ) -> Self: - version = context._version - dtype_pa = narwhals_to_native_dtype(dtype, version) - if cls._implementation._backend_version() < (21, 0, 0): # pragma: no cover - import numpy as np # ignore-banned-import - - data = np.arange(start=start, stop=end, step=step) - else: - data = pc.cast( # type: ignore[assignment] - pa.arange(start=start, stop=end, step=step), # type: ignore[attr-defined] - dtype_pa, - ) + dtype_pa = narwhals_to_native_dtype(dtype, context._version) + backend_version = cls._implementation._backend_version() + data = _native_int_range( + start=start, + end=end, + step=step, + dtype=dtype_pa, + backend_version=backend_version, + ) return cls.from_native( chunked_array([data], dtype_pa), name=name, context=context ) @@ -688,7 +687,9 @@ def fill_aux( # then it calculates the distance of each new index and the original index # if the 
distance is equal to or less than the limit and the original value is null, it is replaced valid_mask = pc.is_valid(arr) - indices = pa.array(np.arange(len(arr)), type=pa.int64()) + indices = _native_int_range( + 0, len(arr), backend_version=self._backend_version + ) if direction == "forward": valid_index = np.maximum.accumulate(np.where(valid_mask, indices, -1)) distance = indices - valid_index @@ -735,9 +736,9 @@ def is_unique(self) -> ArrowSeries: return self.to_frame().is_unique().alias(self.name) def is_first_distinct(self) -> Self: - import numpy as np # ignore-banned-import - - row_number = pa.array(np.arange(len(self))) + row_number = _native_int_range( + 0, len(self), backend_version=self._backend_version + ) col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name]) first_distinct_index = ( pa.Table.from_arrays([self.native], names=[self.name]) @@ -750,9 +751,9 @@ def is_first_distinct(self) -> Self: return self._with_native(pc.is_in(row_number, first_distinct_index)) def is_last_distinct(self) -> Self: - import numpy as np # ignore-banned-import - - row_number = pa.array(np.arange(len(self))) + row_number = _native_int_range( + 0, len(self), backend_version=self._backend_version + ) col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name]) last_distinct_index = ( pa.Table.from_arrays([self.native], names=[self.name]) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 65dca4c2f4..47f80a7291 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -440,4 +440,23 @@ def cast_to_comparable_string_types( return (ca.cast(dtype) for ca in chunked_arrays), lit(separator, dtype) +def _native_int_range( + start: int, + end: int, + step: int = 1, + *, + dtype: pa.DataType | None = None, + backend_version: tuple[int, ...], +) -> ArrayAny: + dtype = dtype if dtype is not None else pa.int64() + if backend_version < (21, 0, 0): # pragma: no cover + import numpy as np # ignore-banned-import + + return 
pa.array(np.arange(start=start, stop=end, step=step), type=dtype) + return pc.cast( # type: ignore[assignment] + pa.arange(start=start, stop=end, step=step), # type: ignore[attr-defined] + dtype, + ) + + class ArrowSeriesNamespace(EagerSeriesNamespace["ArrowSeries", "ChunkedArrayAny"]): ... From 8f4f647ca9e808a4958d08a3039b01ca9c800d6c Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Mon, 28 Jul 2025 17:07:57 +0200 Subject: [PATCH 11/45] resolve majority of typing and import issues --- narwhals/_arrow/namespace.py | 5 ++- narwhals/_arrow/utils.py | 2 +- narwhals/_pandas_like/namespace.py | 5 ++- narwhals/_polars/namespace.py | 5 ++- narwhals/functions.py | 50 +++++------------------------- narwhals/typing.py | 3 +- 6 files changed, 17 insertions(+), 53 deletions(-) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index f4b5e7b46e..0d1ef847e6 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -26,8 +26,7 @@ from narwhals._arrow.typing import ArrayOrScalar, ChunkedArrayAny, Incomplete from narwhals._compliant.typing import ScalarKwargs from narwhals._utils import Version - from narwhals.dtypes import IntegerType - from narwhals.typing import IntoDType, NonNestedLiteral + from narwhals.typing import IntegerDType, IntoDType, NonNestedLiteral class ArrowNamespace( @@ -291,7 +290,7 @@ def int_range( end: int | ArrowExpr, step: int, *, - dtype: IntegerType | type[IntegerType], + dtype: IntegerDType, ) -> ArrowExpr: def func(df: ArrowDataFrame) -> list[ArrowSeries]: if isinstance(start, ArrowExpr): diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 47f80a7291..88373c81c2 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -453,7 +453,7 @@ def _native_int_range( import numpy as np # ignore-banned-import return pa.array(np.arange(start=start, stop=end, step=step), type=dtype) - return pc.cast( # type: ignore[assignment] + return pc.cast( # type: ignore[return-value] 
pa.arange(start=start, stop=end, step=step), # type: ignore[attr-defined] dtype, ) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 3d7652807c..ec93d0154e 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -25,8 +25,7 @@ from narwhals._compliant.typing import ScalarKwargs from narwhals._utils import Implementation, Version - from narwhals.dtypes import IntegerType - from narwhals.typing import IntoDType, NonNestedLiteral + from narwhals.typing import IntegerDType, IntoDType, NonNestedLiteral Incomplete: TypeAlias = Any @@ -377,7 +376,7 @@ def int_range( end: int | PandasLikeExpr, step: int, *, - dtype: IntegerType | type[IntegerType], + dtype: IntegerDType, ) -> PandasLikeExpr: def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: if isinstance(start, PandasLikeExpr): diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py index 11d708c1f2..f24c9e47be 100644 --- a/narwhals/_polars/namespace.py +++ b/narwhals/_polars/namespace.py @@ -22,9 +22,8 @@ from narwhals._polars.dataframe import Method, PolarsDataFrame, PolarsLazyFrame from narwhals._polars.typing import FrameT from narwhals._utils import Version, _LimitedContext - from narwhals.dtypes import IntegerType from narwhals.schema import Schema - from narwhals.typing import Into1DArray, IntoDType, TimeUnit, _2DArray + from narwhals.typing import IntegerDType, Into1DArray, IntoDType, TimeUnit, _2DArray class PolarsNamespace: @@ -210,7 +209,7 @@ def int_range( end: int | PolarsExpr, step: int, *, - dtype: IntegerType | type[IntegerType], + dtype: IntegerDType, ) -> PolarsExpr: start_ = start if isinstance(start, int) else start.native end_ = end if isinstance(end, int) else end.native diff --git a/narwhals/functions.py b/narwhals/functions.py index 633129f110..0dfa2a42ea 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -45,11 +45,12 @@ from narwhals._compliant import CompliantExpr, 
CompliantNamespace from narwhals._translate import IntoArrowTable from narwhals.dataframe import DataFrame, LazyFrame - from narwhals.dtypes import DType, IntegerType + from narwhals.dtypes import DType from narwhals.schema import Schema from narwhals.typing import ( ConcatMethod, FrameT, + IntegerDType, IntoDType, IntoExpr, NativeFrame, @@ -1784,7 +1785,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerType | type[IntegerType], + dtype: IntegerDType, eager: Literal[False] = False, ) -> Expr: ... @@ -1795,7 +1796,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerType | type[IntegerType], + dtype: IntegerDType, eager: ModuleType | Implementation | str, ) -> Series[Any]: ... @@ -1806,7 +1807,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerType | type[IntegerType] = Int64, + dtype: IntegerDType = Int64, eager: ModuleType | Implementation | str | Literal[False] = False, ) -> Expr | Series[Any]: ... @@ -1816,7 +1817,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerType | type[IntegerType] = Int64, + dtype: IntegerDType = Int64, eager: ModuleType | Implementation | str | Literal[False] = False, ) -> Expr | Series[Any]: """Generate a range of integers. @@ -1875,52 +1876,17 @@ def int_range( return _int_range_impl(start, end, step, dtype=dtype, eager=eager) -@overload -def _int_range_impl( - start: int | Expr, - end: int | Expr | None, - step: int, - *, - dtype: IntegerType | type[IntegerType], - eager: Literal[False], -) -> Expr: ... - - -@overload -def _int_range_impl( - start: int | Expr, - end: int | Expr | None, - step: int, - *, - dtype: IntegerType | type[IntegerType], - eager: ModuleType | Implementation | str, -) -> Series[Any]: ... 
- - -@overload -def _int_range_impl( - start: int | Expr, - end: int | Expr | None, - step: int, - *, - dtype: IntegerType | type[IntegerType], - eager: ModuleType | Implementation | str | Literal[False], -) -> Expr | Series[Any]: ... - - def _int_range_impl( start: int | Expr, end: int | Expr | None, step: int, *, - dtype: IntegerType | type[IntegerType], + dtype: IntegerDType, eager: ModuleType | Implementation | str | Literal[False], ) -> Expr | Series[Any]: - from narwhals._utils import isinstance_or_issubclass - from narwhals.dtypes import IntegerType from narwhals.exceptions import ComputeError - if not isinstance_or_issubclass(dtype, IntegerType): + if not dtype.is_integer(): msg = f"non-integer `dtype` passed to `int_range`: {dtype}" raise ComputeError(msg) diff --git a/narwhals/typing.py b/narwhals/typing.py index 4ea3a5dbba..0fdcca4287 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -361,7 +361,8 @@ def Binary(self) -> type[dtypes.Binary]: ... NonNestedDType: TypeAlias = "dtypes.NumericType | dtypes.TemporalType | dtypes.String | dtypes.Boolean | dtypes.Binary | dtypes.Categorical | dtypes.Unknown | dtypes.Object" """Any Narwhals DType that does not have required arguments.""" - +IntegerDType: TypeAlias = "dtypes.IntegerType | type[dtypes.IntegerType]" +"""Instance or class of IntegerType""" IntoDType: TypeAlias = "dtypes.DType | type[NonNestedDType]" """Anything that can be converted into a Narwhals DType. 
From 7fb18bb9fd1ebc14aa10cfd41a1ddb499f6bce65 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Mon, 28 Jul 2025 19:57:46 +0200 Subject: [PATCH 12/45] replace all type hints with IntegerDType, ignore import in functions --- .pre-commit-config.yaml | 3 ++- narwhals/_arrow/series.py | 5 +++-- narwhals/_compliant/namespace.py | 5 +++-- narwhals/_compliant/series.py | 5 +++-- narwhals/_pandas_like/series.py | 5 +++-- narwhals/_polars/series.py | 12 +++++++++--- narwhals/functions.py | 4 ++-- narwhals/stable/v1/__init__.py | 10 ++++++---- narwhals/stable/v2/__init__.py | 10 +++++----- 9 files changed, 36 insertions(+), 23 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 08855a538c..6e75e1ab49 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -79,7 +79,8 @@ repos: narwhals/_utils\.py| narwhals/stable/v./_?dtypes.py| narwhals/.*__init__.py| - narwhals/.*typing\.py + narwhals/.*typing\.py| + narwhals/functions.py ) - id: pull-request-target name: don't use `pull_request_target` diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index d5c372181d..3e495c61e3 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -60,10 +60,11 @@ ) from narwhals._compliant.series import HistData from narwhals._utils import Version, _LimitedContext - from narwhals.dtypes import DType, IntegerType + from narwhals.dtypes import DType from narwhals.typing import ( ClosedInterval, FillNullStrategy, + IntegerDType, Into1DArray, IntoDType, NonNestedLiteral, @@ -168,7 +169,7 @@ def _int_range( start: int, end: int, step: int, - dtype: IntegerType | type[IntegerType], + dtype: IntegerDType, context: _LimitedContext, name: str, ) -> Self: diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py index e5fed7d5d9..8032ea9464 100644 --- a/narwhals/_compliant/namespace.py +++ b/narwhals/_compliant/namespace.py @@ -32,10 +32,11 @@ from narwhals._compliant.selectors import CompliantSelectorNamespace 
from narwhals._compliant.when_then import CompliantWhen, EagerWhen from narwhals._utils import Implementation, Version - from narwhals.dtypes import DType, IntegerType + from narwhals.dtypes import DType from narwhals.schema import Schema from narwhals.typing import ( ConcatMethod, + IntegerDType, Into1DArray, IntoDType, NonNestedLiteral, @@ -99,7 +100,7 @@ def int_range( end: int | CompliantExprT, step: int, *, - dtype: IntegerType | type[IntegerType], + dtype: IntegerDType, ) -> CompliantExprT: ... @property def selectors(self) -> CompliantSelectorNamespace[Any, Any]: ... diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index 13110f705e..96d0b048b0 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -39,11 +39,12 @@ from narwhals._compliant.expr import CompliantExpr, EagerExpr from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace from narwhals._utils import Implementation, Version, _LimitedContext - from narwhals.dtypes import DType, IntegerType + from narwhals.dtypes import DType from narwhals.series import Series from narwhals.typing import ( ClosedInterval, FillNullStrategy, + IntegerDType, Into1DArray, IntoDType, MultiIndexSelector, @@ -128,7 +129,7 @@ def _int_range( start: int, end: int, step: int, - dtype: IntegerType | type[IntegerType], + dtype: IntegerDType, context: _LimitedContext, name: str, ) -> Self: ... 
diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 39599a096d..2253df1ead 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -41,10 +41,11 @@ from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.namespace import PandasLikeNamespace from narwhals._utils import Version, _LimitedContext - from narwhals.dtypes import DType, IntegerType + from narwhals.dtypes import DType from narwhals.typing import ( ClosedInterval, FillNullStrategy, + IntegerDType, Into1DArray, IntoDType, NonNestedLiteral, @@ -188,7 +189,7 @@ def _int_range( start: int, end: int, step: int, - dtype: IntegerType | type[IntegerType], + dtype: IntegerDType, context: _LimitedContext, name: str, ) -> Self: diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 3b7f066ee6..520572bc8b 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -29,9 +29,15 @@ from narwhals._polars.expr import PolarsExpr from narwhals._polars.namespace import PolarsNamespace from narwhals._utils import Version, _LimitedContext - from narwhals.dtypes import DType, IntegerType + from narwhals.dtypes import DType from narwhals.series import Series - from narwhals.typing import Into1DArray, IntoDType, MultiIndexSelector, _1DArray + from narwhals.typing import ( + IntegerDType, + Into1DArray, + IntoDType, + MultiIndexSelector, + _1DArray, + ) T = TypeVar("T") IncludeBreakpoint: TypeAlias = Literal[False, True] @@ -171,7 +177,7 @@ def _int_range( start: int, end: int, step: int, - dtype: IntegerType | type[IntegerType], + dtype: IntegerDType, context: _LimitedContext, name: str, ) -> Self: diff --git a/narwhals/functions.py b/narwhals/functions.py index 0dfa2a42ea..d72cadeaf0 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1785,7 +1785,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerDType, + dtype: IntegerDType = Int64, eager: 
Literal[False] = False, ) -> Expr: ... @@ -1796,7 +1796,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerDType, + dtype: IntegerDType = Int64, eager: ModuleType | Implementation | str, ) -> Series[Any]: ... diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index b94e99585c..c6fb2ce907 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -74,6 +74,7 @@ from narwhals.dataframe import MultiColSelector, MultiIndexSelector from narwhals.dtypes import DType from narwhals.typing import ( + IntegerDType, IntoDType, IntoExpr, IntoFrame, @@ -1313,7 +1314,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerType | type[IntegerType] = Int64, + dtype: IntegerDType = Int64, eager: Literal[False] = False, ) -> Expr: ... @@ -1324,7 +1325,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerType | type[IntegerType] = Int64, + dtype: IntegerDType = Int64, eager: ModuleType | Implementation | str, ) -> Series[Any]: ... @@ -1335,7 +1336,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerType | type[IntegerType] = Int64, + dtype: IntegerDType = Int64, eager: ModuleType | Implementation | str | Literal[False], ) -> Expr | Series[Any]: ... @@ -1345,7 +1346,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerType | type[IntegerType] = Int64, + dtype: IntegerDType = Int64, eager: ModuleType | Implementation | str | Literal[False] = False, ) -> Expr | Series[Any]: """Generate a range of integers. 
@@ -1389,6 +1390,7 @@ def int_range( "Int32", "Int64", "Int128", + "IntegerType", "LazyFrame", "List", "Object", diff --git a/narwhals/stable/v2/__init__.py b/narwhals/stable/v2/__init__.py index 0564b5ce32..8102d45daf 100644 --- a/narwhals/stable/v2/__init__.py +++ b/narwhals/stable/v2/__init__.py @@ -38,7 +38,6 @@ Int32, Int64, Int128, - IntegerType, List, Object, String, @@ -69,6 +68,7 @@ from narwhals.dataframe import MultiColSelector, MultiIndexSelector from narwhals.dtypes import DType from narwhals.typing import ( + IntegerDType, IntoDType, IntoExpr, IntoFrame, @@ -1178,7 +1178,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerType | type[IntegerType] = Int64, + dtype: IntegerDType = Int64, eager: Literal[False] = False, ) -> Expr: ... @@ -1189,7 +1189,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerType | type[IntegerType] = Int64, + dtype: IntegerDType = Int64, eager: ModuleType | Implementation | str, ) -> Series[Any]: ... @@ -1200,7 +1200,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerType | type[IntegerType] = Int64, + dtype: IntegerDType = Int64, eager: ModuleType | Implementation | str | Literal[False], ) -> Expr | Series[Any]: ... @@ -1210,7 +1210,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerType | type[IntegerType] = Int64, + dtype: IntegerDType = Int64, eager: ModuleType | Implementation | str | Literal[False] = False, ) -> Expr | Series[Any]: """Generate a range of integers. 
From dde4a990b1f1708d6b3f24c846204592a4a1be14 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Mon, 28 Jul 2025 20:44:20 +0200 Subject: [PATCH 13/45] Dan's suggestion --- .pre-commit-config.yaml | 3 +-- narwhals/functions.py | 12 +++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6e75e1ab49..08855a538c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -79,8 +79,7 @@ repos: narwhals/_utils\.py| narwhals/stable/v./_?dtypes.py| narwhals/.*__init__.py| - narwhals/.*typing\.py| - narwhals/functions.py + narwhals/.*typing\.py ) - id: pull-request-target name: don't use `pull_request_target` diff --git a/narwhals/functions.py b/narwhals/functions.py index d72cadeaf0..808da1b8c7 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -31,7 +31,6 @@ is_numpy_array_2d, is_pyarrow_table, ) -from narwhals.dtypes import Int64 from narwhals.exceptions import InvalidOperationError from narwhals.expr import Expr from narwhals.series import Series @@ -64,6 +63,9 @@ _IntoSchema: TypeAlias = "Mapping[str, DType] | Schema | Sequence[str] | None" +dtypes = Version.MAIN.dtypes + + def concat(items: Iterable[FrameT], *, how: ConcatMethod = "vertical") -> FrameT: """Concatenate multiple DataFrames, LazyFrames into a single entity. @@ -1785,7 +1787,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerDType = Int64, + dtype: IntegerDType = dtypes.Int64, eager: Literal[False] = False, ) -> Expr: ... @@ -1796,7 +1798,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerDType = Int64, + dtype: IntegerDType = dtypes.Int64, eager: ModuleType | Implementation | str, ) -> Series[Any]: ... 
@@ -1807,7 +1809,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerDType = Int64, + dtype: IntegerDType = dtypes.Int64, eager: ModuleType | Implementation | str | Literal[False] = False, ) -> Expr | Series[Any]: ... @@ -1817,7 +1819,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerDType = Int64, + dtype: IntegerDType = dtypes.Int64, eager: ModuleType | Implementation | str | Literal[False] = False, ) -> Expr | Series[Any]: """Generate a range of integers. From 876a771065c5e0603dafe21f72fc44362ff1ac22 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 29 Jul 2025 18:58:08 +0000 Subject: [PATCH 14/45] refactor: Remove unused `IntegerType` Hanging around after https://github.com/narwhals-dev/narwhals/pull/2895#issuecomment-3127556227 --- narwhals/stable/v1/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index c6fb2ce907..ff4dafb930 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -48,7 +48,6 @@ Int32, Int64, Int128, - IntegerType, List, Object, String, @@ -1390,7 +1389,6 @@ def int_range( "Int32", "Int64", "Int128", - "IntegerType", "LazyFrame", "List", "Object", From 5eeda2d5ab624d4129aa09190dd16ddf73cf0841 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 29 Jul 2025 20:01:01 +0000 Subject: [PATCH 15/45] refactor: Reuse a single `Implementation.PYARROW._backend_version()` Added the same for `_polars.utils` in #2764 --- narwhals/_arrow/dataframe.py | 6 ++---- narwhals/_arrow/series.py | 21 ++++----------------- narwhals/_arrow/utils.py | 14 ++++++-------- 3 files changed, 12 insertions(+), 29 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index d7d1ad5de9..cfc85f5e6a 100644 --- a/narwhals/_arrow/dataframe.py +++
b/narwhals/_arrow/dataframe.py @@ -644,7 +644,7 @@ def write_csv(self, file: str | Path | BytesIO | None) -> str | None: def is_unique(self) -> ArrowSeries: col_token = generate_temporary_column_name(n_bytes=8, columns=self.columns) - row_index = _native_int_range(0, len(self), backend_version=self._backend_version) + row_index = _native_int_range(0, len(self)) keep_idx = ( self.native.append_column(col_token, row_index) .group_by(self.columns) @@ -676,9 +676,7 @@ def unique( agg_func = ArrowGroupBy._REMAP_UNIQUE[keep] col_token = generate_temporary_column_name(n_bytes=8, columns=self.columns) - col_value = _native_int_range( - 0, len(self), backend_version=self._backend_version - ) + col_value = _native_int_range(0, len(self)) keep_idx_native = ( self.native.append_column(col_token, col_value) .group_by(subset) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 3e495c61e3..c520353a5b 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -174,14 +174,7 @@ def _int_range( name: str, ) -> Self: dtype_pa = narwhals_to_native_dtype(dtype, context._version) - backend_version = cls._implementation._backend_version() - data = _native_int_range( - start=start, - end=end, - step=step, - dtype=dtype_pa, - backend_version=backend_version, - ) + data = _native_int_range(start=start, end=end, step=step, dtype=dtype_pa) return cls.from_native( chunked_array([data], dtype_pa), name=name, context=context ) @@ -688,9 +681,7 @@ def fill_aux( # then it calculates the distance of each new index and the original index # if the distance is equal to or less than the limit and the original value is null, it is replaced valid_mask = pc.is_valid(arr) - indices = _native_int_range( - 0, len(arr), backend_version=self._backend_version - ) + indices = _native_int_range(0, len(arr)) if direction == "forward": valid_index = np.maximum.accumulate(np.where(valid_mask, indices, -1)) distance = indices - valid_index @@ -737,9 +728,7 @@ def is_unique(self) -> 
ArrowSeries: return self.to_frame().is_unique().alias(self.name) def is_first_distinct(self) -> Self: - row_number = _native_int_range( - 0, len(self), backend_version=self._backend_version - ) + row_number = _native_int_range(0, len(self)) col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name]) first_distinct_index = ( pa.Table.from_arrays([self.native], names=[self.name]) @@ -752,9 +741,7 @@ def is_first_distinct(self) -> Self: return self._with_native(pc.is_in(row_number, first_distinct_index)) def is_last_distinct(self) -> Self: - row_number = _native_int_range( - 0, len(self), backend_version=self._backend_version - ) + row_number = _native_int_range(0, len(self)) col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name]) last_distinct_index = ( pa.Table.from_arrays([self.native], names=[self.name]) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 88373c81c2..28c917e054 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -7,7 +7,7 @@ import pyarrow.compute as pc from narwhals._compliant import EagerSeriesNamespace -from narwhals._utils import isinstance_or_issubclass +from narwhals._utils import Implementation, isinstance_or_issubclass if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Mapping @@ -58,6 +58,9 @@ def extract_regex( is_timestamp, ) +BACKEND_VERSION = Implementation.PYARROW._backend_version() +"""Static backend version for `pyarrow`.""" + UNITS_DICT: Mapping[IntervalUnit, NativeIntervalUnit] = { "y": "year", "q": "quarter", @@ -441,15 +444,10 @@ def cast_to_comparable_string_types( def _native_int_range( - start: int, - end: int, - step: int = 1, - *, - dtype: pa.DataType | None = None, - backend_version: tuple[int, ...], + start: int, end: int, step: int = 1, *, dtype: pa.DataType | None = None ) -> ArrayAny: dtype = dtype if dtype is not None else pa.int64() - if backend_version < (21, 0, 0): # pragma: no cover + if BACKEND_VERSION < (21, 0, 0): # pragma: 
no cover import numpy as np # ignore-banned-import return pa.array(np.arange(start=start, stop=end, step=step), type=dtype) From 704da84bb5e8bd9865a3e334457626195466c0f4 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 29 Jul 2025 20:19:06 +0000 Subject: [PATCH 16/45] fix(typing): Kinda fix `_native_int_range` --- narwhals/_arrow/utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 28c917e054..3b3bb9a03e 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -21,6 +21,7 @@ ArrayOrScalarT1, ArrayOrScalarT2, ChunkedArrayAny, + Incomplete, NativeIntervalUnit, ScalarAny, ) @@ -451,10 +452,10 @@ def _native_int_range( import numpy as np # ignore-banned-import return pa.array(np.arange(start=start, stop=end, step=step), type=dtype) - return pc.cast( # type: ignore[return-value] - pa.arange(start=start, stop=end, step=step), # type: ignore[attr-defined] - dtype, - ) + # NOTE: Added in https://github.com/apache/arrow/pull/46778 + pa_arange = cast("Incomplete", pa.arange) # type: ignore[attr-defined] + arr: ArrayAny = pa_arange(start=start, stop=end, step=step) + return arr.cast(dtype) class ArrowSeriesNamespace(EagerSeriesNamespace["ArrowSeries", "ChunkedArrayAny"]): ... 
From 7ec01e0d166f26dcc497c357e64dc19abc25d64b Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 29 Jul 2025 20:24:10 +0000 Subject: [PATCH 17/45] refactor: Add `int64` --- narwhals/_arrow/utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 3b3bb9a03e..9404d5f5fd 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -1,7 +1,7 @@ from __future__ import annotations from functools import lru_cache -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any, Final, cast import pyarrow as pa import pyarrow.compute as pc @@ -78,6 +78,9 @@ def extract_regex( lit = pa.scalar """Alias for `pyarrow.scalar`.""" +int64: Final = pa.int64() +"""Initialized `pyarrow.types.Int64Type`.""" + def extract_py_scalar(value: Any, /) -> Any: from narwhals._arrow.series import maybe_extract_py_scalar @@ -445,9 +448,8 @@ def cast_to_comparable_string_types( def _native_int_range( - start: int, end: int, step: int = 1, *, dtype: pa.DataType | None = None + start: int, end: int, step: int = 1, *, dtype: pa.DataType = int64 ) -> ArrayAny: - dtype = dtype if dtype is not None else pa.int64() if BACKEND_VERSION < (21, 0, 0): # pragma: no cover import numpy as np # ignore-banned-import From 57393ec07a130a866ec6d2b20895760ced16c622 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 29 Jul 2025 20:35:55 +0000 Subject: [PATCH 18/45] =?UTF-8?q?fix(typing):=20make=20marco's=20checker?= =?UTF-8?q?=20happy=20=F0=9F=98=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves https://github.com/narwhals-dev/narwhals/pull/2895#discussion_r2240954364 --- narwhals/functions.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 808da1b8c7..a74aad9558 
100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -44,7 +44,7 @@ from narwhals._compliant import CompliantExpr, CompliantNamespace from narwhals._translate import IntoArrowTable from narwhals.dataframe import DataFrame, LazyFrame - from narwhals.dtypes import DType + from narwhals.dtypes import DType, Int64 as _Int64 from narwhals.schema import Schema from narwhals.typing import ( ConcatMethod, @@ -63,7 +63,7 @@ _IntoSchema: TypeAlias = "Mapping[str, DType] | Schema | Sequence[str] | None" -dtypes = Version.MAIN.dtypes +Int64: _Int64 = Version.MAIN.dtypes.Int64() def concat(items: Iterable[FrameT], *, how: ConcatMethod = "vertical") -> FrameT: @@ -1787,7 +1787,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerDType = dtypes.Int64, + dtype: IntegerDType = Int64, eager: Literal[False] = False, ) -> Expr: ... @@ -1798,7 +1798,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerDType = dtypes.Int64, + dtype: IntegerDType = Int64, eager: ModuleType | Implementation | str, ) -> Series[Any]: ... @@ -1809,7 +1809,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerDType = dtypes.Int64, + dtype: IntegerDType = Int64, eager: ModuleType | Implementation | str | Literal[False] = False, ) -> Expr | Series[Any]: ... @@ -1819,7 +1819,7 @@ def int_range( end: int | Expr | None = None, step: int = 1, *, - dtype: IntegerDType = dtypes.Int64, + dtype: IntegerDType = Int64, eager: ModuleType | Implementation | str | Literal[False] = False, ) -> Expr | Series[Any]: """Generate a range of integers. 
From f1857f84b466b53c1bfb54049dffdf87e36b15b8 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 29 Jul 2025 20:45:03 +0000 Subject: [PATCH 19/45] refactor(typing): Omit defaults in overloads --- narwhals/functions.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index a74aad9558..9aae8a0585 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1784,21 +1784,21 @@ def coalesce( @overload def int_range( start: int | Expr, - end: int | Expr | None = None, - step: int = 1, + end: int | Expr | None = ..., + step: int = ..., *, - dtype: IntegerDType = Int64, - eager: Literal[False] = False, + dtype: IntegerDType = ..., + eager: Literal[False] = ..., ) -> Expr: ... @overload def int_range( start: int | Expr, - end: int | Expr | None = None, - step: int = 1, + end: int | Expr | None = ..., + step: int = ..., *, - dtype: IntegerDType = Int64, + dtype: IntegerDType = ..., eager: ModuleType | Implementation | str, ) -> Series[Any]: ... @@ -1806,11 +1806,11 @@ def int_range( @overload def int_range( start: int | Expr, - end: int | Expr | None = None, - step: int = 1, + end: int | Expr | None = ..., + step: int = ..., *, - dtype: IntegerDType = Int64, - eager: ModuleType | Implementation | str | Literal[False] = False, + dtype: IntegerDType = ..., + eager: ModuleType | Implementation | str | Literal[False] = ..., ) -> Expr | Series[Any]: ... 
From c94b4d4c2fd36cf21e9bb6884ecd7a2dde726fef Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 29 Jul 2025 20:46:15 +0000 Subject: [PATCH 20/45] refactor(typing): Remove unreached overload We can add this back if we have a test that requires it --- narwhals/functions.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 9aae8a0585..ded756a713 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1803,17 +1803,6 @@ def int_range( ) -> Series[Any]: ... -@overload -def int_range( - start: int | Expr, - end: int | Expr | None = ..., - step: int = ..., - *, - dtype: IntegerDType = ..., - eager: ModuleType | Implementation | str | Literal[False] = ..., -) -> Expr | Series[Any]: ... - - def int_range( start: int | Expr, end: int | Expr | None = None, From fe2bc9d06152053735bd8b7d82173ca534521917 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 29 Jul 2025 20:49:41 +0000 Subject: [PATCH 21/45] refactor(typing): rinse/repeat for stable --- narwhals/stable/v1/__init__.py | 25 +++++++------------------ narwhals/stable/v2/__init__.py | 25 +++++++------------------ 2 files changed, 14 insertions(+), 36 deletions(-) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 0e0c19fe8a..261f2d7f98 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -1308,36 +1308,25 @@ def scan_parquet( @overload def int_range( start: int | Expr, - end: int | Expr | None = None, - step: int = 1, + end: int | Expr | None = ..., + step: int = ..., *, - dtype: IntegerDType = Int64, - eager: Literal[False] = False, + dtype: IntegerDType = ..., + eager: Literal[False] = ..., ) -> Expr: ... 
@overload def int_range( start: int | Expr, - end: int | Expr | None = None, - step: int = 1, + end: int | Expr | None = ..., + step: int = ..., *, - dtype: IntegerDType = Int64, + dtype: IntegerDType = ..., eager: ModuleType | Implementation | str, ) -> Series[Any]: ... -@overload -def int_range( - start: int | Expr, - end: int | Expr | None = None, - step: int = 1, - *, - dtype: IntegerDType = Int64, - eager: ModuleType | Implementation | str | Literal[False], -) -> Expr | Series[Any]: ... - - def int_range( start: int | Expr, end: int | Expr | None = None, diff --git a/narwhals/stable/v2/__init__.py b/narwhals/stable/v2/__init__.py index 8102d45daf..7cdfe5dfb2 100644 --- a/narwhals/stable/v2/__init__.py +++ b/narwhals/stable/v2/__init__.py @@ -1175,36 +1175,25 @@ def scan_parquet( @overload def int_range( start: int | Expr, - end: int | Expr | None = None, - step: int = 1, + end: int | Expr | None = ..., + step: int = ..., *, - dtype: IntegerDType = Int64, - eager: Literal[False] = False, + dtype: IntegerDType = ..., + eager: Literal[False] = ..., ) -> Expr: ... @overload def int_range( start: int | Expr, - end: int | Expr | None = None, - step: int = 1, + end: int | Expr | None = ..., + step: int = ..., *, - dtype: IntegerDType = Int64, + dtype: IntegerDType = ..., eager: ModuleType | Implementation | str, ) -> Series[Any]: ... -@overload -def int_range( - start: int | Expr, - end: int | Expr | None = None, - step: int = 1, - *, - dtype: IntegerDType = Int64, - eager: ModuleType | Implementation | str | Literal[False], -) -> Expr | Series[Any]: ... 
- - def int_range( start: int | Expr, end: int | Expr | None = None, From fc2986438b49d6861b5dab541f78ec86534a6d57 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 30 Jul 2025 09:53:32 +0000 Subject: [PATCH 22/45] refactor: fix `polars` typing, use kwargs when required --- narwhals/_polars/namespace.py | 12 +++--------- narwhals/_polars/series.py | 10 +++------- narwhals/_polars/utils.py | 7 ++++++- 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py index f24c9e47be..761f90f95e 100644 --- a/narwhals/_polars/namespace.py +++ b/narwhals/_polars/namespace.py @@ -16,8 +16,6 @@ from collections.abc import Iterable, Mapping, Sequence from datetime import timezone - from polars.datatypes import IntegerType as PlIntegerType - from narwhals._compliant import CompliantSelectorNamespace, CompliantWhen from narwhals._polars.dataframe import Method, PolarsDataFrame, PolarsLazyFrame from narwhals._polars.typing import FrameT @@ -213,13 +211,9 @@ def int_range( ) -> PolarsExpr: start_ = start if isinstance(start, int) else start.native end_ = end if isinstance(end, int) else end.native - pl_dtype: PlIntegerType = narwhals_to_native_dtype( - dtype=dtype, version=self._version - ) # type: ignore[assignment] - return self._expr( - pl.int_range(start=start_, end=end_, step=step, dtype=pl_dtype, eager=False), - version=self._version, - ) + pl_dtype = narwhals_to_native_dtype(dtype, self._version) + native = pl.int_range(start_, end_, step, dtype=pl_dtype) + return self._expr(native, self._version) # NOTE: Implementation is too different to annotate correctly (vs other `*SelectorNamespace`) # 1. 
Others have lots of private stuff for code reuse diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 520572bc8b..cda78a4636 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -22,7 +22,6 @@ import pandas as pd import pyarrow as pa - from polars.datatypes import IntegerType as PlIntegerType from typing_extensions import Self, TypeAlias, TypeIs from narwhals._polars.dataframe import Method, PolarsDataFrame @@ -181,12 +180,9 @@ def _int_range( context: _LimitedContext, name: str, ) -> Self: - version = context._version - dtype_pl: PlIntegerType = narwhals_to_native_dtype(dtype, version) # type: ignore[assignment] - return cls.from_native( - pl.int_range(start=start, end=end, step=step, dtype=dtype_pl, eager=True), - context=context, - ).alias(name) + dtype_pl = narwhals_to_native_dtype(dtype, context._version) + native = pl.int_range(start, end, step, dtype=dtype_pl, eager=True) + return cls.from_native(native, context=context).alias(name) @staticmethod def _is_native(obj: pl.Series | Any) -> TypeIs[pl.Series]: diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index 5f56a39044..3b58233b68 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -23,11 +23,12 @@ if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Mapping + from polars.datatypes import IntegerType as PlIntegerType from typing_extensions import TypeIs from narwhals._utils import _StoresNative from narwhals.dtypes import DType - from narwhals.typing import IntoDType + from narwhals.typing import IntegerDType, IntoDType T = TypeVar("T") NativeT = TypeVar( @@ -141,6 +142,10 @@ def native_to_narwhals_dtype( # noqa: C901, PLR0912 return dtypes.Unknown() +@overload +def narwhals_to_native_dtype(dtype: IntegerDType, version: Version) -> PlIntegerType: ... +@overload +def narwhals_to_native_dtype(dtype: IntoDType, version: Version) -> pl.DataType: ... 
def narwhals_to_native_dtype( # noqa: C901, PLR0912 dtype: IntoDType, version: Version ) -> pl.DataType: From 749edfcea5e8141c0178f0e4026cbe3744e3be2d Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 30 Jul 2025 12:39:03 +0000 Subject: [PATCH 23/45] chore(typing): Remove unused asserts --- narwhals/functions.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index ded756a713..f69dd38906 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1886,9 +1886,6 @@ def _int_range_impl( start = 0 if not eager: - assert start is not None # noqa: S101, help mypy - assert end is not None # noqa: S101, help mypy - start = start if isinstance(start, Expr) else lit(start, dtype=dtype) end = end if isinstance(end, Expr) else lit(end, dtype=dtype) From 4706a30f8652c143ed42a208f259f18a6b1246a9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 30 Jul 2025 13:15:29 +0000 Subject: [PATCH 24/45] test: Add failing `Expr` + `eager` case --- tests/expr_and_series/int_range_test.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/expr_and_series/int_range_test.py b/tests/expr_and_series/int_range_test.py index 9a3af7f759..4cbc30bc6d 100644 --- a/tests/expr_and_series/int_range_test.py +++ b/tests/expr_and_series/int_range_test.py @@ -7,7 +7,7 @@ import narwhals as nw from narwhals import Implementation from narwhals.exceptions import ComputeError -from tests.utils import Constructor, assert_equal_data +from tests.utils import Constructor, ConstructorEager, assert_equal_data if TYPE_CHECKING: from narwhals.dtypes import DType, IntegerType @@ -44,6 +44,22 @@ def test_int_range_eager( assert_equal_data({"a": series}, {"a": list(range(start, end, step))}) +# NOTE: Two options for solving +# (1): Remove `Expr` inputs from the overloads that return `Series` +# - Then check that this 
gives a helpful runtime message + requires a `type: ignore[call-overload]` +# (2): Add support for this at runtime, like `polars` +# - Then check that this produces the same result for (polars) lazy and eager constructors +# - https://github.com/pola-rs/polars/blob/867443ce3875da30791021e4072e5a6fb2249d91/py-polars/polars/functions/range/int_range.py#L214-L229 +def test_int_range_eager_expr(constructor_eager: ConstructorEager) -> None: + data = {"a": [0, 2, 3, 6, 5, 1]} + expected = {"a": [0, 2, 4, 6, 8, 10]} + df = nw.from_native(constructor_eager(data)) + impl = df.implementation + int_range = nw.int_range(nw.col("a").min(), nw.col("a").max() * 2, eager=impl) + result = df.select(int_range) + assert_equal_data(result, expected) + + @pytest.mark.parametrize( ("start", "end", "step", "dtype", "expected"), [ From bc0355f0e865e5e3fe4f383aef7ae28409feb9be Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 31 Jul 2025 10:06:11 +0200 Subject: [PATCH 25/45] low hanging feedback adjustments --- narwhals/_arrow/dataframe.py | 6 +++--- narwhals/_arrow/series.py | 10 +++++----- narwhals/_arrow/utils.py | 2 +- narwhals/functions.py | 2 +- narwhals/stable/v1/__init__.py | 2 +- narwhals/stable/v2/__init__.py | 2 +- narwhals/typing.py | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index cfc85f5e6a..78b2371d79 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -8,7 +8,7 @@ import pyarrow.compute as pc from narwhals._arrow.series import ArrowSeries -from narwhals._arrow.utils import _native_int_range, native_to_narwhals_dtype +from narwhals._arrow.utils import int_range, native_to_narwhals_dtype from narwhals._compliant import EagerDataFrame from narwhals._expression_parsing import ExprKind from narwhals._utils import ( @@ -644,7 +644,7 @@ def write_csv(self, file: str | Path | BytesIO | None) -> str | None: def is_unique(self) -> ArrowSeries: col_token = 
generate_temporary_column_name(n_bytes=8, columns=self.columns) - row_index = _native_int_range(0, len(self)) + row_index = int_range(0, len(self)) keep_idx = ( self.native.append_column(col_token, row_index) .group_by(self.columns) @@ -676,7 +676,7 @@ def unique( agg_func = ArrowGroupBy._REMAP_UNIQUE[keep] col_token = generate_temporary_column_name(n_bytes=8, columns=self.columns) - col_value = _native_int_range(0, len(self)) + col_value = int_range(0, len(self)) keep_idx_native = ( self.native.append_column(col_token, col_value) .group_by(subset) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index c520353a5b..1eba7d287c 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -11,11 +11,11 @@ from narwhals._arrow.series_str import ArrowSeriesStringNamespace from narwhals._arrow.series_struct import ArrowSeriesStructNamespace from narwhals._arrow.utils import ( - _native_int_range, cast_for_truediv, chunked_array, extract_native, floordiv_compat, + int_range, lit, narwhals_to_native_dtype, native_to_narwhals_dtype, @@ -174,7 +174,7 @@ def _int_range( name: str, ) -> Self: dtype_pa = narwhals_to_native_dtype(dtype, context._version) - data = _native_int_range(start=start, end=end, step=step, dtype=dtype_pa) + data = int_range(start=start, end=end, step=step, dtype=dtype_pa) return cls.from_native( chunked_array([data], dtype_pa), name=name, context=context ) @@ -681,7 +681,7 @@ def fill_aux( # then it calculates the distance of each new index and the original index # if the distance is equal to or less than the limit and the original value is null, it is replaced valid_mask = pc.is_valid(arr) - indices = _native_int_range(0, len(arr)) + indices = int_range(0, len(arr)) if direction == "forward": valid_index = np.maximum.accumulate(np.where(valid_mask, indices, -1)) distance = indices - valid_index @@ -728,7 +728,7 @@ def is_unique(self) -> ArrowSeries: return self.to_frame().is_unique().alias(self.name) def 
is_first_distinct(self) -> Self: - row_number = _native_int_range(0, len(self)) + row_number = int_range(0, len(self)) col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name]) first_distinct_index = ( pa.Table.from_arrays([self.native], names=[self.name]) @@ -741,7 +741,7 @@ def is_first_distinct(self) -> Self: return self._with_native(pc.is_in(row_number, first_distinct_index)) def is_last_distinct(self) -> Self: - row_number = _native_int_range(0, len(self)) + row_number = int_range(0, len(self)) col_token = generate_temporary_column_name(n_bytes=8, columns=[self.name]) last_distinct_index = ( pa.Table.from_arrays([self.native], names=[self.name]) diff --git a/narwhals/_arrow/utils.py b/narwhals/_arrow/utils.py index 9404d5f5fd..d7abf2df30 100644 --- a/narwhals/_arrow/utils.py +++ b/narwhals/_arrow/utils.py @@ -447,7 +447,7 @@ def cast_to_comparable_string_types( return (ca.cast(dtype) for ca in chunked_arrays), lit(separator, dtype) -def _native_int_range( +def int_range( start: int, end: int, step: int = 1, *, dtype: pa.DataType = int64 ) -> ArrayAny: if BACKEND_VERSION < (21, 0, 0): # pragma: no cover diff --git a/narwhals/functions.py b/narwhals/functions.py index f69dd38906..976d929412 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1824,7 +1824,7 @@ def int_range( a `Series` is returned. Returns: - Expr or Series: Column of integer data type `dtype`. + Expr or Series of integer data type `dtype`. Examples: >>> import narwhals as nw diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 261f2d7f98..f383ad4696 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -1348,7 +1348,7 @@ def int_range( a `Series` is returned. Returns: - Expr or Series: Column of integer data type `dtype`. + Expr or Series of integer data type `dtype`. 
""" return _stableify( _int_range_impl(start=start, end=end, step=step, dtype=dtype, eager=eager) diff --git a/narwhals/stable/v2/__init__.py b/narwhals/stable/v2/__init__.py index 7cdfe5dfb2..07bf3b7d99 100644 --- a/narwhals/stable/v2/__init__.py +++ b/narwhals/stable/v2/__init__.py @@ -1215,7 +1215,7 @@ def int_range( a `Series` is returned. Returns: - Expr or Series: Column of integer data type `dtype`. + Expr or Series of integer data type `dtype`. """ return _stableify( _int_range_impl(start=start, end=end, step=step, dtype=dtype, eager=eager) diff --git a/narwhals/typing.py b/narwhals/typing.py index 0fdcca4287..cc18465044 100644 --- a/narwhals/typing.py +++ b/narwhals/typing.py @@ -362,7 +362,7 @@ def Binary(self) -> type[dtypes.Binary]: ... NonNestedDType: TypeAlias = "dtypes.NumericType | dtypes.TemporalType | dtypes.String | dtypes.Boolean | dtypes.Binary | dtypes.Categorical | dtypes.Unknown | dtypes.Object" """Any Narwhals DType that does not have required arguments.""" IntegerDType: TypeAlias = "dtypes.IntegerType | type[dtypes.IntegerType]" -"""Instance or class of IntegerType""" +"""Any signed or unsigned integer DType.""" IntoDType: TypeAlias = "dtypes.DType | type[NonNestedDType]" """Anything that can be converted into a Narwhals DType. 
From 352fd4fa50acfbbea3dcf91bc0723cea8867ccdf Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 31 Jul 2025 10:47:13 +0200 Subject: [PATCH 26/45] refactor into int_range_eager --- narwhals/_arrow/namespace.py | 32 ++++++++++++++++++------- narwhals/_arrow/series.py | 17 ------------- narwhals/_compliant/namespace.py | 11 +++++++++ narwhals/_compliant/series.py | 11 --------- narwhals/_pandas_like/namespace.py | 25 ++++++++++++------- narwhals/_pandas_like/series.py | 15 ------------ narwhals/_polars/namespace.py | 15 +++++++++++- narwhals/_polars/series.py | 22 +---------------- narwhals/functions.py | 20 ++++++++++------ narwhals/stable/v1/__init__.py | 2 +- narwhals/stable/v2/__init__.py | 2 +- tests/expr_and_series/int_range_test.py | 18 +++++--------- 12 files changed, 86 insertions(+), 104 deletions(-) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index 0d1ef847e6..e9912e6ea2 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -12,13 +12,19 @@ from narwhals._arrow.expr import ArrowExpr from narwhals._arrow.selectors import ArrowSelectorNamespace from narwhals._arrow.series import ArrowSeries -from narwhals._arrow.utils import cast_to_comparable_string_types +from narwhals._arrow.utils import ( + cast_to_comparable_string_types, + chunked_array, + int_range, + narwhals_to_native_dtype, +) from narwhals._compliant import CompliantThen, EagerNamespace, EagerWhen from narwhals._expression_parsing import ( combine_alias_output_names, combine_evaluate_output_names, ) from narwhals._utils import Implementation +from narwhals.dtypes import Int64 if TYPE_CHECKING: from collections.abc import Sequence @@ -284,6 +290,21 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: context=self, ) + def int_range_eager( + self, + start: int, + end: int, + step: int = 1, + *, + dtype: IntegerDType = Int64, + name: str = "literal", + ) -> ArrowSeries: + dtype_pa = narwhals_to_native_dtype(dtype, version=self._version) + data 
= int_range(start=start, end=end, step=step, dtype=dtype_pa) + return ArrowSeries.from_native( + chunked_array([data], dtype_pa), name=name, context=self + ) + def int_range( self, start: int | ArrowExpr, @@ -303,14 +324,7 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: end_value = end(df)[0].item() if isinstance(end, ArrowExpr) else end return [ - ArrowSeries._int_range( - start=start_value, - end=end_value, - step=step, - dtype=dtype, - context=self, - name=name, - ) + self.int_range_eager(start_value, end_value, step, dtype=dtype, name=name) ] evaluate_output_names = ( diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 1eba7d287c..fa102310ba 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -64,7 +64,6 @@ from narwhals.typing import ( ClosedInterval, FillNullStrategy, - IntegerDType, Into1DArray, IntoDType, NonNestedLiteral, @@ -163,22 +162,6 @@ def from_iterable( chunked_array([data], dtype_pa), name=name, context=context ) - @classmethod - def _int_range( - cls, - start: int, - end: int, - step: int, - dtype: IntegerDType, - context: _LimitedContext, - name: str, - ) -> Self: - dtype_pa = narwhals_to_native_dtype(dtype, context._version) - data = int_range(start=start, end=end, step=step, dtype=dtype_pa) - return cls.from_native( - chunked_array([data], dtype_pa), name=name, context=context - ) - def _from_scalar(self, value: Any) -> Self: if hasattr(value, "as_py"): value = value.as_py() diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py index 8032ea9464..062c1d7353 100644 --- a/narwhals/_compliant/namespace.py +++ b/narwhals/_compliant/namespace.py @@ -23,6 +23,7 @@ passthrough_column_names, ) from narwhals.dependencies import is_numpy_array_2d +from narwhals.dtypes import Int64 if TYPE_CHECKING: from collections.abc import Container, Iterable, Mapping, Sequence @@ -221,3 +222,13 @@ def concat( else: # pragma: no cover raise NotImplementedError return 
self._dataframe.from_native(native, context=self) + + def int_range_eager( + self, + start: int, + end: int, + step: int = 1, + *, + dtype: IntegerDType = Int64, + name: str = "literal", + ) -> EagerSeriesT: ... diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index 96d0b048b0..788ec0bc9f 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -44,7 +44,6 @@ from narwhals.typing import ( ClosedInterval, FillNullStrategy, - IntegerDType, Into1DArray, IntoDType, MultiIndexSelector, @@ -123,16 +122,6 @@ def from_iterable( name: str = "", dtype: IntoDType | None = None, ) -> Self: ... - @classmethod - def _int_range( - cls, - start: int, - end: int, - step: int, - dtype: IntegerDType, - context: _LimitedContext, - name: str, - ) -> Self: ... def to_narwhals(self) -> Series[NativeSeriesT]: return self._version.series(self, level="full") diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index ec93d0154e..0bb40a2553 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -16,7 +16,8 @@ from narwhals._pandas_like.selectors import PandasSelectorNamespace from narwhals._pandas_like.series import PandasLikeSeries from narwhals._pandas_like.typing import NativeDataFrameT, NativeSeriesT -from narwhals._pandas_like.utils import is_non_nullable_boolean +from narwhals._pandas_like.utils import import_array_module, is_non_nullable_boolean +from narwhals.dtypes import Int64 if TYPE_CHECKING: from collections.abc import Iterable, Sequence @@ -370,6 +371,19 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: context=self, ) + def int_range_eager( + self, + start: int, + end: int, + step: int = 1, + *, + dtype: IntegerDType = Int64, + name: str = "literal", + ) -> PandasLikeSeries: + array_funcs = import_array_module(self._implementation) + data = array_funcs.arange(start, end, step) + return PandasLikeSeries.from_iterable(data, context=self, name=name, 
dtype=dtype) + def int_range( self, start: int | PandasLikeExpr, @@ -388,14 +402,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: start_value = start end_value = end(df)[0].item() if isinstance(end, PandasLikeExpr) else end return [ - PandasLikeSeries._int_range( - start=start_value, - end=end_value, - step=step, - dtype=dtype, - context=self, - name=name, - ) + self.int_range_eager(start_value, end_value, step, dtype=dtype, name=name) ] evaluate_output_names = ( diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 2253df1ead..9d6ae422bd 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -45,7 +45,6 @@ from narwhals.typing import ( ClosedInterval, FillNullStrategy, - IntegerDType, Into1DArray, IntoDType, NonNestedLiteral, @@ -183,20 +182,6 @@ def from_iterable( kwds["index"] = index return cls.from_native(ns.Series(data, name=name, **kwds), context=context) - @classmethod - def _int_range( - cls, - start: int, - end: int, - step: int, - dtype: IntegerDType, - context: _LimitedContext, - name: str, - ) -> Self: - array_funcs = import_array_module(context._implementation) - data = array_funcs.arange(start, end, step) - return cls.from_iterable(data, context=context, name=name, dtype=dtype) - @staticmethod def _is_native(obj: Any) -> TypeIs[Any]: return is_pandas_like_series(obj) # pragma: no cover diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py index 761f90f95e..c12cf557e9 100644 --- a/narwhals/_polars/namespace.py +++ b/narwhals/_polars/namespace.py @@ -10,7 +10,7 @@ from narwhals._polars.utils import extract_args_kwargs, narwhals_to_native_dtype from narwhals._utils import Implementation, requires from narwhals.dependencies import is_numpy_array_2d -from narwhals.dtypes import DType +from narwhals.dtypes import DType, Int64 if TYPE_CHECKING: from collections.abc import Iterable, Mapping, Sequence @@ -201,6 +201,19 @@ def concat_str( version=self._version, 
) + def int_range_eager( + self, + start: int, + end: int, + step: int = 1, + *, + dtype: IntegerDType = Int64, + name: str = "literal", + ) -> PolarsSeries: + dtype_pl = narwhals_to_native_dtype(dtype, self._version) + native = pl.int_range(start, end, step, dtype=dtype_pl, eager=True).alias(name) + return PolarsSeries.from_native(native, context=self) + def int_range( self, start: int | PolarsExpr, diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index cda78a4636..170c6e5150 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -30,13 +30,7 @@ from narwhals._utils import Version, _LimitedContext from narwhals.dtypes import DType from narwhals.series import Series - from narwhals.typing import ( - IntegerDType, - Into1DArray, - IntoDType, - MultiIndexSelector, - _1DArray, - ) + from narwhals.typing import Into1DArray, IntoDType, MultiIndexSelector, _1DArray T = TypeVar("T") IncludeBreakpoint: TypeAlias = Literal[False, True] @@ -170,20 +164,6 @@ def from_iterable( native = pl.Series(name=name, values=cast("Sequence[Any]", data), dtype=dtype_pl) return cls.from_native(native, context=context) - @classmethod - def _int_range( - cls, - start: int, - end: int, - step: int, - dtype: IntegerDType, - context: _LimitedContext, - name: str, - ) -> Self: - dtype_pl = narwhals_to_native_dtype(dtype, context._version) - native = pl.int_range(start, end, step, dtype=dtype_pl, eager=True) - return cls.from_native(native, context=context).alias(name) - @staticmethod def _is_native(obj: pl.Series | Any) -> TypeIs[pl.Series]: return isinstance(obj, pl.Series) diff --git a/narwhals/functions.py b/narwhals/functions.py index 976d929412..5a00daa5ce 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1819,7 +1819,7 @@ def int_range( the value of `start` is used and `start` is set to `0`. step: Step size of the range. dtype: Data type of the range (must be an integer data type). 
- eager: If set to `False` (default) or `None`, then an expression is returned. + eager: If set to `False` (default), then an expression is returned. If set to an (eager) implementation ("pandas", "polars" or "pyarrow"), then a `Series` is returned. @@ -1911,13 +1911,19 @@ def _int_range_impl( impl = Implementation.from_backend(eager) if is_eager_allowed(impl): - assert isinstance(start, int) # noqa: S101, help mypy - assert isinstance(end, int) # noqa: S101, help mypy + if not (isinstance(start, int) and isinstance(end, int)): + msg = ( + f"Expected `start` and `end` to be integer values since `eager={eager}`.\n" + f"Found: `start` of type {type(start)} and `end` of type {type(end)}\n\n" + "Hint: Calling with `nw.int_range` expressions requires a context " + "such as `select` or `with_columns`" + ) + raise InvalidOperationError(msg) + ns = Version.MAIN.namespace.from_backend(impl).compliant - series = ns._series._int_range( - start=start, end=end, step=step, dtype=dtype, context=ns, name="literal" - ) - return series.to_narwhals() + return ns.int_range_eager( + start=start, end=end, step=step, dtype=dtype + ).to_narwhals() msg = f"Cannot create a Series from a lazy backend. Found: {impl}" raise ValueError(msg) diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index f383ad4696..f6fecd221b 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -1343,7 +1343,7 @@ def int_range( the value of `start` is used and `start` is set to `0`. step: Step size of the range. dtype: Data type of the range (must be an integer data type). - eager: If set to `False` (default) or `None`, then an expression is returned. + eager: If set to `False` (default), then an expression is returned. If set to an (eager) implementation ("pandas", "polars" or "pyarrow"), then a `Series` is returned. 
diff --git a/narwhals/stable/v2/__init__.py b/narwhals/stable/v2/__init__.py index 07bf3b7d99..6514abbb41 100644 --- a/narwhals/stable/v2/__init__.py +++ b/narwhals/stable/v2/__init__.py @@ -1210,7 +1210,7 @@ def int_range( the value of `start` is used and `start` is set to `0`. step: Step size of the range. dtype: Data type of the range (must be an integer data type). - eager: If set to `False` (default) or `None`, then an expression is returned. + eager: If set to `False` (default), then an expression is returned. If set to an (eager) implementation ("pandas", "polars" or "pyarrow"), then a `Series` is returned. diff --git a/tests/expr_and_series/int_range_test.py b/tests/expr_and_series/int_range_test.py index 4cbc30bc6d..b360502490 100644 --- a/tests/expr_and_series/int_range_test.py +++ b/tests/expr_and_series/int_range_test.py @@ -6,7 +6,7 @@ import narwhals as nw from narwhals import Implementation -from narwhals.exceptions import ComputeError +from narwhals.exceptions import ComputeError, InvalidOperationError from tests.utils import Constructor, ConstructorEager, assert_equal_data if TYPE_CHECKING: @@ -44,20 +44,14 @@ def test_int_range_eager( assert_equal_data({"a": series}, {"a": list(range(start, end, step))}) -# NOTE: Two options for solving -# (1): Remove `Expr` inputs from the overloads that return `Series` -# - Then check that this gives a helpful runtime message + requires a `type: ignore[call-overload]` -# (2): Add support for this at runtime, like `polars` -# - Then check that this produces the same result for (polars) lazy and eager constructors -# - https://github.com/pola-rs/polars/blob/867443ce3875da30791021e4072e5a6fb2249d91/py-polars/polars/functions/range/int_range.py#L214-L229 -def test_int_range_eager_expr(constructor_eager: ConstructorEager) -> None: +def test_int_range_eager_expr_raises(constructor_eager: ConstructorEager) -> None: data = {"a": [0, 2, 3, 6, 5, 1]} - expected = {"a": [0, 2, 4, 6, 8, 10]} df = 
nw.from_native(constructor_eager(data)) impl = df.implementation - int_range = nw.int_range(nw.col("a").min(), nw.col("a").max() * 2, eager=impl) - result = df.select(int_range) - assert_equal_data(result, expected) + + msg = "Expected `start` and `end` to be integer values" + with pytest.raises(InvalidOperationError, match=msg): + nw.int_range(nw.col("a").min(), nw.col("a").max() * 2, eager=impl) @pytest.mark.parametrize( From 8b195bab293d6b49d71e9c04752939afc4fd41d9 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 31 Jul 2025 10:53:44 +0200 Subject: [PATCH 27/45] typo --- narwhals/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 5a00daa5ce..6c7b713876 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1915,7 +1915,7 @@ def _int_range_impl( msg = ( f"Expected `start` and `end` to be integer values since `eager={eager}`.\n" f"Found: `start` of type {type(start)} and `end` of type {type(end)}\n\n" - "Hint: Calling with `nw.int_range` expressions requires a context " + "Hint: Calling `nw.int_range` with expressions requires a context " "such as `select` or `with_columns`" ) raise InvalidOperationError(msg) From 466d85de1b1bc5c40c88adfa5ca4c6873e87bb41 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 31 Jul 2025 11:03:01 +0200 Subject: [PATCH 28/45] forgot to mention about eager value in suggestion --- narwhals/functions.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 6c7b713876..9d48a4c4fd 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1915,15 +1915,15 @@ def _int_range_impl( msg = ( f"Expected `start` and `end` to be integer values since `eager={eager}`.\n" f"Found: `start` of type {type(start)} and `end` of type {type(end)}\n\n" - "Hint: Calling `nw.int_range` with expressions requires a context " - "such as `select` or `with_columns`" + "Hint: Calling 
`nw.int_range` with expressions requires:\n" + " - `eager=False`" + " - a context such as `select` or `with_columns`" ) raise InvalidOperationError(msg) ns = Version.MAIN.namespace.from_backend(impl).compliant - return ns.int_range_eager( - start=start, end=end, step=step, dtype=dtype - ).to_narwhals() + series = ns.int_range_eager(start=start, end=end, step=step, dtype=dtype) + return series.to_narwhals() msg = f"Cannot create a Series from a lazy backend. Found: {impl}" raise ValueError(msg) From 1575ae8ad55813b063df90e2d97a3a2c0ecb2c0e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 31 Jul 2025 12:19:55 +0000 Subject: [PATCH 29/45] ci: Update `dtypes-import` See https://github.com/narwhals-dev/narwhals/pull/2895#issuecomment-3139503974 --- .pre-commit-config.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 08855a538c..94cd6b217e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -65,12 +65,12 @@ repos: name: don't import from narwhals.dtypes (use `Version.dtypes` instead) entry: | (?x) - import\ narwhals.dtypes| - from\ narwhals\ import\ dtypes| - from\ narwhals.dtypes\ import\ [^D_]+| - import\ narwhals.stable.v1.dtypes| - from\ narwhals.stable\.v.\ import\ dtypes| - from\ narwhals.stable\.v.\.dtypes\ import + import\ narwhals(\.stable\.v\d)?\.dtypes| + from\ narwhals(\.stable\.v\d)?\ import\ dtypes| + ^from\ narwhals(\.stable\.v\d)?\.dtypes\ import + \ (DType,\ )? + ((Datetime|Duration|Enum)(,\ )?)+ + ((,\ )?DType)? 
language: pygrep files: ^narwhals/ exclude: | From 82e93fa5a827c5a1fb51eab6c6eeb7aeee3c169f Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 31 Jul 2025 12:43:45 +0000 Subject: [PATCH 30/45] refactor: Add `PandasLikeNamespace._array_funcs` --- narwhals/_pandas_like/namespace.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 0bb40a2553..8d2d79ed7a 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -66,6 +66,15 @@ def _series(self) -> type[PandasLikeSeries]: def selectors(self) -> PandasSelectorNamespace: return PandasSelectorNamespace.from_namespace(self) + @property + def _array_funcs(self): # type: ignore[no-untyped-def] # noqa: ANN202 + if TYPE_CHECKING: + import numpy as np + + return np + else: + return import_array_module(self._implementation) + def __init__(self, implementation: Implementation, version: Version) -> None: self._implementation = implementation self._version = version @@ -380,9 +389,8 @@ def int_range_eager( dtype: IntegerDType = Int64, name: str = "literal", ) -> PandasLikeSeries: - array_funcs = import_array_module(self._implementation) - data = array_funcs.arange(start, end, step) - return PandasLikeSeries.from_iterable(data, context=self, name=name, dtype=dtype) + data = self._array_funcs.arange(start, end, step) + return self._series.from_iterable(data, context=self, name=name, dtype=dtype) def int_range( self, From 2c52f918dfad8ae8aca460f682993b562e7716d4 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 31 Jul 2025 12:48:14 +0000 Subject: [PATCH 31/45] refactor(suggestion): Move impl to `EagerNamespace.int_range` --- narwhals/_arrow/namespace.py | 36 ------------------------------ narwhals/_compliant/namespace.py | 36 ++++++++++++++++++++++++++++++ 
narwhals/_pandas_like/namespace.py | 35 ----------------------------- 3 files changed, 36 insertions(+), 71 deletions(-) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index e9912e6ea2..879e9116c7 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -305,42 +305,6 @@ def int_range_eager( chunked_array([data], dtype_pa), name=name, context=self ) - def int_range( - self, - start: int | ArrowExpr, - end: int | ArrowExpr, - step: int, - *, - dtype: IntegerDType, - ) -> ArrowExpr: - def func(df: ArrowDataFrame) -> list[ArrowSeries]: - if isinstance(start, ArrowExpr): - start_eval = start(df)[0] - name = start_eval.name - start_value = start_eval.item() - else: - name = "literal" - start_value = start - - end_value = end(df)[0].item() if isinstance(end, ArrowExpr) else end - return [ - self.int_range_eager(start_value, end_value, step, dtype=dtype, name=name) - ] - - evaluate_output_names = ( - combine_evaluate_output_names(start) - if isinstance(start, ArrowExpr) - else lambda _df: ["literal"] - ) - return self._expr._from_callable( - func=func, - depth=0, - function_name="int_range", - evaluate_output_names=evaluate_output_names, - alias_output_names=None, - context=self, - ) - class ArrowWhen(EagerWhen[ArrowDataFrame, ArrowSeries, ArrowExpr, "ChunkedArrayAny"]): @property diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py index 062c1d7353..7039a0519f 100644 --- a/narwhals/_compliant/namespace.py +++ b/narwhals/_compliant/namespace.py @@ -16,6 +16,7 @@ NativeFrameT_co, NativeSeriesT, ) +from narwhals._expression_parsing import combine_evaluate_output_names from narwhals._utils import ( exclude_column_names, get_column_names, @@ -232,3 +233,38 @@ def int_range_eager( dtype: IntegerDType = Int64, name: str = "literal", ) -> EagerSeriesT: ... 
+ + def int_range( + self, + start: int | EagerExprT, + end: int | EagerExprT, + step: int, + *, + dtype: IntegerDType, + ) -> EagerExprT: + def func(df: EagerDataFrameT) -> list[EagerSeriesT]: + if isinstance(start, int): # pragma: no cover + name = "literal" + start_value = start + else: + start_eval = start(df)[0] + name = start_eval.name + start_value = start_eval.item() + end_value = end if isinstance(end, int) else end(df)[0].item() + return [ + self.int_range_eager(start_value, end_value, step, dtype=dtype, name=name) + ] + + evaluate_output_names = ( + (lambda _df: ["literal"]) + if isinstance(start, int) + else combine_evaluate_output_names(start) + ) + return self._expr._from_callable( + func=func, + depth=0, + function_name="int_range", + evaluate_output_names=evaluate_output_names, + alias_output_names=None, + context=self, + ) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 8d2d79ed7a..62ecd1207e 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -392,41 +392,6 @@ def int_range_eager( data = self._array_funcs.arange(start, end, step) return self._series.from_iterable(data, context=self, name=name, dtype=dtype) - def int_range( - self, - start: int | PandasLikeExpr, - end: int | PandasLikeExpr, - step: int, - *, - dtype: IntegerDType, - ) -> PandasLikeExpr: - def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: - if isinstance(start, PandasLikeExpr): - start_eval = start(df)[0] - name = start_eval.name - start_value = start_eval.item() - else: # pragma: no cover - name = "literal" - start_value = start - end_value = end(df)[0].item() if isinstance(end, PandasLikeExpr) else end - return [ - self.int_range_eager(start_value, end_value, step, dtype=dtype, name=name) - ] - - evaluate_output_names = ( - combine_evaluate_output_names(start) - if isinstance(start, PandasLikeExpr) - else lambda _df: ["literal"] - ) - return self._expr._from_callable( - func=func, - 
depth=0, - function_name="int_range", - evaluate_output_names=evaluate_output_names, - alias_output_names=None, - context=self, - ) - class _NativeConcat(Protocol[NativeDataFrameT, NativeSeriesT]): @overload From f13d667eda841d6be643d16279b6897855a357e4 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 31 Jul 2025 12:56:58 +0000 Subject: [PATCH 32/45] defaults for `int_range` as well `ArrowDataFrame.with_row_index` was one of the motivating cases in (https://github.com/narwhals-dev/narwhals/pull/2895#issuecomment-3136001189) --- narwhals/_arrow/dataframe.py | 4 +--- narwhals/_compliant/namespace.py | 8 ++++---- narwhals/_polars/namespace.py | 4 ++-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 78b2371d79..d70c0e1c9c 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -472,9 +472,7 @@ def to_dict( def with_row_index(self, name: str, order_by: Sequence[str] | None) -> Self: plx = self.__narwhals_namespace__() if order_by is None: - row_index = plx.int_range( - start=0, end=len(self), step=1, dtype=self._version.dtypes.Int64() - ) + row_index = plx.int_range(start=0, end=len(self)) else: rank = plx.col(order_by[0]).rank("ordinal", descending=False) row_index = rank.over(partition_by=[], order_by=order_by) - 1 diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py index 7039a0519f..c90313fc66 100644 --- a/narwhals/_compliant/namespace.py +++ b/narwhals/_compliant/namespace.py @@ -100,9 +100,9 @@ def int_range( self, start: int | CompliantExprT, end: int | CompliantExprT, - step: int, + step: int = 1, *, - dtype: IntegerDType, + dtype: IntegerDType = Int64, ) -> CompliantExprT: ... @property def selectors(self) -> CompliantSelectorNamespace[Any, Any]: ... 
@@ -238,9 +238,9 @@ def int_range( self, start: int | EagerExprT, end: int | EagerExprT, - step: int, + step: int = 1, *, - dtype: IntegerDType, + dtype: IntegerDType = Int64, ) -> EagerExprT: def func(df: EagerDataFrameT) -> list[EagerSeriesT]: if isinstance(start, int): # pragma: no cover diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py index c12cf557e9..c9b0a05261 100644 --- a/narwhals/_polars/namespace.py +++ b/narwhals/_polars/namespace.py @@ -218,9 +218,9 @@ def int_range( self, start: int | PolarsExpr, end: int | PolarsExpr, - step: int, + step: int = 1, *, - dtype: IntegerDType, + dtype: IntegerDType = Int64, ) -> PolarsExpr: start_ = start if isinstance(start, int) else start.native end_ = end if isinstance(end, int) else end.native From 447a27666be297d7d4f2e97182cb1025d2f821e9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 31 Jul 2025 13:03:42 +0000 Subject: [PATCH 33/45] refactor: Use `_series`, pass `dtype_pa` once --- narwhals/_arrow/namespace.py | 4 +--- narwhals/_polars/namespace.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index 879e9116c7..d87f11cd5c 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -301,9 +301,7 @@ def int_range_eager( ) -> ArrowSeries: dtype_pa = narwhals_to_native_dtype(dtype, version=self._version) data = int_range(start=start, end=end, step=step, dtype=dtype_pa) - return ArrowSeries.from_native( - chunked_array([data], dtype_pa), name=name, context=self - ) + return self._series.from_native(chunked_array([data]), name=name, context=self) class ArrowWhen(EagerWhen[ArrowDataFrame, ArrowSeries, ArrowExpr, "ChunkedArrayAny"]): diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py index c9b0a05261..c7eda47587 100644 --- a/narwhals/_polars/namespace.py +++ b/narwhals/_polars/namespace.py @@ -212,7 +212,7 @@ def 
int_range_eager( ) -> PolarsSeries: dtype_pl = narwhals_to_native_dtype(dtype, self._version) native = pl.int_range(start, end, step, dtype=dtype_pl, eager=True).alias(name) - return PolarsSeries.from_native(native, context=self) + return self._series.from_native(native, context=self) def int_range( self, From 1098afb2f6e064bed8e073c74188d76784809eaf Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 31 Jul 2025 14:05:27 +0000 Subject: [PATCH 34/45] revert: Undo `Int64` hack Resolves https://github.com/narwhals-dev/narwhals/pull/2895#discussion_r2245376071 Reverts https://github.com/narwhals-dev/narwhals/pull/2895#discussion_r2240954364 --- narwhals/functions.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 9d48a4c4fd..e93f28b0a6 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -31,6 +31,7 @@ is_numpy_array_2d, is_pyarrow_table, ) +from narwhals.dtypes import Int64 from narwhals.exceptions import InvalidOperationError from narwhals.expr import Expr from narwhals.series import Series @@ -44,7 +45,7 @@ from narwhals._compliant import CompliantExpr, CompliantNamespace from narwhals._translate import IntoArrowTable from narwhals.dataframe import DataFrame, LazyFrame - from narwhals.dtypes import DType, Int64 as _Int64 + from narwhals.dtypes import DType from narwhals.schema import Schema from narwhals.typing import ( ConcatMethod, @@ -63,9 +64,6 @@ _IntoSchema: TypeAlias = "Mapping[str, DType] | Schema | Sequence[str] | None" -Int64: _Int64 = Version.MAIN.dtypes.Int64() - - def concat(items: Iterable[FrameT], *, how: ConcatMethod = "vertical") -> FrameT: """Concatenate multiple DataFrames, LazyFrames into a single entity. 
From 15ae42a480fde18807a5a1fd42873869cd52d9ec Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 31 Jul 2025 18:58:54 +0000 Subject: [PATCH 35/45] refactor: Use `int_range_eager` in `with_row_index` https://github.com/narwhals-dev/narwhals/pull/2895#discussion_r2246153275 --- narwhals/_arrow/dataframe.py | 2 +- narwhals/_arrow/series.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index d70c0e1c9c..166781ef86 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -472,7 +472,7 @@ def to_dict( def with_row_index(self, name: str, order_by: Sequence[str] | None) -> Self: plx = self.__narwhals_namespace__() if order_by is None: - row_index = plx.int_range(start=0, end=len(self)) + row_index = plx.int_range_eager(0, len(self))._to_expr() else: rank = plx.col(order_by[0]).rank("ordinal", descending=False) row_index = rank.over(partition_by=[], order_by=order_by) - 1 diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index fa102310ba..1d39af0d06 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -44,6 +44,7 @@ from typing_extensions import Self, TypeAlias, TypeIs from narwhals._arrow.dataframe import ArrowDataFrame + from narwhals._arrow.expr import ArrowExpr from narwhals._arrow.namespace import ArrowNamespace from narwhals._arrow.typing import ( # type: ignore[attr-defined] ArrayAny, @@ -167,6 +168,9 @@ def _from_scalar(self, value: Any) -> Self: value = value.as_py() return super()._from_scalar(value) + def _to_expr(self) -> ArrowExpr: + return cast("ArrowExpr", super()._to_expr()) + @staticmethod def _is_native(obj: ChunkedArrayAny | Any) -> TypeIs[ChunkedArrayAny]: return isinstance(obj, pa.ChunkedArray) From 71e98566a50f375b48befe157d407ac7832b67e5 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 31 Jul 2025 
19:05:14 +0000 Subject: [PATCH 36/45] always require `Expr` in `CompliantNamespace.int_range` https://github.com/narwhals-dev/narwhals/pull/2895#discussion_r2246153275 --- narwhals/_compliant/namespace.py | 27 +++++++++------------------ narwhals/_polars/namespace.py | 8 +++----- 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py index c90313fc66..e2c371c8ec 100644 --- a/narwhals/_compliant/namespace.py +++ b/narwhals/_compliant/namespace.py @@ -98,8 +98,8 @@ def concat_str( ) -> CompliantExprT: ... def int_range( self, - start: int | CompliantExprT, - end: int | CompliantExprT, + start: CompliantExprT, + end: CompliantExprT, step: int = 1, *, dtype: IntegerDType = Int64, @@ -236,35 +236,26 @@ def int_range_eager( def int_range( self, - start: int | EagerExprT, - end: int | EagerExprT, + start: EagerExprT, + end: EagerExprT, step: int = 1, *, dtype: IntegerDType = Int64, ) -> EagerExprT: def func(df: EagerDataFrameT) -> list[EagerSeriesT]: - if isinstance(start, int): # pragma: no cover - name = "literal" - start_value = start - else: - start_eval = start(df)[0] - name = start_eval.name - start_value = start_eval.item() - end_value = end if isinstance(end, int) else end(df)[0].item() + start_eval = start(df)[0] + name = start_eval.name + start_value = start_eval.item() + end_value = end(df)[0].item() return [ self.int_range_eager(start_value, end_value, step, dtype=dtype, name=name) ] - evaluate_output_names = ( - (lambda _df: ["literal"]) - if isinstance(start, int) - else combine_evaluate_output_names(start) - ) return self._expr._from_callable( func=func, depth=0, function_name="int_range", - evaluate_output_names=evaluate_output_names, + evaluate_output_names=combine_evaluate_output_names(start), alias_output_names=None, context=self, ) diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py index c7eda47587..03cc8968b3 100644 --- a/narwhals/_polars/namespace.py 
+++ b/narwhals/_polars/namespace.py @@ -216,16 +216,14 @@ def int_range_eager( def int_range( self, - start: int | PolarsExpr, - end: int | PolarsExpr, + start: PolarsExpr, + end: PolarsExpr, step: int = 1, *, dtype: IntegerDType = Int64, ) -> PolarsExpr: - start_ = start if isinstance(start, int) else start.native - end_ = end if isinstance(end, int) else end.native pl_dtype = narwhals_to_native_dtype(dtype, self._version) - native = pl.int_range(start_, end_, step, dtype=pl_dtype) + native = pl.int_range(start.native, end.native, step, dtype=pl_dtype) return self._expr(native, self._version) # NOTE: Implementation is too different to annotate correctly (vs other `*SelectorNamespace`) From f3387c05833abdee03ef94695374b331b63f7b2c Mon Sep 17 00:00:00 2001 From: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> Date: Fri, 1 Aug 2025 08:52:20 +0200 Subject: [PATCH 37/45] Update narwhals/functions.py Co-authored-by: Dan Redding <125183946+dangotbanned@users.noreply.github.com> --- narwhals/functions.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index e93f28b0a6..25470e9dd1 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1895,14 +1895,10 @@ def _int_range_impl( msg = "`end` must contain exactly one value, got expression returning multiple values" raise ComputeError(msg) + args = start, end, step return Expr( lambda plx: apply_n_ary_operation( - plx, - lambda *args: plx.int_range(*args, dtype=dtype), - start, - end, - step, - str_as_lit=False, + plx, partial(plx.int_range, dtype=dtype), *args, str_as_lit=False ), ExprMetadata.selector_single(), ) From c8af1498e4e7586fbf03e33674614598ec8d1cbb Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 1 Aug 2025 13:34:31 +0000 Subject: [PATCH 38/45] chore: Note remaining `np.arange` usage Can tackle in a follow-up --- narwhals/_arrow/dataframe.py | 2 ++ 
narwhals/_arrow/series.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 166781ef86..98cf931cf2 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -695,6 +695,8 @@ def gather_every(self, n: int, offset: int) -> Self: def to_arrow(self) -> pa.Table: return self.native + # TODO @dangotbanned: Replace `np.arange` w/ `utils.int_range` + # https://github.com/narwhals-dev/narwhals/issues/2722#issuecomment-3097350688 def sample( self, n: int | None, diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 1d39af0d06..a865256be5 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -634,6 +634,8 @@ def zip_with(self, mask: Self, other: Self) -> Self: cond = mask.native.combine_chunks() return self._with_native(pc.if_else(cond, self.native, other.native)) + # TODO @dangotbanned: Replace `np.arange` w/ `utils.int_range` + # https://github.com/narwhals-dev/narwhals/issues/2722#issuecomment-3097350688 def sample( self, n: int | None, @@ -784,6 +786,8 @@ def sort(self, *, descending: bool, nulls_last: bool) -> Self: ) return self._with_native(self.native.take(sorted_indices)) + # TODO @dangotbanned: Replace `np.arange` w/ `utils.int_range` + # https://github.com/narwhals-dev/narwhals/issues/2722#issuecomment-3097350688 def to_dummies(self, *, separator: str, drop_first: bool) -> ArrowDataFrame: import numpy as np # ignore-banned-import @@ -1149,6 +1153,8 @@ def _calculate_bins(self, bin_count: int) -> _1DArray: upper += 0.5 return self._linear_space(lower, upper, bin_count + 1) + # TODO @dangotbanned: Replace `np.arange` w/ `utils.int_range` + # https://github.com/narwhals-dev/narwhals/issues/2722#issuecomment-3097350688 def _calculate_hist(self, bins: list[float] | _1DArray) -> ArrowHistData: ser = self.native # NOTE: `mypy` refuses to resolve `ndarray.__getitem__` From ea155ca0a23fc908b42f5de151c8db0efb897623 Mon Sep 17 00:00:00 2001 
From: FBruzzesi Date: Sat, 2 Aug 2025 00:22:55 +0200 Subject: [PATCH 39/45] tag as unstable --- narwhals/_arrow/dataframe.py | 3 +-- narwhals/functions.py | 6 ++++++ narwhals/stable/v1/__init__.py | 4 ++++ narwhals/stable/v2/__init__.py | 4 ++++ 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 98cf931cf2..13109b3471 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -674,9 +674,8 @@ def unique( agg_func = ArrowGroupBy._REMAP_UNIQUE[keep] col_token = generate_temporary_column_name(n_bytes=8, columns=self.columns) - col_value = int_range(0, len(self)) keep_idx_native = ( - self.native.append_column(col_token, col_value) + self.native.append_column(col_token, int_range(0, len(self))) .group_by(subset) .aggregate([(col_token, agg_func)]) .column(f"{col_token}_{agg_func}") diff --git a/narwhals/functions.py b/narwhals/functions.py index 25470e9dd1..4424ea6ff2 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -23,6 +23,7 @@ is_eager_allowed, is_sequence_but_not_str, supports_arrow_c_stream, + unstable, validate_laziness, ) from narwhals.dependencies import ( @@ -1801,6 +1802,7 @@ def int_range( ) -> Series[Any]: ... +@unstable def int_range( start: int | Expr, end: int | Expr | None = None, @@ -1811,6 +1813,10 @@ def int_range( ) -> Expr | Series[Any]: """Generate a range of integers. + Warning: + This functionality is considered **unstable**. It may be changed at any point + without it being considered a breaking change. + Arguments: start: Start of the range (inclusive). Defaults to 0. end: End of the range (exclusive). If set to `None` (default), diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 9f8dacabef..98bee79365 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -1351,6 +1351,10 @@ def int_range( ) -> Expr | Series[Any]: """Generate a range of integers. 
+ Warning: + This functionality is considered **unstable**. It may be changed at any point + without it being considered a breaking change. + Arguments: start: Start of the range (inclusive). Defaults to 0. end: End of the range (exclusive). If set to `None` (default), diff --git a/narwhals/stable/v2/__init__.py b/narwhals/stable/v2/__init__.py index 6514abbb41..d2757aae73 100644 --- a/narwhals/stable/v2/__init__.py +++ b/narwhals/stable/v2/__init__.py @@ -1204,6 +1204,10 @@ def int_range( ) -> Expr | Series[Any]: """Generate a range of integers. + Warning: + This functionality is considered **unstable**. It may be changed at any point + without it being considered a breaking change. + Arguments: start: Start of the range (inclusive). Defaults to 0. end: End of the range (exclusive). If set to `None` (default), From c430d5b5f3b880e5aacdade88c08b4501796d7ed Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 4 Aug 2025 18:55:46 +0000 Subject: [PATCH 40/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- narwhals/_pandas_like/namespace.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 62ecd1207e..8d27ce7059 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -72,8 +72,7 @@ def _array_funcs(self): # type: ignore[no-untyped-def] # noqa: ANN202 import numpy as np return np - else: - return import_array_module(self._implementation) + return import_array_module(self._implementation) def __init__(self, implementation: Implementation, version: Version) -> None: self._implementation = implementation From 0c6495fbccc9f546aa1451cbc6ad6cc4707bb2e9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 20 Aug 2025 17:18:33 +0000 Subject: [PATCH 41/45] refactor(typing): Use 
`IntoBackend[EagerAllowed]` https://github.com/narwhals-dev/narwhals/pull/2895#discussion_r2284834069 --- narwhals/functions.py | 6 +++--- narwhals/stable/v1/__init__.py | 4 ++-- narwhals/stable/v2/__init__.py | 4 ++-- tests/expr_and_series/int_range_test.py | 28 ++++++++++--------------- 4 files changed, 18 insertions(+), 24 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index b81297b8cb..22aa430d72 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1822,7 +1822,7 @@ def int_range( step: int = ..., *, dtype: IntegerDType = ..., - eager: ModuleType | Implementation | str, + eager: IntoBackend[EagerAllowed], ) -> Series[Any]: ... @@ -1833,7 +1833,7 @@ def int_range( step: int = 1, *, dtype: IntegerDType = Int64, - eager: ModuleType | Implementation | str | Literal[False] = False, + eager: IntoBackend[EagerAllowed] | Literal[False] = False, ) -> Expr | Series[Any]: """Generate a range of integers. @@ -1901,7 +1901,7 @@ def _int_range_impl( step: int, *, dtype: IntegerDType, - eager: ModuleType | Implementation | str | Literal[False], + eager: IntoBackend[EagerAllowed] | Literal[False], ) -> Expr | Series[Any]: from narwhals.exceptions import ComputeError diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 6e6e6abde6..2d7cf26802 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -1356,7 +1356,7 @@ def int_range( step: int = ..., *, dtype: IntegerDType = ..., - eager: ModuleType | Implementation | str, + eager: IntoBackend[EagerAllowed], ) -> Series[Any]: ... @@ -1366,7 +1366,7 @@ def int_range( step: int = 1, *, dtype: IntegerDType = Int64, - eager: ModuleType | Implementation | str | Literal[False] = False, + eager: IntoBackend[EagerAllowed] | Literal[False] = False, ) -> Expr | Series[Any]: """Generate a range of integers. 
diff --git a/narwhals/stable/v2/__init__.py b/narwhals/stable/v2/__init__.py index ddfba2ecce..dfff0bb234 100644 --- a/narwhals/stable/v2/__init__.py +++ b/narwhals/stable/v2/__init__.py @@ -1215,7 +1215,7 @@ def int_range( step: int = ..., *, dtype: IntegerDType = ..., - eager: ModuleType | Implementation | str, + eager: IntoBackend[EagerAllowed], ) -> Series[Any]: ... @@ -1225,7 +1225,7 @@ def int_range( step: int = 1, *, dtype: IntegerDType = Int64, - eager: ModuleType | Implementation | str | Literal[False] = False, + eager: IntoBackend[EagerAllowed] | Literal[False] = False, ) -> Expr | Series[Any]: """Generate a range of integers. diff --git a/tests/expr_and_series/int_range_test.py b/tests/expr_and_series/int_range_test.py index b360502490..7b6b6b6637 100644 --- a/tests/expr_and_series/int_range_test.py +++ b/tests/expr_and_series/int_range_test.py @@ -7,15 +7,13 @@ import narwhals as nw from narwhals import Implementation from narwhals.exceptions import ComputeError, InvalidOperationError -from tests.utils import Constructor, ConstructorEager, assert_equal_data +from tests.utils import Constructor, assert_equal_data if TYPE_CHECKING: from narwhals.dtypes import DType, IntegerType + from narwhals.typing import EagerAllowed -EAGER_BACKENDS = (Implementation.PANDAS, Implementation.PYARROW, Implementation.POLARS) - -@pytest.mark.parametrize("impl", EAGER_BACKENDS) @pytest.mark.parametrize( ("start", "end", "step", "dtype"), [ @@ -32,10 +30,10 @@ def test_int_range_eager( end: int | None, step: int, dtype: type[IntegerType] | IntegerType, - impl: nw.Implementation, + eager_implementation: EagerAllowed, ) -> None: - pytest.importorskip(impl.value) - series = nw.int_range(start=start, end=end, step=step, dtype=dtype, eager=impl) + pytest.importorskip(str(eager_implementation)) + series = nw.int_range(start, end, step, dtype=dtype, eager=eager_implementation) assert series.dtype == dtype if end is None: @@ -44,14 +42,10 @@ def test_int_range_eager( 
assert_equal_data({"a": series}, {"a": list(range(start, end, step))}) -def test_int_range_eager_expr_raises(constructor_eager: ConstructorEager) -> None: - data = {"a": [0, 2, 3, 6, 5, 1]} - df = nw.from_native(constructor_eager(data)) - impl = df.implementation - +def test_int_range_eager_expr_raises(eager_implementation: EagerAllowed) -> None: msg = "Expected `start` and `end` to be integer values" with pytest.raises(InvalidOperationError, match=msg): - nw.int_range(nw.col("a").min(), nw.col("a").max() * 2, eager=impl) + nw.int_range(nw.col("a").min(), nw.col("a").max() * 2, eager=eager_implementation) @pytest.mark.parametrize( @@ -78,8 +72,8 @@ def test_int_range_lazy( request.applymarker(pytest.mark.xfail(reason=reason)) data = {"a": ["foo", "bar", "baz"]} - int_range = nw.int_range(start=start, end=end, step=step, dtype=dtype, eager=False) - result = nw.from_native(constructor(data)).select(int_range) + frame = nw.from_native(constructor(data)) + result = frame.select(nw.int_range(start, end, step, dtype=dtype)) output_name = "len" if isinstance(start, nw.Expr) and end is not None else "literal" assert_equal_data(result, {output_name: expected}) @@ -92,7 +86,7 @@ def test_int_range_lazy( def test_int_range_non_int_dtype(dtype: DType) -> None: msg = f"non-integer `dtype` passed to `int_range`: {dtype}" with pytest.raises(ComputeError, match=msg): - nw.int_range(start=0, end=3, dtype=dtype) # type: ignore[call-overload] # pyright: ignore[reportArgumentType] + nw.int_range(start=0, end=3, dtype=dtype) # type: ignore[call-overload] @pytest.mark.parametrize( @@ -111,4 +105,4 @@ def test_int_range_multi_named(start: int | nw.Expr, end: int | nw.Expr | None) def test_int_range_eager_set_to_lazy_backend() -> None: with pytest.raises(ValueError, match="Cannot create a Series from a lazy backend"): - nw.int_range(123, eager=Implementation.DUCKDB) + nw.int_range(123, eager=Implementation.DUCKDB) # type: ignore[call-overload] From 45cf54a412cca2ea9cca6106b8fff925e7390d43 
Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 20 Aug 2025 17:21:48 +0000 Subject: [PATCH 42/45] docs: Remove Returns sections #2982 removed from everywhere else --- narwhals/functions.py | 3 --- narwhals/stable/v1/__init__.py | 3 --- narwhals/stable/v2/__init__.py | 3 --- 3 files changed, 9 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 22aa430d72..0cf9799662 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1851,9 +1851,6 @@ def int_range( If set to an (eager) implementation ("pandas", "polars" or "pyarrow"), then a `Series` is returned. - Returns: - Expr or Series of integer data type `dtype`. - Examples: >>> import narwhals as nw >>> nw.int_range(0, 5, step=2, eager="pandas") diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 2d7cf26802..6503b4adc2 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -1383,9 +1383,6 @@ def int_range( eager: If set to `False` (default), then an expression is returned. If set to an (eager) implementation ("pandas", "polars" or "pyarrow"), then a `Series` is returned. - - Returns: - Expr or Series of integer data type `dtype`. """ return _stableify( _int_range_impl(start=start, end=end, step=step, dtype=dtype, eager=eager) diff --git a/narwhals/stable/v2/__init__.py b/narwhals/stable/v2/__init__.py index dfff0bb234..a4f8d77de2 100644 --- a/narwhals/stable/v2/__init__.py +++ b/narwhals/stable/v2/__init__.py @@ -1242,9 +1242,6 @@ def int_range( eager: If set to `False` (default), then an expression is returned. If set to an (eager) implementation ("pandas", "polars" or "pyarrow"), then a `Series` is returned. - - Returns: - Expr or Series of integer data type `dtype`. 
""" return _stableify( _int_range_impl(start=start, end=end, step=step, dtype=dtype, eager=eager) From fb6c328a880286a2f66a02c3c2f7449c90a6f837 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 5 Sep 2025 17:02:47 +0000 Subject: [PATCH 43/45] fix: Update for (#3045) --- narwhals/_arrow/dataframe.py | 2 +- narwhals/_arrow/series.py | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/narwhals/_arrow/dataframe.py b/narwhals/_arrow/dataframe.py index 1ccdb735ae..dcae968c76 100644 --- a/narwhals/_arrow/dataframe.py +++ b/narwhals/_arrow/dataframe.py @@ -489,7 +489,7 @@ def to_dict( def with_row_index(self, name: str, order_by: Sequence[str] | None) -> Self: plx = self.__narwhals_namespace__() if order_by is None: - row_index = plx.int_range_eager(0, len(self))._to_expr() + row_index = plx._expr._from_series(plx.int_range_eager(0, len(self))) else: rank = plx.col(order_by[0]).rank("ordinal", descending=False) row_index = rank.over(partition_by=[], order_by=order_by) - 1 diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index f39ee1481b..7034719001 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -45,7 +45,6 @@ from typing_extensions import Self, TypeAlias, TypeIs from narwhals._arrow.dataframe import ArrowDataFrame - from narwhals._arrow.expr import ArrowExpr from narwhals._arrow.namespace import ArrowNamespace from narwhals._arrow.typing import ( # type: ignore[attr-defined] ArrayAny, @@ -175,9 +174,6 @@ def _from_scalar(self, value: Any) -> Self: value = value.as_py() return super()._from_scalar(value) - def _to_expr(self) -> ArrowExpr: - return cast("ArrowExpr", super()._to_expr()) - @staticmethod def _is_native(obj: ChunkedArrayAny | Any) -> TypeIs[ChunkedArrayAny]: return isinstance(obj, pa.ChunkedArray) From 35ece713a504554cd7d11537d178dc86e0eaa439 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 
5 Sep 2025 17:16:13 +0000 Subject: [PATCH 44/45] fix: Don't treat `step` as an `Expr` > TypeError: argument 'step': 'PolarsExpr' object cannot be interpreted as an integer https://github.com/narwhals-dev/narwhals/actions/runs/17499840563/job/49709898155?pr=2895 --- narwhals/functions.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/narwhals/functions.py b/narwhals/functions.py index 02f6c036b5..f89e655efc 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1893,10 +1893,13 @@ def _int_range_impl( msg = "`end` must contain exactly one value, got expression returning multiple values" raise ComputeError(msg) - args = start, end, step + args = start, end return Expr( lambda plx: apply_n_ary_operation( - plx, partial(plx.int_range, dtype=dtype), *args, str_as_lit=False + plx, + partial(plx.int_range, step=step, dtype=dtype), + *args, + str_as_lit=False, ), ExprMetadata.selector_single(), ) From 0de173e20102203ffcc5b9abb29e1102b3ed2233 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 13 Sep 2025 09:52:15 +0000 Subject: [PATCH 45/45] chore(typing): fix incompatible override related https://github.com/narwhals-dev/narwhals/pull/3096#discussion_r2328717976 --- narwhals/_compliant/namespace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_compliant/namespace.py b/narwhals/_compliant/namespace.py index dc857038de..75585d76d7 100644 --- a/narwhals/_compliant/namespace.py +++ b/narwhals/_compliant/namespace.py @@ -167,7 +167,7 @@ def from_native(self, data: NativeFrameT_co | Any, /) -> CompliantLazyFrameT: msg = f"Unsupported type: {type(data).__name__!r}" # pragma: no cover raise TypeError(msg) - int_range: not_implemented = not_implemented() + int_range = not_implemented() # type: ignore[misc] class EagerNamespace(