From 02a746e00004b064f312cbfa6190858e67c74510 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 7 Aug 2025 00:26:55 +0200 Subject: [PATCH 01/20] WIP: Pyarrow series --- narwhals/_arrow/series.py | 20 ++++++++++++ narwhals/_compliant/series.py | 9 ++++++ narwhals/series.py | 60 +++++++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 7236ef0ce6..d4f845b884 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -1069,6 +1069,26 @@ def exp(self) -> Self: def sqrt(self) -> Self: return self._with_native(pc.sqrt(self.native)) + def is_close( + self, + other: Self | NonNestedLiteral, + *, + abs_tol: float, + rel_tol: float, + nans_equal: bool, + ) -> Self: + ser, other_ = extract_native(self, other) + left = pc.abs(pc.subtract(ser, other_)) + _max = pc.max_element_wise(pc.abs(ser), pc.abs(other_)) + right = pc.max_element_wise(pc.multiply(rel_tol, _max), abs_tol) + result = left <= right + + if nans_equal: + left_is_nan, right_is_nan = pc.is_nan(ser), pc.is_nan(other_) + result = pc.or_kleene(result, pc.and_kleene(left_is_nan, right_is_nan)) + + return self._with_native(result) + @property def dt(self) -> ArrowSeriesDateTimeNamespace: return ArrowSeriesDateTimeNamespace(self) diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index dcb376ed06..a6acf80aa9 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -284,6 +284,15 @@ def hist_from_bin_count( """`Series.hist(bins=None, bin_count=...)`.""" ... + def is_close( + self, + other: Self | NonNestedLiteral, + *, + abs_tol: float, + rel_tol: float, + nans_equal: bool, + ) -> Self: ... + @property def str(self) -> StringNamespace[Self]: ... 
@property diff --git a/narwhals/series.py b/narwhals/series.py index 34bcb2152a..d3915db5ca 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -2763,6 +2763,66 @@ def sqrt(self) -> Self: """ return self._with_compliant(self._compliant_series.sqrt()) + def is_close( + self, + other: Self | NonNestedLiteral, + *, + abs_tol: float = 0.0, + rel_tol: float = 1e-09, + nans_equal: bool = False, + ) -> Self: + r"""Get a boolean mask of the values being close to the other values. + + Two values `a` and `b` are considered close if the following condition holds: + + $$|a-b| \le max \{ \text{rel_tol} \cdot max \{ |a|, |b| \}, \text{abs_tol} \}$$ + + Arguments: + other: Values to compare with. + abs_tol: Absolute tolerance. This is the maximum allowed absolute difference + between two values. Must be non-negative. + rel_tol: Relative tolerance. This is the maximum allowed difference between + two values, relative to the larger absolute value. Must be in the range [0, 1). + nans_equal: Whether NaN values should be considered equal. + + Returns: + Series of Boolean data type. + + Notes: + The implementation of this method is symmetric and mirrors the behavior of + `math.isclose`. 
+ + Examples: # TODO + >>> s = pl.Series("s", [1.0, 1.2, 1.4, 1.45, 1.6]) + >>> s.is_close(1.4, abs_tol=0.1) + shape: (5,) + Series: 's' [bool] + [ + false + false + true + true + false + ] + """ + if not self.dtype.is_numeric(): + from narwhals.exceptions import InvalidOperationError + + msg = ( + f"is_close operation not supported for dtype `{self.dtype}`\n\n" + "Hint: `is_close` is only supported for numeric types" + ) + raise InvalidOperationError(msg) + + return self._with_compliant( + self._compliant_series.is_close( + self._extract_native(other), + abs_tol=abs_tol, + rel_tol=rel_tol, + nans_equal=nans_equal, + ) + ) + @property def str(self) -> SeriesStringNamespace[Self]: return SeriesStringNamespace(self) From 69907eb879306a8976c58da85f120dbec51884ed Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 7 Aug 2025 10:58:15 +0200 Subject: [PATCH 02/20] pandas --- narwhals/_arrow/series.py | 8 ++++---- narwhals/_compliant/expr.py | 25 +++++++++++++++++++++++++ narwhals/_compliant/series.py | 2 +- narwhals/_pandas_like/series.py | 24 ++++++++++++++++++++++++ narwhals/series.py | 2 +- 5 files changed, 55 insertions(+), 6 deletions(-) diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index d4f845b884..39b2d013f9 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -1071,7 +1071,7 @@ def sqrt(self) -> Self: def is_close( self, - other: Self | NonNestedLiteral, + other: Self | NumericLiteral, *, abs_tol: float, rel_tol: float, @@ -1081,11 +1081,11 @@ def is_close( left = pc.abs(pc.subtract(ser, other_)) _max = pc.max_element_wise(pc.abs(ser), pc.abs(other_)) right = pc.max_element_wise(pc.multiply(rel_tol, _max), abs_tol) - result = left <= right + result = pc.less_equal(left, right) if nans_equal: - left_is_nan, right_is_nan = pc.is_nan(ser), pc.is_nan(other_) - result = pc.or_kleene(result, pc.and_kleene(left_is_nan, right_is_nan)) + self_is_nan, other_is_nan = pc.is_nan(ser), pc.is_nan(other_) + result = pc.or_kleene(result, 
pc.and_kleene(self_is_nan, other_is_nan)) return self._with_native(result) diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py index 12ef9180f5..55d6fc89df 100644 --- a/narwhals/_compliant/expr.py +++ b/narwhals/_compliant/expr.py @@ -194,6 +194,15 @@ def rolling_std( self, window_size: int, *, min_samples: int, center: bool, ddof: int ) -> Self: ... + def is_close( + self, + other: Self | NumericLiteral, + *, + abs_tol: float, + rel_tol: float, + nans_equal: bool, + ) -> Self: ... + def __and__(self, other: Any) -> Self: ... def __or__(self, other: Any) -> Self: ... def __add__(self, other: Any) -> Self: ... @@ -892,6 +901,22 @@ def exp(self) -> Self: def sqrt(self) -> Self: return self._reuse_series("sqrt") + def is_close( + self, + other: Self | NumericLiteral, + *, + abs_tol: float, + rel_tol: float, + nans_equal: bool, + ) -> Self: + return self._reuse_series( + "is_close", + other=other, + abs_tol=abs_tol, + rel_tol=rel_tol, + nans_equal=nans_equal, + ) + @property def cat(self) -> EagerExprCatNamespace[Self]: return EagerExprCatNamespace(self) diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index a6acf80aa9..9109c9ce3d 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -286,7 +286,7 @@ def hist_from_bin_count( def is_close( self, - other: Self | NonNestedLiteral, + other: Self | NumericLiteral, *, abs_tol: float, rel_tol: float, diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index 873680ce5b..b55c8d2aa7 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -1028,6 +1028,30 @@ def exp(self) -> Self: def sqrt(self) -> Self: return self._with_native(self.native.pow(0.5)) + def is_close( + self, + other: Self | NumericLiteral, + *, + abs_tol: float, + rel_tol: float, + nans_equal: bool, + ) -> Self: + left = (self - other).abs() + + other_is_series = isinstance(other, PandasLikeSeries) + other_abs = other.abs() if 
other_is_series else abs(other) + _max = self.abs().clip(lower_bound=other_abs, upper_bound=None) + right = (_max * rel_tol).clip(lower_bound=abs_tol, upper_bound=None) + result = left <= right + + if nans_equal: + import math + + self_is_nan = self.is_nan() + other_is_nan = other.is_nan() if other_is_series else math.isnan(other) + result = result | (self_is_nan & other_is_nan) + return result + @property def str(self) -> PandasLikeSeriesStringNamespace: return PandasLikeSeriesStringNamespace(self) diff --git a/narwhals/series.py b/narwhals/series.py index d3915db5ca..614e41f66f 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -2765,7 +2765,7 @@ def sqrt(self) -> Self: def is_close( self, - other: Self | NonNestedLiteral, + other: Self | NumericLiteral, *, abs_tol: float = 0.0, rel_tol: float = 1e-09, From 7be5176440cd04f59ac4b5c285a0d42f661885c9 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 7 Aug 2025 12:23:01 +0200 Subject: [PATCH 03/20] fix pyarrow case --- narwhals/_arrow/series.py | 32 +++++++++++++++++++----- narwhals/series.py | 12 ++++++--- tests/expr_and_series/is_close_test.py | 34 ++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 9 deletions(-) create mode 100644 tests/expr_and_series/is_close_test.py diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 39b2d013f9..4ecd6bbcb3 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -1078,14 +1078,34 @@ def is_close( nans_equal: bool, ) -> Self: ser, other_ = extract_native(self, other) - left = pc.abs(pc.subtract(ser, other_)) - _max = pc.max_element_wise(pc.abs(ser), pc.abs(other_)) - right = pc.max_element_wise(pc.multiply(rel_tol, _max), abs_tol) - result = pc.less_equal(left, right) + abs_diff = pc.abs(pc.subtract(ser, other_)) + rel_threshold = pc.multiply( + lit(rel_tol), pc.max_element_wise(pc.abs(ser), pc.abs(other_)) + ) + tolerance = pc.max_element_wise(rel_threshold, lit(abs_tol)) + ser_is_inf, other_is_inf = pc.is_inf(ser), 
pc.is_inf(other_) + + # Values are close if abs_diff <= tolerance, and both finite + is_close = pc.and_kleene( + pc.less_equal(abs_diff, tolerance), + pc.and_kleene(pc.invert(ser_is_inf), pc.invert(other_is_inf)), + ) + # Handle infinity cases: infinities are "close" only if they have the same sign + ser_sign, other_sign = pc.sign(ser), pc.sign(other_) + both_inf = pc.and_kleene(ser_is_inf, other_is_inf) + is_same_inf = pc.and_kleene(both_inf, pc.equal(ser_sign, other_sign)) + result = pc.or_kleene(is_close, is_same_inf) + + # Handle nan cases: + # * nans_equals = True => if both values are NaN, then True + # * nans_equals = False => if any value is NaN, then False if nans_equal: - self_is_nan, other_is_nan = pc.is_nan(ser), pc.is_nan(other_) - result = pc.or_kleene(result, pc.and_kleene(self_is_nan, other_is_nan)) + both_nan = pc.and_kleene(pc.is_nan(ser), pc.is_nan(other_)) + result = pc.or_kleene(result, both_nan) + else: + either_nan = pc.or_kleene(pc.is_nan(ser), pc.is_nan(other_)) + result = pc.and_kleene(result, pc.invert(either_nan)) return self._with_native(result) diff --git a/narwhals/series.py b/narwhals/series.py index 614e41f66f..77ee996da8 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -17,7 +17,7 @@ ) from narwhals.dependencies import is_numpy_array_1d, is_numpy_scalar from narwhals.dtypes import _validate_dtype, _validate_into_dtype -from narwhals.exceptions import ComputeError +from narwhals.exceptions import ComputeError, InvalidOperationError from narwhals.series_cat import SeriesCatNamespace from narwhals.series_dt import SeriesDateTimeNamespace from narwhals.series_list import SeriesListNamespace @@ -2806,14 +2806,20 @@ def is_close( ] """ if not self.dtype.is_numeric(): - from narwhals.exceptions import InvalidOperationError - msg = ( f"is_close operation not supported for dtype `{self.dtype}`\n\n" "Hint: `is_close` is only supported for numeric types" ) raise InvalidOperationError(msg) + if abs_tol < 0: + msg = f"`abs_tol` must 
be non-negative but got {abs_tol}" + raise ComputeError(msg) + + if not (0 <= rel_tol < 1): + msg = f"`rel_tol` must be in the range [0, 1) but got {rel_tol}" + raise ComputeError(msg) + return self._with_compliant( self._compliant_series.is_close( self._extract_native(other), diff --git a/tests/expr_and_series/is_close_test.py b/tests/expr_and_series/is_close_test.py new file mode 100644 index 0000000000..41f1b9cff6 --- /dev/null +++ b/tests/expr_and_series/is_close_test.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +import pytest + +import narwhals as nw +from tests.utils import ConstructorEager, assert_equal_data + +data = { + "left": [1.0, None, float("nan"), float("inf"), float("-inf")], + "right": [1.005, None, float("nan"), float("inf"), 3.0], +} + + +@pytest.mark.parametrize( + ("abs_tol", "rel_tol", "nans_equal", "expected"), + [ + (0.1, 0.0, False, [True, None, False, True, False]), + (0.0001, 0.0, True, [False, None, True, True, False]), + (0.0, 0.1, False, [True, None, False, True, False]), + (0.0, 0.001, True, [False, None, True, True, False]), + ], +) +def test_is_close_series_with_series( + constructor_eager: ConstructorEager, + abs_tol: float, + rel_tol: float, + *, + nans_equal: bool, + expected: list[float], +) -> None: + df = nw.from_native(constructor_eager(data), eager_only=True) + left, right = df["left"], df["right"] + result = left.is_close(right, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal) + assert_equal_data({"x": result}, {"x": expected}) From 1df7bf22540aba231f59c12b5cf0179c4eb6636f Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 7 Aug 2025 13:00:00 +0200 Subject: [PATCH 04/20] polars series and expr --- narwhals/_polars/expr.py | 50 +++++++++++++++++++++++++++++++++++++- narwhals/_polars/series.py | 44 +++++++++++++++++++++++++++++---- 2 files changed, 88 insertions(+), 6 deletions(-) diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py index 3854810ffa..4eba234234 100644 --- 
a/narwhals/_polars/expr.py +++ b/narwhals/_polars/expr.py @@ -27,7 +27,7 @@ from narwhals._polars.dataframe import Method, PolarsDataFrame from narwhals._polars.namespace import PolarsNamespace from narwhals._utils import Version, _LimitedContext - from narwhals.typing import IntoDType + from narwhals.typing import IntoDType, NumericLiteral class PolarsExpr: @@ -232,6 +232,54 @@ def __narwhals_namespace__(self) -> PolarsNamespace: # pragma: no cover return PolarsNamespace(version=self._version) + def is_close( + self, + other: Self | NumericLiteral, + *, + abs_tol: float, + rel_tol: float, + nans_equal: bool, + ) -> Self: + native_expr = self.native + other_expr = ( + extract_native(other) if isinstance(other, PolarsExpr) else pl.lit(other) + ) + + if self._backend_version < (1, 32, 0): + abs_diff = (native_expr - other_expr).abs() + rel_threshold = native_expr.abs().clip(lower_bound=other_expr.abs()) * rel_tol + tolerance = rel_threshold.clip(lower_bound=pl.lit(abs_tol)) + + self_is_inf, other_is_inf = ( + native_expr.is_infinite(), + other_expr.is_infinite(), + ) + + # Values are close if abs_diff <= tolerance, and both finite + is_close = (abs_diff <= tolerance) & self_is_inf.not_() & other_is_inf.not_() + + # Handle infinity cases: infinities are "close" only if they have the same sign + self_sign, other_sign = native_expr.sign(), other_expr.sign() + both_inf = self_is_inf & other_is_inf + is_same_inf = both_inf & (self_sign == other_sign) + result = is_close | is_same_inf + + # Handle nan cases: + # * nans_equals = True => if both values are NaN, then True + # * nans_equals = False => if any value is NaN, then False + if nans_equal: + both_nan = native_expr.is_nan() & other_expr.is_nan() + result = result | both_nan + else: + either_nan = native_expr.is_nan() | other_expr.is_nan() + result = result & either_nan.not_() + + else: + result = native_expr.is_close( + other=other_expr, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal + ) + return 
self._with_native(result) + @property def dt(self) -> PolarsExprDateTimeNamespace: return PolarsExprDateTimeNamespace(self) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index a57f859aeb..cc9597ee87 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -36,7 +36,13 @@ from narwhals._utils import Version, _LimitedContext from narwhals.dtypes import DType from narwhals.series import Series - from narwhals.typing import Into1DArray, IntoDType, MultiIndexSelector, _1DArray + from narwhals.typing import ( + Into1DArray, + IntoDType, + MultiIndexSelector, + NumericLiteral, + _1DArray, + ) T = TypeVar("T") IncludeBreakpoint: TypeAlias = Literal[False, True] @@ -84,6 +90,7 @@ "gather_every", "head", "is_between", + "is_close", "is_finite", "is_first_distinct", "is_in", @@ -125,6 +132,11 @@ class PolarsSeries: _implementation = Implementation.POLARS + _HIST_EMPTY_SCHEMA: ClassVar[Mapping[IncludeBreakpoint, Sequence[str]]] = { + True: ["breakpoint", "count"], + False: ["count"], + } + def __init__(self, series: pl.Series, *, version: Version) -> None: self._native_series: pl.Series = series self._version = version @@ -472,10 +484,32 @@ def __contains__(self, other: Any) -> bool: except Exception as e: # noqa: BLE001 raise catch_polars_exception(e) from None - _HIST_EMPTY_SCHEMA: ClassVar[Mapping[IncludeBreakpoint, Sequence[str]]] = { - True: ["breakpoint", "count"], - False: ["count"], - } + def is_close( + self, + other: Self | NumericLiteral, + *, + abs_tol: float, + rel_tol: float, + nans_equal: bool, + ) -> Self: + kwargs = { + "other": extract_native(other), + "abs_tol": abs_tol, + "rel_tol": rel_tol, + "nans_equal": nans_equal, + } + if self._backend_version < (1, 32, 0): + name = self.name + ns = self.__narwhals_namespace__() + result = ( + self.to_frame() + .select(ns.col(name).is_close(**kwargs)) + .get_column(name) + .native + ) + else: + result = self.native.is_close(**kwargs) + return self._with_native(result) def 
hist_from_bins( self, bins: list[float], *, include_breakpoint: bool From 0b4de268f2939303ff7ee33247c3797480255b50 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 7 Aug 2025 16:33:00 +0200 Subject: [PATCH 05/20] move implementation to compliant level --- docs/api-reference/expr.md | 1 + docs/api-reference/series.md | 1 + narwhals/_arrow/series.py | 40 -------------- narwhals/_compliant/series.py | 53 ++++++++++++++++++ narwhals/_dask/expr.py | 1 + narwhals/_pandas_like/series.py | 24 --------- narwhals/_polars/expr.py | 3 +- narwhals/_sql/expr.py | 1 + narwhals/expr.py | 61 ++++++++++++++++++++- narwhals/series.py | 38 ++++++++----- pyproject.toml | 1 + tests/expr_and_series/is_close_test.py | 75 +++++++++++++++++++++++--- 12 files changed, 213 insertions(+), 86 deletions(-) diff --git a/docs/api-reference/expr.md b/docs/api-reference/expr.md index dbd613d134..7e182ac000 100644 --- a/docs/api-reference/expr.md +++ b/docs/api-reference/expr.md @@ -23,6 +23,7 @@ - filter - clip - is_between + - is_close - is_duplicated - is_finite - is_first_distinct diff --git a/docs/api-reference/series.md b/docs/api-reference/series.md index 57c0ab313b..01b7c1ff8b 100644 --- a/docs/api-reference/series.md +++ b/docs/api-reference/series.md @@ -35,6 +35,7 @@ - hist - implementation - is_between + - is_close - is_duplicated - is_empty - is_finite diff --git a/narwhals/_arrow/series.py b/narwhals/_arrow/series.py index 4ecd6bbcb3..7236ef0ce6 100644 --- a/narwhals/_arrow/series.py +++ b/narwhals/_arrow/series.py @@ -1069,46 +1069,6 @@ def exp(self) -> Self: def sqrt(self) -> Self: return self._with_native(pc.sqrt(self.native)) - def is_close( - self, - other: Self | NumericLiteral, - *, - abs_tol: float, - rel_tol: float, - nans_equal: bool, - ) -> Self: - ser, other_ = extract_native(self, other) - abs_diff = pc.abs(pc.subtract(ser, other_)) - rel_threshold = pc.multiply( - lit(rel_tol), pc.max_element_wise(pc.abs(ser), pc.abs(other_)) - ) - tolerance = 
pc.max_element_wise(rel_threshold, lit(abs_tol)) - ser_is_inf, other_is_inf = pc.is_inf(ser), pc.is_inf(other_) - - # Values are close if abs_diff <= tolerance, and both finite - is_close = pc.and_kleene( - pc.less_equal(abs_diff, tolerance), - pc.and_kleene(pc.invert(ser_is_inf), pc.invert(other_is_inf)), - ) - - # Handle infinity cases: infinities are "close" only if they have the same sign - ser_sign, other_sign = pc.sign(ser), pc.sign(other_) - both_inf = pc.and_kleene(ser_is_inf, other_is_inf) - is_same_inf = pc.and_kleene(both_inf, pc.equal(ser_sign, other_sign)) - result = pc.or_kleene(is_close, is_same_inf) - - # Handle nan cases: - # * nans_equals = True => if both values are NaN, then True - # * nans_equals = False => if any value is NaN, then False - if nans_equal: - both_nan = pc.and_kleene(pc.is_nan(ser), pc.is_nan(other_)) - result = pc.or_kleene(result, both_nan) - else: - either_nan = pc.or_kleene(pc.is_nan(ser), pc.is_nan(other_)) - result = pc.and_kleene(result, pc.invert(either_nan)) - - return self._with_native(result) - @property def dt(self) -> ArrowSeriesDateTimeNamespace: return ArrowSeriesDateTimeNamespace(self) diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index 9109c9ce3d..33d7326d1f 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -367,6 +367,59 @@ def __getitem__(self, item: MultiIndexSelector[Self]) -> Self: return self._gather(item) assert_never(item) + def is_close( + self, + other: Self | NumericLiteral, + *, + abs_tol: float, + rel_tol: float, + nans_equal: bool, + ) -> Self: + from decimal import Decimal + + if isinstance(other, (float, int, Decimal)): + from math import isinf, isnan + + other_abs, other_is_nan, other_is_inf = abs(other), isnan(other), isinf(other) + + # Define the other_is_not_inf variable to prevent triggering: + # > DeprecationWarning: Bitwise inversion '~' on bool is deprecated and will be removed in + # > Python 3.16. 
This returns the bitwise inversion of the underlying int object and is usually + # > not what you expect from negating a bool. Use the 'not' operator for boolean negation or + # > ~int(x) if you really want the bitwise inversion of the underlying int. + other_is_not_inf = not other_is_inf + + else: + other_abs, other_is_nan = other.abs(), other.is_nan() + other_is_not_inf = other.is_finite() | other_is_nan + other_is_inf = ~other_is_not_inf + + rel_threshold = self.abs().clip(lower_bound=other_abs, upper_bound=None) * rel_tol + tolerance = rel_threshold.clip(lower_bound=abs_tol, upper_bound=None) + + self_is_nan = self.is_nan() + self_is_inf = ~(self.is_finite() | self_is_nan) + + # Values are close if abs_diff <= tolerance, and both finite + is_close = ((self - other).abs() <= tolerance) & (~self_is_inf) & other_is_not_inf + + # Handle infinity cases: infinities are "close" only if they have the same sign + self_sign, other_sign = self > 0, other > 0 + is_same_inf = self_is_inf & other_is_inf & (self_sign == other_sign) + result = is_close | is_same_inf + + # Handle nan cases: + # * nans_equals = True => if both values are NaN, then True + # * nans_equals = False => if any value is NaN, then False + if nans_equal: + both_nan = self_is_nan & other_is_nan + result = result | both_nan + else: + either_nan = self_is_nan | other_is_nan + result = result & ~either_nan + + return result + @property def str(self) -> EagerSeriesStringNamespace[Self, NativeSeriesT]: ... 
@property diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 5f6952ba1a..ea9b71c0d9 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -675,6 +675,7 @@ def dt(self) -> DaskExprDateTimeNamespace: ewm_mean: not_implemented = not_implemented() gather_every: not_implemented = not_implemented() head: not_implemented = not_implemented() + is_close: not_implemented = not_implemented() map_batches: not_implemented = not_implemented() mode: not_implemented = not_implemented() sample: not_implemented = not_implemented() diff --git a/narwhals/_pandas_like/series.py b/narwhals/_pandas_like/series.py index b55c8d2aa7..873680ce5b 100644 --- a/narwhals/_pandas_like/series.py +++ b/narwhals/_pandas_like/series.py @@ -1028,30 +1028,6 @@ def exp(self) -> Self: def sqrt(self) -> Self: return self._with_native(self.native.pow(0.5)) - def is_close( - self, - other: Self | NumericLiteral, - *, - abs_tol: float, - rel_tol: float, - nans_equal: bool, - ) -> Self: - left = (self - other).abs() - - other_is_series = isinstance(other, PandasLikeSeries) - other_abs = other.abs() if other_is_series else abs(other) - _max = self.abs().clip(lower_bound=other_abs, upper_bound=None) - right = (_max * rel_tol).clip(lower_bound=abs_tol, upper_bound=None) - result = left <= right - - if nans_equal: - import math - - self_is_nan = self.is_nan() - other_is_nan = other.is_nan() if other_is_series else math.isnan(other) - result = result | (self_is_nan & other_is_nan) - return result - @property def str(self) -> PandasLikeSeriesStringNamespace: return PandasLikeSeriesStringNamespace(self) diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py index 4eba234234..743bfdff75 100644 --- a/narwhals/_polars/expr.py +++ b/narwhals/_polars/expr.py @@ -260,8 +260,7 @@ def is_close( # Handle infinity cases: infinities are "close" only if they have the same sign self_sign, other_sign = native_expr.sign(), other_expr.sign() - both_inf = self_is_inf & other_is_inf - 
is_same_inf = both_inf & (self_sign == other_sign) + is_same_inf = self_is_inf & other_is_inf & (self_sign == other_sign) result = is_close | is_same_inf # Handle nan cases: diff --git a/narwhals/_sql/expr.py b/narwhals/_sql/expr.py index c925944c89..e8df422041 100644 --- a/narwhals/_sql/expr.py +++ b/narwhals/_sql/expr.py @@ -755,6 +755,7 @@ def str(self) -> SQLExprStringNamespace[Self]: ... ewm_mean: not_implemented = not_implemented() gather_every: not_implemented = not_implemented() head: not_implemented = not_implemented() + is_close: not_implemented = not_implemented() map_batches: not_implemented = not_implemented() mode: not_implemented = not_implemented() replace_strict: not_implemented = not_implemented() diff --git a/narwhals/expr.py b/narwhals/expr.py index b1ca36f974..2828434a5a 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -12,7 +12,7 @@ ) from narwhals._utils import _validate_rolling_arguments, ensure_type, flatten from narwhals.dtypes import _validate_dtype -from narwhals.exceptions import InvalidOperationError +from narwhals.exceptions import ComputeError, InvalidOperationError from narwhals.expr_cat import ExprCatNamespace from narwhals.expr_dt import ExprDateTimeNamespace from narwhals.expr_list import ExprListNamespace @@ -2347,6 +2347,65 @@ def sqrt(self) -> Self: """ return self._with_elementwise(lambda plx: self._to_compliant_expr(plx).sqrt()) + def is_close( + self, + other: Self | NumericLiteral, + *, + abs_tol: float = 0.0, + rel_tol: float = 1e-09, + nans_equal: bool = False, + ) -> Self: + r"""Check if this expression is close, i.e. almost equal, to the other expression. + + Two values `a` and `b` are considered close if the following condition holds: + + $$|a-b| \le max \{ \text{rel_tol} \cdot max \{ |a|, |b| \}, \text{abs_tol} \}$$ + + Arguments: + other: Values to compare with. + abs_tol: Absolute tolerance. This is the maximum allowed absolute difference + between two values. Must be non-negative. 
+ rel_tol: Relative tolerance. This is the maximum allowed difference between + two values, relative to the larger absolute value. Must be in the range + [0, 1). + nans_equal: Whether NaN values should be considered equal. + + Returns: + Expression of Boolean data type. + + Notes: + The implementation of this method is symmetric and mirrors the behavior of + `math.isclose`. Specifically note that this behavior is different to + `numpy.isclose`. + + Examples: TODO + """ + if abs_tol < 0: + msg = f"`abs_tol` must be non-negative but got {abs_tol}" + raise ComputeError(msg) + + if not (0 <= rel_tol < 1): + msg = f"`rel_tol` must be in the range [0, 1) but got {rel_tol}" + raise ComputeError(msg) + + kwargs = {"abs_tol": abs_tol, "rel_tol": rel_tol, "nans_equal": nans_equal} + return self.__class__( + lambda plx: apply_n_ary_operation( + plx, + lambda *exprs: exprs[0].is_close(exprs[1], **kwargs), + self, + other, + str_as_lit=False, + ), + combine_metadata( + self, + other, + str_as_lit=False, + allow_multi_output=False, + to_single_output=False, + ), + ) + @property def str(self) -> ExprStringNamespace[Self]: return ExprStringNamespace(self) diff --git a/narwhals/series.py b/narwhals/series.py index 77ee996da8..7fd7f9aa1c 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -2782,7 +2782,8 @@ def is_close( abs_tol: Absolute tolerance. This is the maximum allowed absolute difference between two values. Must be non-negative. rel_tol: Relative tolerance. This is the maximum allowed difference between - two values, relative to the larger absolute value. Must be in the range [0, 1). + two values, relative to the larger absolute value. Must be in the range + [0, 1). nans_equal: Whether NaN values should be considered equal. Returns: @@ -2790,20 +2791,31 @@ def is_close( Notes: The implementation of this method is symmetric and mirrors the behavior of - `math.isclose`. + `math.isclose`. Specifically note that this behavior is different to + `numpy.isclose`. 
- Examples: # TODO - >>> s = pl.Series("s", [1.0, 1.2, 1.4, 1.45, 1.6]) + Examples: + >>> import pyarrow as pa + >>> import narwhals as nw + >>> + >>> data = [1.0, float("inf"), 1.41, None, float("nan")] + >>> s_native = pa.chunked_array([data]) + >>> s = nw.from_native(s_native, series_only=True) >>> s.is_close(1.4, abs_tol=0.1) - shape: (5,) - Series: 's' [bool] - [ - false - false - true - true - false - ] + ┌────────────────────────────────────────────────┐ + | Narwhals Series | + |------------------------------------------------| + |<pyarrow.lib.ChunkedArray object at 0x...> | + |[ | + | [ | + | false, | + | false, | + | true, | + | null, | + | false | + | ] | + |] | + └────────────────────────────────────────────────┘ """ if not self.dtype.is_numeric(): msg = ( diff --git a/pyproject.toml b/pyproject.toml index ee06d75a05..bc3984f8ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -245,6 +245,7 @@ filterwarnings = [ "ignore:.*np.find_common_type is deprecated:DeprecationWarning:pandas", # Warning raised when calling PandasLikeNamespace.from_arrow with old pyarrow "ignore:.*is_sparse is deprecated and will be removed in a future version.*:DeprecationWarning:pyarrow", + 'ignore:.*invalid value encountered in cast:RuntimeWarning:pandas', ] xfail_strict = true markers = ["slow: marks tests as slow (deselect with '-m \"not slow\"')"] diff --git a/tests/expr_and_series/is_close_test.py b/tests/expr_and_series/is_close_test.py index 41f1b9cff6..b1ad223792 100644 --- a/tests/expr_and_series/is_close_test.py +++ b/tests/expr_and_series/is_close_test.py @@ -3,21 +3,65 @@ import pytest import narwhals as nw +from narwhals.exceptions import ComputeError, InvalidOperationError +from tests.conftest import modin_constructor, pandas_constructor from tests.utils import ConstructorEager, assert_equal_data +NON_NULLABLE_CONSTRUCTORS = (pandas_constructor, modin_constructor) +NULL_PLACEHOLDER = 999.0 +NAN_PLACEHOLDER = -1.0 +INF = float("inf") + data = { - "left": [1.0, None, float("nan"), float("inf"), 
float("-inf")], - "right": [1.005, None, float("nan"), float("inf"), 3.0], + "left": [1.001, NULL_PLACEHOLDER, NAN_PLACEHOLDER, INF, -INF, INF], + "right": [1.005, NULL_PLACEHOLDER, NAN_PLACEHOLDER, INF, 3.0, -INF], + "non_numeric": list("number"), } +def test_is_close_series_raise_non_numeric(constructor_eager: ConstructorEager) -> None: + df = nw.from_native(constructor_eager(data), eager_only=True) + left, right = df["non_numeric"], df["right"] + + msg = "is_close operation not supported for dtype" + with pytest.raises(InvalidOperationError, match=msg): + left.is_close(right) + + +def test_is_close_series_raise_negative_abs_tol( + constructor_eager: ConstructorEager, +) -> None: + df = nw.from_native(constructor_eager(data), eager_only=True) + left, right = df["left"], df["right"] + + abs_tol = -2 + msg = rf"`abs_tol` must be non-negative but got {abs_tol}" + with pytest.raises(ComputeError, match=msg): + left.is_close(right, abs_tol=abs_tol) + + with pytest.raises(ComputeError, match=msg): + left.is_close(right, abs_tol=abs_tol, rel_tol=999) + + +@pytest.mark.parametrize("rel_tol", [-0.0001, 1.0, 1.1]) +def test_is_close_series_raise_invalid_rel_tol( + constructor_eager: ConstructorEager, rel_tol: float +) -> None: + df = nw.from_native(constructor_eager(data), eager_only=True) + left, right = df["left"], df["right"] + + msg = rf"`rel_tol` must be in the range \[0, 1\) but got {rel_tol}" + with pytest.raises(ComputeError, match=msg): + left.is_close(right, rel_tol=rel_tol) + + @pytest.mark.parametrize( ("abs_tol", "rel_tol", "nans_equal", "expected"), [ - (0.1, 0.0, False, [True, None, False, True, False]), - (0.0001, 0.0, True, [False, None, True, True, False]), - (0.0, 0.1, False, [True, None, False, True, False]), - (0.0, 0.001, True, [False, None, True, True, False]), + (0.1, 0.0, False, [True, None, False, True, False, False]), + (0.0001, 0.0, True, [False, None, True, True, False, False]), + (0.0, 0.1, False, [True, None, False, True, False, False]), + 
(0.0, 0.001, True, [False, None, True, True, False, False]), ], ) def test_is_close_series_with_series( @@ -30,5 +74,24 @@ def test_is_close_series_with_series( ) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) left, right = df["left"], df["right"] + + nulls = nw.new_series( + name="nulls", + values=[None] * len(left), + dtype=nw.Float64(), + backend=df.implementation, + ) + # Tricks to generate nan's and null's for pandas with nullable backends: + # * Square rooting a negative number will generate a NaN + # * Replacing a value with None once the dtype is nullable will generate 's + left = left.zip_with(~left.is_finite(), left**0.5).zip_with( + left != NULL_PLACEHOLDER, nulls + ) + right = right.zip_with(~right.is_finite(), right**0.5).zip_with( + left != NULL_PLACEHOLDER, nulls + ) result = left.is_close(right, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal) + + if constructor_eager in NON_NULLABLE_CONSTRUCTORS: + expected = [v if v is not None else False for v in expected] assert_equal_data({"x": result}, {"x": expected}) From c6ce52cfeb6985cf5a3a34ac9d89235cb16296a7 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 7 Aug 2025 16:42:46 +0200 Subject: [PATCH 06/20] help typing --- narwhals/_compliant/series.py | 18 ++++++++++++------ narwhals/_polars/series.py | 24 ++++++++++++++++-------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index 33d7326d1f..6bd01982bc 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -377,16 +377,22 @@ def is_close( ) -> Self: from decimal import Decimal + other_abs: Self | NumericLiteral + other_is_nan: Self | bool + other_is_inf: Self | bool + other_is_not_inf: Self | bool + if isinstance(other, (float, int, Decimal)): from math import isinf, isnan - other_abs, other_is_nan, other_is_inf = abs(other), isnan(other), isinf(other) + other_abs, other_is_nan, other_is_inf = abs(other), 
isnan(other), isinf(other) # type: ignore[assignment] - # Define the other_is_not_inf variable to prevent triggering: - # > DeprecationWarning: Bitwise inversion '~' on bool is deprecated and will be removed in - # > Python 3.16. This returns the bitwise inversion of the underlying int object and is usually - # > not what you expect from negating a bool. Use the 'not' operator for boolean negation or - # > ~int(x) if you really want the bitwise inversion of the underlying int. + # Define the other_is_not_inf variable to prevent triggering the following warning: + # > DeprecationWarning: Bitwise inversion '~' on bool is deprecated and will be + # > removed in Python 3.16. This returns the bitwise inversion of the + # > underlying int object and is usually not what you expect from negating + # > a bool. Use the 'not' operator for boolean negation or ~int(x) if you + # > really want the bitwise inversion of the underlying int. other_is_not_inf = not other_is_inf else: diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index cc9597ee87..ce8058d303 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -492,23 +492,31 @@ def is_close( rel_tol: float, nans_equal: bool, ) -> Self: - kwargs = { - "other": extract_native(other), - "abs_tol": abs_tol, - "rel_tol": rel_tol, - "nans_equal": nans_equal, - } + other_native = extract_native(other) + if self._backend_version < (1, 32, 0): name = self.name ns = self.__narwhals_namespace__() result = ( self.to_frame() - .select(ns.col(name).is_close(**kwargs)) + .select( + ns.col(name).is_close( + other=other_native, # type: ignore[arg-type] + abs_tol=abs_tol, + rel_tol=rel_tol, + nans_equal=nans_equal, + ) + ) .get_column(name) .native ) else: - result = self.native.is_close(**kwargs) + result = self.native.is_close( + other=other_native, # pyright: ignore[reportArgumentType] + abs_tol=abs_tol, + rel_tol=rel_tol, + nans_equal=nans_equal, + ) return self._with_native(result) def hist_from_bins( 
From 89c947b3053e597dc606c8841bcf251038115bbf Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 7 Aug 2025 17:10:44 +0200 Subject: [PATCH 07/20] lazy almost there --- narwhals/_compliant/expr.py | 68 ++++++++++++++++++++---- narwhals/_dask/expr.py | 1 - narwhals/_sql/expr.py | 1 - tests/expr_and_series/is_close_test.py | 73 +++++++++++++++++++++++--- 4 files changed, 124 insertions(+), 19 deletions(-) diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py index 55d6fc89df..af74d6eef8 100644 --- a/narwhals/_compliant/expr.py +++ b/narwhals/_compliant/expr.py @@ -194,15 +194,6 @@ def rolling_std( self, window_size: int, *, min_samples: int, center: bool, ddof: int ) -> Self: ... - def is_close( - self, - other: Self | NumericLiteral, - *, - abs_tol: float, - rel_tol: float, - nans_equal: bool, - ) -> Self: ... - def __and__(self, other: Any) -> Self: ... def __or__(self, other: Any) -> Self: ... def __add__(self, other: Any) -> Self: ... @@ -240,6 +231,65 @@ def _evaluate_aliases( names = self._evaluate_output_names(frame) return alias(names) if (alias := self._alias_output_names) else names + def is_close( + self, + other: Self | NumericLiteral, + *, + abs_tol: float, + rel_tol: float, + nans_equal: bool, + ) -> Self: + from decimal import Decimal + + other_abs: Self | NumericLiteral + other_is_nan: Self | bool + other_is_inf: Self | bool + other_is_not_inf: Self | bool + + if isinstance(other, (float, int, Decimal)): + from math import isinf, isnan + + other_abs, other_is_nan, other_is_inf = abs(other), isnan(other), isinf(other) # type: ignore[assignment] + + # Define the other_is_not_inf variable to prevent triggering the following warning: + # > DeprecationWarning: Bitwise inversion '~' on bool is deprecated and will be + # > removed in Python 3.16. This returns the bitwise inversion of the + # > underlying int object and is usually not what you expect from negating + # > a bool. 
Use the 'not' operator for boolean negation or ~int(x) if you + # > really want the bitwise inversion of the underlying int. + other_is_not_inf = not other_is_inf + + else: + other_abs, other_is_nan = other.abs(), other.is_nan() + other_is_not_inf = other.is_finite() | other_is_nan + other_is_inf = ~other_is_not_inf + + rel_threshold = self.abs().clip(lower_bound=other_abs, upper_bound=None) * rel_tol + tolerance = rel_threshold.clip(lower_bound=abs_tol, upper_bound=None) + + self_is_nan = self.is_nan() + self_is_inf = ~(self.is_finite() | self_is_nan) + + # Values are close if abs_diff <= tolerance, and both finite + is_close = ((self - other).abs() <= tolerance) & (~self_is_inf) & other_is_not_inf + + # Handle infinity cases: infinities are "close" only if they have the same sign + self_sign, other_sign = self > 0, other > 0 + is_same_inf = self_is_inf & other_is_inf & (self_sign == other_sign) + result = is_close | is_same_inf + + # Handle nan cases: + # * nans_equals = True => if both values are NaN, then True + # * nans_equals = False => if any value is NaN, then False + if nans_equal: + both_nan = self_is_nan & other_is_nan + result = result | both_nan + else: + either_nan = self_is_nan | other_is_nan + result = result & ~either_nan + + return result + @property def str(self) -> StringNamespace[Self]: ... 
@property diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index ea9b71c0d9..5f6952ba1a 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -675,7 +675,6 @@ def dt(self) -> DaskExprDateTimeNamespace: ewm_mean: not_implemented = not_implemented() gather_every: not_implemented = not_implemented() head: not_implemented = not_implemented() - is_close: not_implemented = not_implemented() map_batches: not_implemented = not_implemented() mode: not_implemented = not_implemented() sample: not_implemented = not_implemented() diff --git a/narwhals/_sql/expr.py b/narwhals/_sql/expr.py index e8df422041..c925944c89 100644 --- a/narwhals/_sql/expr.py +++ b/narwhals/_sql/expr.py @@ -755,7 +755,6 @@ def str(self) -> SQLExprStringNamespace[Self]: ... ewm_mean: not_implemented = not_implemented() gather_every: not_implemented = not_implemented() head: not_implemented = not_implemented() - is_close: not_implemented = not_implemented() map_batches: not_implemented = not_implemented() mode: not_implemented = not_implemented() replace_strict: not_implemented = not_implemented() diff --git a/tests/expr_and_series/is_close_test.py b/tests/expr_and_series/is_close_test.py index b1ad223792..80a24beb3c 100644 --- a/tests/expr_and_series/is_close_test.py +++ b/tests/expr_and_series/is_close_test.py @@ -4,18 +4,28 @@ import narwhals as nw from narwhals.exceptions import ComputeError, InvalidOperationError -from tests.conftest import modin_constructor, pandas_constructor -from tests.utils import ConstructorEager, assert_equal_data +from tests.conftest import ( + dask_lazy_p1_constructor, + dask_lazy_p2_constructor, + modin_constructor, + pandas_constructor, +) +from tests.utils import Constructor, ConstructorEager, assert_equal_data -NON_NULLABLE_CONSTRUCTORS = (pandas_constructor, modin_constructor) -NULL_PLACEHOLDER = 999.0 -NAN_PLACEHOLDER = -1.0 -INF = float("inf") +NON_NULLABLE_CONSTRUCTORS = ( + pandas_constructor, + dask_lazy_p1_constructor, + 
dask_lazy_p2_constructor, + modin_constructor, +) +NULL_PLACEHOLDER, NAN_PLACEHOLDER = 9999.0, -1.0 +INF_POS, INF_NEG = float("inf"), float("-inf") data = { - "left": [1.001, NULL_PLACEHOLDER, NAN_PLACEHOLDER, INF, -INF, INF], - "right": [1.005, NULL_PLACEHOLDER, NAN_PLACEHOLDER, INF, 3.0, -INF], + "left": [1.001, NULL_PLACEHOLDER, NAN_PLACEHOLDER, INF_POS, INF_NEG, INF_POS], + "right": [1.005, NULL_PLACEHOLDER, NAN_PLACEHOLDER, INF_POS, 3.0, INF_NEG], "non_numeric": list("number"), + "idx": list(range(6)), } @@ -95,3 +105,50 @@ def test_is_close_series_with_series( if constructor_eager in NON_NULLABLE_CONSTRUCTORS: expected = [v if v is not None else False for v in expected] assert_equal_data({"x": result}, {"x": expected}) + + +def test_is_close_series_with_scalar() -> None: ... # TODO + + +@pytest.mark.parametrize( + ("abs_tol", "rel_tol", "nans_equal", "expected"), + [ + (0.1, 0.0, False, [True, None, False, True, False, False]), + (0.0001, 0.0, True, [False, None, True, True, False, False]), + (0.0, 0.1, False, [True, None, False, True, False, False]), + (0.0, 0.001, True, [False, None, True, True, False, False]), + ], +) +def test_is_close_expr_with_expr( + constructor: Constructor, + abs_tol: float, + rel_tol: float, + *, + nans_equal: bool, + expected: list[float], +) -> None: + _left, _right = nw.col("left"), nw.col("right") + result = ( + nw.from_native(constructor(data)) + # Tricks to generate nan's and null's for pandas with nullable backends: + # * Square rooting a negative number will generate a NaN + # * Replacing a value with None once the dtype is nullable will generate 's + .with_columns( + left=nw.when(_left != NULL_PLACEHOLDER).then(_left).otherwise(None), + right=nw.when(_right != NULL_PLACEHOLDER).then(_right).otherwise(None), + ) + .with_columns( + left=nw.when(~_left.is_finite()).then(_left).otherwise(_left**0.5), + right=nw.when(~_right.is_finite()).then(_right).otherwise(_right**0.5), + ) + .select( + "idx", + x=_left.is_close( + _right, 
abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal + ), + ) + .sort("idx") + ) + if constructor in NON_NULLABLE_CONSTRUCTORS: + expected = [v if v is not None else False for v in expected] + assert_equal_data(result, {"idx": data["idx"], "x": expected}) From a271a94b3ee3c18e455ca8ee53a8fb310d464c57 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 7 Aug 2025 17:22:04 +0200 Subject: [PATCH 08/20] simplify --- narwhals/_compliant/expr.py | 55 +++-------------------------- narwhals/_compliant/series.py | 65 +++-------------------------------- narwhals/_compliant/typing.py | 4 +++ narwhals/_utils.py | 63 ++++++++++++++++++++++++++++++++- 4 files changed, 75 insertions(+), 112 deletions(-) diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py index af74d6eef8..e409e45b4f 100644 --- a/narwhals/_compliant/expr.py +++ b/narwhals/_compliant/expr.py @@ -27,7 +27,7 @@ LazyExprT, NativeExprT, ) -from narwhals._utils import _StoresCompliant +from narwhals._utils import _is_close_impl, _StoresCompliant from narwhals.dependencies import get_numpy, is_numpy_array if TYPE_CHECKING: @@ -239,56 +239,9 @@ def is_close( rel_tol: float, nans_equal: bool, ) -> Self: - from decimal import Decimal - - other_abs: Self | NumericLiteral - other_is_nan: Self | bool - other_is_inf: Self | bool - other_is_not_inf: Self | bool - - if isinstance(other, (float, int, Decimal)): - from math import isinf, isnan - - other_abs, other_is_nan, other_is_inf = abs(other), isnan(other), isinf(other) # type: ignore[assignment] - - # Define the other_is_not_inf variable to prevent triggering the following warning: - # > DeprecationWarning: Bitwise inversion '~' on bool is deprecated and will be - # > removed in Python 3.16. This returns the bitwise inversion of the - # > underlying int object and is usually not what you expect from negating - # > a bool. Use the 'not' operator for boolean negation or ~int(x) if you - # > really want the bitwise inversion of the underlying int. 
- other_is_not_inf = not other_is_inf - - else: - other_abs, other_is_nan = other.abs(), other.is_nan() - other_is_not_inf = other.is_finite() | other_is_nan - other_is_inf = ~other_is_not_inf - - rel_threshold = self.abs().clip(lower_bound=other_abs, upper_bound=None) * rel_tol - tolerance = rel_threshold.clip(lower_bound=abs_tol, upper_bound=None) - - self_is_nan = self.is_nan() - self_is_inf = ~(self.is_finite() | self_is_nan) - - # Values are close if abs_diff <= tolerance, and both finite - is_close = ((self - other).abs() <= tolerance) & (~self_is_inf) & other_is_not_inf - - # Handle infinity cases: infinities are "close" only if they have the same sign - self_sign, other_sign = self > 0, other > 0 - is_same_inf = self_is_inf & other_is_inf & (self_sign == other_sign) - result = is_close | is_same_inf - - # Handle nan cases: - # * nans_equals = True => if both values are NaN, then True - # * nans_equals = False => if any value is NaN, then False - if nans_equal: - both_nan = self_is_nan & other_is_nan - result = result | both_nan - else: - either_nan = self_is_nan | other_is_nan - result = result & ~either_nan - - return result + return _is_close_impl( + self, other, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal + ) @property def str(self) -> StringNamespace[Self]: ... diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index 6bd01982bc..1691892e79 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -19,6 +19,7 @@ from narwhals._translate import FromIterable, FromNative, NumpyConvertible, ToNarwhals from narwhals._typing_compat import TypeVar, assert_never from narwhals._utils import ( + _is_close_impl, _StoresCompliant, _StoresNative, is_compliant_series, @@ -291,7 +292,10 @@ def is_close( abs_tol: float, rel_tol: float, nans_equal: bool, - ) -> Self: ... 
+ ) -> Self: + return _is_close_impl( + self, other, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal + ) @property def str(self) -> StringNamespace[Self]: ... @@ -367,65 +371,6 @@ def __getitem__(self, item: MultiIndexSelector[Self]) -> Self: return self._gather(item) assert_never(item) - def is_close( - self, - other: Self | NumericLiteral, - *, - abs_tol: float, - rel_tol: float, - nans_equal: bool, - ) -> Self: - from decimal import Decimal - - other_abs: Self | NumericLiteral - other_is_nan: Self | bool - other_is_inf: Self | bool - other_is_not_inf: Self | bool - - if isinstance(other, (float, int, Decimal)): - from math import isinf, isnan - - other_abs, other_is_nan, other_is_inf = abs(other), isnan(other), isinf(other) # type: ignore[assignment] - - # Define the other_is_not_inf variable to prevent triggering the following warning: - # > DeprecationWarning: Bitwise inversion '~' on bool is deprecated and will be - # > removed in Python 3.16. This returns the bitwise inversion of the - # > underlying int object and is usually not what you expect from negating - # > a bool. Use the 'not' operator for boolean negation or ~int(x) if you - # > really want the bitwise inversion of the underlying int. 
- other_is_not_inf = not other_is_inf - - else: - other_abs, other_is_nan = other.abs(), other.is_nan() - other_is_not_inf = other.is_finite() | other_is_nan - other_is_inf = ~other_is_not_inf - - rel_threshold = self.abs().clip(lower_bound=other_abs, upper_bound=None) * rel_tol - tolerance = rel_threshold.clip(lower_bound=abs_tol, upper_bound=None) - - self_is_nan = self.is_nan() - self_is_inf = ~(self.is_finite() | self_is_nan) - - # Values are close if abs_diff <= tolerance, and both finite - is_close = ((self - other).abs() <= tolerance) & (~self_is_inf) & other_is_not_inf - - # Handle infinity cases: infinities are "close" only if they have the same sign - self_sign, other_sign = self > 0, other > 0 - is_same_inf = self_is_inf & other_is_inf & (self_sign == other_sign) - result = is_close | is_same_inf - - # Handle nan cases: - # * nans_equals = True => if both values are NaN, then True - # * nans_equals = False => if any value is NaN, then False - if nans_equal: - both_nan = self_is_nan & other_is_nan - result = result | both_nan - else: - either_nan = self_is_nan | other_is_nan - result = result & ~either_nan - - return result - @property def str(self) -> EagerSeriesStringNamespace[Self, NativeSeriesT]: ... 
@property diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index 0fb3c301e9..630ac16804 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -69,6 +69,7 @@ class ScalarKwargs(TypedDict, total=False): CompliantExprAny: TypeAlias = "CompliantExpr[Any, Any]" CompliantSeriesAny: TypeAlias = "CompliantSeries[Any]" CompliantSeriesOrNativeExprAny: TypeAlias = "CompliantSeriesAny | NativeExpr" +CompliantSeriesOrExprAny: TypeAlias = "CompliantSeriesAny | CompliantExprAny" CompliantDataFrameAny: TypeAlias = "CompliantDataFrame[Any, Any, Any, Any]" CompliantLazyFrameAny: TypeAlias = "CompliantLazyFrame[Any, Any, Any]" CompliantFrameAny: TypeAlias = "CompliantDataFrameAny | CompliantLazyFrameAny" @@ -113,6 +114,9 @@ class ScalarKwargs(TypedDict, total=False): bound=CompliantSeriesOrNativeExprAny, covariant=True, ) +CompliantSeriesOrExprT = TypeVar( + "CompliantSeriesOrExprT", bound="CompliantSeriesOrExprAny" +) CompliantFrameT = TypeVar("CompliantFrameT", bound=CompliantFrameAny) CompliantFrameT_co = TypeVar( "CompliantFrameT_co", bound=CompliantFrameAny, covariant=True diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 601bf9a0ef..70d41d0548 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -72,7 +72,7 @@ NativeFrameT_co, NativeSeriesT_co, ) - from narwhals._compliant.typing import EvalNames + from narwhals._compliant.typing import CompliantSeriesOrExprT, EvalNames from narwhals._namespace import EagerAllowedImplementation, Namespace from narwhals._translate import ArrowStreamExportable, IntoArrowTable, ToNarwhalsT_co from narwhals.dataframe import DataFrame, LazyFrame @@ -86,6 +86,7 @@ DTypes, IntoSeriesT, MultiIndexSelector, + NumericLiteral, SingleIndexSelector, SizedMultiIndexSelector, SizeUnit, @@ -2035,3 +2036,63 @@ def deep_attrgetter(attr: str, *nested: str) -> attrgetter[Any]: def deep_getattr(obj: Any, name_1: str, *nested: str) -> Any: """Perform a nested attribute lookup on `obj`.""" return 
deep_attrgetter(name_1, *nested)(obj) + + +def _is_close_impl( + self: CompliantSeriesOrExprT, + other: CompliantSeriesOrExprT | NumericLiteral, + *, + abs_tol: float, + rel_tol: float, + nans_equal: bool, +) -> CompliantSeriesOrExprT: + from decimal import Decimal + + other_abs: CompliantSeriesOrExprT | NumericLiteral + other_is_nan: CompliantSeriesOrExprT | bool + other_is_inf: CompliantSeriesOrExprT | bool + other_is_not_inf: CompliantSeriesOrExprT | bool + + if isinstance(other, (float, int, Decimal)): + from math import isinf, isnan + + other_abs, other_is_nan, other_is_inf = abs(other), isnan(other), isinf(other) # type: ignore[assignment] + + # Define the other_is_not_inf variable to prevent triggering the following warning: + # > DeprecationWarning: Bitwise inversion '~' on bool is deprecated and will be + # > removed in Python 3.16. This returns the bitwise inversion of the + # > underlying int object and is usually not what you expect from negating + # > a bool. Use the 'not' operator for boolean negation or ~int(x) if you + # > really want the bitwise inversion of the underlying int. 
+ other_is_not_inf = not other_is_inf + + else: + other_abs, other_is_nan = other.abs(), other.is_nan() + other_is_not_inf = other.is_finite() | other_is_nan + other_is_inf = ~other_is_not_inf + + rel_threshold = self.abs().clip(lower_bound=other_abs, upper_bound=None) * rel_tol + tolerance = rel_threshold.clip(lower_bound=abs_tol, upper_bound=None) + + self_is_nan = self.is_nan() + self_is_inf = ~(self.is_finite() | self_is_nan) + + # Values are close if abs_diff <= tolerance, and both finite + is_close = ((self - other).abs() <= tolerance) & (~self_is_inf) & other_is_not_inf + + # Handle infinity cases: infinities are "close" only if they have the same sign + self_sign, other_sign = self > 0, other > 0 + is_same_inf = self_is_inf & other_is_inf & (self_sign == other_sign) + result = is_close | is_same_inf + + # Handle nan cases: + # * nans_equals = True => if both values are NaN, then True + # * nans_equals = False => if any value is NaN, then False + if nans_equal: + both_nan = self_is_nan & other_is_nan + result = result | both_nan + else: + either_nan = self_is_nan | other_is_nan + result = result & ~either_nan + + return result From e59d8e7d188dfe704b937a343216c32137029103 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 7 Aug 2025 20:26:12 +0200 Subject: [PATCH 09/20] coverage --- narwhals/_utils.py | 8 +- tests/expr_and_series/is_close_test.py | 165 ++++++++++++++++++++----- 2 files changed, 136 insertions(+), 37 deletions(-) diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 70d41d0548..d1d84a0110 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -2071,18 +2071,18 @@ def _is_close_impl( other_is_not_inf = other.is_finite() | other_is_nan other_is_inf = ~other_is_not_inf - rel_threshold = self.abs().clip(lower_bound=other_abs, upper_bound=None) * rel_tol + rel_threshold = self.abs().clip(lower_bound=other_abs, upper_bound=None) * rel_tol # type: ignore[arg-type] tolerance = rel_threshold.clip(lower_bound=abs_tol, upper_bound=None) 
self_is_nan = self.is_nan() - self_is_inf = ~(self.is_finite() | self_is_nan) + self_is_not_inf = self.is_finite() | self_is_nan # Values are close if abs_diff <= tolerance, and both finite - is_close = ((self - other).abs() <= tolerance) & (~self_is_inf) & other_is_not_inf + is_close = ((self - other).abs() <= tolerance) & self_is_not_inf & other_is_not_inf # Handle infinity cases: infinities are "close" only if they have the same sign self_sign, other_sign = self > 0, other > 0 - is_same_inf = self_is_inf & other_is_inf & (self_sign == other_sign) + is_same_inf = (~self_is_not_inf) & other_is_inf & (self_sign == other_sign) result = is_close | is_same_inf # Handle nan cases: diff --git a/tests/expr_and_series/is_close_test.py b/tests/expr_and_series/is_close_test.py index 80a24beb3c..a674026b34 100644 --- a/tests/expr_and_series/is_close_test.py +++ b/tests/expr_and_series/is_close_test.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import pytest import narwhals as nw @@ -12,6 +14,9 @@ ) from tests.utils import Constructor, ConstructorEager, assert_equal_data +if TYPE_CHECKING: + from narwhals.typing import NumericLiteral + NON_NULLABLE_CONSTRUCTORS = ( pandas_constructor, dask_lazy_p1_constructor, @@ -22,49 +27,58 @@ INF_POS, INF_NEG = float("inf"), float("-inf") data = { - "left": [1.001, NULL_PLACEHOLDER, NAN_PLACEHOLDER, INF_POS, INF_NEG, INF_POS], - "right": [1.005, NULL_PLACEHOLDER, NAN_PLACEHOLDER, INF_POS, 3.0, INF_NEG], + "x": [1.001, NULL_PLACEHOLDER, NAN_PLACEHOLDER, INF_POS, INF_NEG, INF_POS], + "y": [1.005, NULL_PLACEHOLDER, NAN_PLACEHOLDER, INF_POS, 3.0, INF_NEG], "non_numeric": list("number"), "idx": list(range(6)), } +# Exceptions def test_is_close_series_raise_non_numeric(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) - left, right = df["non_numeric"], df["right"] + x, y = df["non_numeric"], df["y"] msg = "is_close operation not supported for 
dtype" with pytest.raises(InvalidOperationError, match=msg): - left.is_close(right) + x.is_close(y) -def test_is_close_series_raise_negative_abs_tol( - constructor_eager: ConstructorEager, -) -> None: +def test_is_close_raise_negative_abs_tol(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) - left, right = df["left"], df["right"] + x, y = df["x"], df["y"] abs_tol = -2 msg = rf"`abs_tol` must be non-negative but got {abs_tol}" with pytest.raises(ComputeError, match=msg): - left.is_close(right, abs_tol=abs_tol) + x.is_close(y, abs_tol=abs_tol) with pytest.raises(ComputeError, match=msg): - left.is_close(right, abs_tol=abs_tol, rel_tol=999) + x.is_close(y, abs_tol=abs_tol, rel_tol=999) + + with pytest.raises(ComputeError, match=msg): + df.select(nw.col("x").is_close(nw.col("y"), abs_tol=abs_tol)) + + with pytest.raises(ComputeError, match=msg): + df.select(nw.col("x").is_close(nw.col("y"), abs_tol=abs_tol, rel_tol=999)) @pytest.mark.parametrize("rel_tol", [-0.0001, 1.0, 1.1]) -def test_is_close_series_raise_invalid_rel_tol( +def test_is_close_raise_invalid_rel_tol( constructor_eager: ConstructorEager, rel_tol: float ) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) - left, right = df["left"], df["right"] + x, y = df["x"], df["y"] msg = rf"`rel_tol` must be in the range \[0, 1\) but got {rel_tol}" with pytest.raises(ComputeError, match=msg): - left.is_close(right, rel_tol=rel_tol) + x.is_close(y, rel_tol=rel_tol) + with pytest.raises(ComputeError, match=msg): + df.select(nw.col("x").is_close(nw.col("y"), rel_tol=rel_tol)) + +# Series @pytest.mark.parametrize( ("abs_tol", "rel_tol", "nans_equal", "expected"), [ @@ -83,33 +97,65 @@ def test_is_close_series_with_series( expected: list[float], ) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) - left, right = df["left"], df["right"] + x, y = df["x"], df["y"] nulls = nw.new_series( name="nulls", - values=[None] * 
len(left), + values=[None] * len(x), dtype=nw.Float64(), backend=df.implementation, ) # Tricks to generate nan's and null's for pandas with nullable backends: # * Square rooting a negative number will generate a NaN # * Replacing a value with None once the dtype is nullable will generate 's - left = left.zip_with(~left.is_finite(), left**0.5).zip_with( - left != NULL_PLACEHOLDER, nulls - ) - right = right.zip_with(~right.is_finite(), right**0.5).zip_with( - left != NULL_PLACEHOLDER, nulls - ) - result = left.is_close(right, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal) + x = x.zip_with(x != NAN_PLACEHOLDER, x**0.5).zip_with(x != NULL_PLACEHOLDER, nulls) + y = y.zip_with(y != NAN_PLACEHOLDER, y**0.5).zip_with(y != NULL_PLACEHOLDER, nulls) + result = x.is_close(y, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal) if constructor_eager in NON_NULLABLE_CONSTRUCTORS: - expected = [v if v is not None else False for v in expected] - assert_equal_data({"x": result}, {"x": expected}) + expected = [v if v is not None else nans_equal for v in expected] + assert_equal_data({"result": result}, {"result": expected}) -def test_is_close_series_with_scalar() -> None: ... 
# TODO +@pytest.mark.parametrize( + ("other", "abs_tol", "rel_tol", "nans_equal", "expected"), + [ + (1.0, 0.1, 0.0, False, [True, None, False, False, False, False]), + (1.0, 0.0001, 0.0, True, [False, None, False, False, False, False]), + (2.9, 0.0, 0.1, False, [False, None, False, False, True, False]), + (2.9, 0.0, 0.001, True, [False, None, False, False, False, False]), + ], +) +def test_is_close_series_with_scalar( + constructor_eager: ConstructorEager, + other: NumericLiteral, + abs_tol: float, + rel_tol: float, + *, + nans_equal: bool, + expected: list[float], +) -> None: + df = nw.from_native(constructor_eager(data), eager_only=True) + y = df["y"] + nulls = nw.new_series( + name="nulls", + values=[None] * len(y), + dtype=nw.Float64(), + backend=df.implementation, + ) + # Tricks to generate nan's and null's for pandas with nullable backends: + # * Square rooting a negative number will generate a NaN + # * Replacing a value with None once the dtype is nullable will generate 's + y = y.zip_with(y != NAN_PLACEHOLDER, y**0.5).zip_with(y != NULL_PLACEHOLDER, nulls) + result = y.is_close(other, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal) + if constructor_eager in NON_NULLABLE_CONSTRUCTORS: + expected = [v if v is not None else False for v in expected] + assert_equal_data({"result": result}, {"result": expected}) + + +# Expr @pytest.mark.parametrize( ("abs_tol", "rel_tol", "nans_equal", "expected"), [ @@ -120,6 +166,7 @@ def test_is_close_series_with_scalar() -> None: ... 
# TODO ], ) def test_is_close_expr_with_expr( + request: pytest.FixtureRequest, constructor: Constructor, abs_tol: float, rel_tol: float, @@ -127,28 +174,80 @@ def test_is_close_expr_with_expr( nans_equal: bool, expected: list[float], ) -> None: - _left, _right = nw.col("left"), nw.col("right") + if "sqlframe" in str(constructor): + # TODO(FBruzzesi): Figure out a MRE and report upstream + reason = ( + "duckdb.duckdb.ParserException: Parser Error: syntax error at or near '='" + ) + request.applymarker(pytest.mark.xfail(reason=reason)) + + x, y = nw.col("x"), nw.col("y") result = ( nw.from_native(constructor(data)) # Tricks to generate nan's and null's for pandas with nullable backends: # * Square rooting a negative number will generate a NaN # * Replacing a value with None once the dtype is nullable will generate 's .with_columns( - left=nw.when(_left != NULL_PLACEHOLDER).then(_left).otherwise(None), - right=nw.when(_right != NULL_PLACEHOLDER).then(_right).otherwise(None), + x=nw.when(x != NAN_PLACEHOLDER).then(x).otherwise(x**0.5), + y=nw.when(y != NAN_PLACEHOLDER).then(y).otherwise(y**0.5), ) .with_columns( - left=nw.when(~_left.is_finite()).then(_left).otherwise(_left**0.5), - right=nw.when(~_right.is_finite()).then(_right).otherwise(_right**0.5), + x=nw.when(x != NULL_PLACEHOLDER).then(x).otherwise(None), + y=nw.when(y != NULL_PLACEHOLDER).then(y).otherwise(None), + ) + .select( + "idx", + result=x.is_close(y, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal), + ) + .sort("idx") + ) + if constructor in NON_NULLABLE_CONSTRUCTORS: + expected = [v if v is not None else nans_equal for v in expected] + assert_equal_data(result, {"idx": data["idx"], "result": expected}) + + +@pytest.mark.parametrize( + ("other", "abs_tol", "rel_tol", "nans_equal", "expected"), + [ + (1.0, 0.1, 0.0, False, [True, None, False, False, False, False]), + (1.0, 0.0001, 0.0, True, [False, None, False, False, False, False]), + (2.9, 0.0, 0.1, False, [False, None, False, False, True, 
False]), + (2.9, 0.0, 0.001, True, [False, None, False, False, False, False]), + ], +) +def test_is_close_expr_with_scalar( + request: pytest.FixtureRequest, + constructor: Constructor, + other: NumericLiteral, + abs_tol: float, + rel_tol: float, + *, + nans_equal: bool, + expected: list[float], +) -> None: + if "sqlframe" in str(constructor): + # TODO(FBruzzesi): Figure out a MRE and report upstream + reason = ( + "duckdb.duckdb.ParserException: Parser Error: syntax error at or near '='" ) + request.applymarker(pytest.mark.xfail(reason=reason)) + + y = nw.col("y") + result = ( + nw.from_native(constructor(data)) + # Tricks to generate nan's and null's for pandas with nullable backends: + # * Square rooting a negative number will generate a NaN + # * Replacing a value with None once the dtype is nullable will generate 's + .with_columns(y=nw.when(y != NAN_PLACEHOLDER).then(y).otherwise(y**0.5)) + .with_columns(y=nw.when(y != NULL_PLACEHOLDER).then(y).otherwise(None)) .select( "idx", - x=_left.is_close( - _right, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal + result=y.is_close( + other, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal ), ) .sort("idx") ) if constructor in NON_NULLABLE_CONSTRUCTORS: expected = [v if v is not None else False for v in expected] - assert_equal_data(result, {"idx": data["idx"], "x": expected}) + assert_equal_data(result, {"idx": data["idx"], "result": expected}) From 148c8bc742679def1a6a1be20c7ed19867dc22c3 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 7 Aug 2025 20:34:06 +0200 Subject: [PATCH 10/20] docstring examples --- narwhals/expr.py | 32 +++++++++++++++++++++++++++++++- narwhals/series.py | 26 +++++++++++--------------- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index 2828434a5a..5f19b6eb73 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -2378,7 +2378,37 @@ def is_close( `math.isclose`. 
Specifically note that this behavior is different to `numpy.isclose`. - Examples: TODO + Examples: + >>> import duckdb + >>> import pyarrow as pa + >>> import narwhals as nw + >>> + >>> data = { + ... "x": [1.0, float("inf"), 1.41, None, float("nan")], + ... "y": [1.2, float("inf"), 1.40, None, float("nan")], + ... } + >>> _table = pa.table(data) + >>> df_native = duckdb.table("_table") + >>> df = nw.from_native(df_native) + >>> df.with_columns( + ... is_close=nw.col("x").is_close( + ... nw.col("y"), abs_tol=0.1, nans_equal=True + ... ) + ... ) + ┌──────────────────────────────┐ + | Narwhals LazyFrame | + |------------------------------| + |┌────────┬────────┬──────────┐| + |│ x │ y │ is_close │| + |│ double │ double │ boolean │| + |├────────┼────────┼──────────┤| + |│ 1.0 │ 1.2 │ false │| + |│ inf │ inf │ true │| + |│ 1.41 │ 1.4 │ true │| + |│ NULL │ NULL │ NULL │| + |│ nan │ nan │ true │| + |└────────┴────────┴──────────┘| + └──────────────────────────────┘ """ if abs_tol < 0: msg = f"`abs_tol` must be non-negative but got {abs_tol}" diff --git a/narwhals/series.py b/narwhals/series.py index 7fd7f9aa1c..ed1505291b 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -2801,21 +2801,17 @@ def is_close( >>> data = [1.0, float("inf"), 1.41, None, float("nan")] >>> s_native = pa.chunked_array([data]) >>> s = nw.from_native(s_native, series_only=True) - >>> s.is_close(1.4, abs_tol=0.1) - ┌────────────────────────────────────────────────┐ - | Narwhals Series | - |------------------------------------------------| - || - |[ | - | [ | - | false, | - | false, | - | true, | - | null, | - | false | - | ] | - |] | - └────────────────────────────────────────────────┘ + >>> s.is_close(1.4, abs_tol=0.1).to_native() # doctest:+ELLIPSIS + + [ + [ + false, + false, + true, + null, + false + ] + ] """ if not self.dtype.is_numeric(): msg = ( From 475637128ffccea072773b5e9586979c3e30f649 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 7 Aug 2025 22:36:46 +0200 Subject: 
[PATCH 11/20] fixing nan's issue --- narwhals/_polars/expr.py | 8 +++----- narwhals/_utils.py | 7 +++---- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py index 743bfdff75..b581fe6330 100644 --- a/narwhals/_polars/expr.py +++ b/narwhals/_polars/expr.py @@ -261,18 +261,16 @@ def is_close( # Handle infinity cases: infinities are "close" only if they have the same sign self_sign, other_sign = native_expr.sign(), other_expr.sign() is_same_inf = self_is_inf & other_is_inf & (self_sign == other_sign) - result = is_close | is_same_inf # Handle nan cases: # * nans_equals = True => if both values are NaN, then True # * nans_equals = False => if any value is NaN, then False + either_nan = native_expr.is_nan() | other_expr.is_nan() + result = (is_close | is_same_inf) & either_nan.not_() + if nans_equal: both_nan = native_expr.is_nan() & other_expr.is_nan() result = result | both_nan - else: - either_nan = native_expr.is_nan() | other_expr.is_nan() - result = result & either_nan.not_() - else: result = native_expr.is_close( other=other_expr, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal diff --git a/narwhals/_utils.py b/narwhals/_utils.py index d1d84a0110..6b44a40b94 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -2083,16 +2083,15 @@ def _is_close_impl( # Handle infinity cases: infinities are "close" only if they have the same sign self_sign, other_sign = self > 0, other > 0 is_same_inf = (~self_is_not_inf) & other_is_inf & (self_sign == other_sign) - result = is_close | is_same_inf # Handle nan cases: # * nans_equals = True => if both values are NaN, then True # * nans_equals = False => if any value is NaN, then False + either_nan = self_is_nan | other_is_nan + result = (is_close | is_same_inf) & ~either_nan + if nans_equal: both_nan = self_is_nan & other_is_nan result = result | both_nan - else: - either_nan = self_is_nan | other_is_nan - result = result & ~either_nan return result From 
cf8f83dcc27efa89460e95dd7ad128da17932d80 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 7 Aug 2025 21:19:45 +0000 Subject: [PATCH 12/20] =?UTF-8?q?test:=20trim=20some=20fat=20=F0=9F=98=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only a tiny dent into the 600, but ... https://github.com/narwhals-dev/narwhals/pull/2962#issuecomment-3165349448 --- tests/expr_and_series/is_close_test.py | 52 ++++++++++---------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/tests/expr_and_series/is_close_test.py b/tests/expr_and_series/is_close_test.py index a674026b34..a4b22d3cd2 100644 --- a/tests/expr_and_series/is_close_test.py +++ b/tests/expr_and_series/is_close_test.py @@ -78,8 +78,7 @@ def test_is_close_raise_invalid_rel_tol( df.select(nw.col("x").is_close(nw.col("y"), rel_tol=rel_tol)) -# Series -@pytest.mark.parametrize( +cases_columnar = pytest.mark.parametrize( ("abs_tol", "rel_tol", "nans_equal", "expected"), [ (0.1, 0.0, False, [True, None, False, True, False, False]), @@ -88,6 +87,19 @@ def test_is_close_raise_invalid_rel_tol( (0.0, 0.001, True, [False, None, True, True, False, False]), ], ) +cases_scalar = pytest.mark.parametrize( + ("other", "abs_tol", "rel_tol", "nans_equal", "expected"), + [ + (1.0, 0.1, 0.0, False, [True, None, False, False, False, False]), + (1.0, 0.0001, 0.0, True, [False, None, False, False, False, False]), + (2.9, 0.0, 0.1, False, [False, None, False, False, True, False]), + (2.9, 0.0, 0.001, True, [False, None, False, False, False, False]), + ], +) + + +# Series +@cases_columnar def test_is_close_series_with_series( constructor_eager: ConstructorEager, abs_tol: float, @@ -117,15 +129,7 @@ def test_is_close_series_with_series( assert_equal_data({"result": result}, {"result": expected}) -@pytest.mark.parametrize( - ("other", "abs_tol", "rel_tol", "nans_equal", "expected"), - [ - (1.0, 0.1, 0.0, False, [True, None, 
False, False, False, False]), - (1.0, 0.0001, 0.0, True, [False, None, False, False, False, False]), - (2.9, 0.0, 0.1, False, [False, None, False, False, True, False]), - (2.9, 0.0, 0.001, True, [False, None, False, False, False, False]), - ], -) +@cases_scalar def test_is_close_series_with_scalar( constructor_eager: ConstructorEager, other: NumericLiteral, @@ -156,15 +160,7 @@ def test_is_close_series_with_scalar( # Expr -@pytest.mark.parametrize( - ("abs_tol", "rel_tol", "nans_equal", "expected"), - [ - (0.1, 0.0, False, [True, None, False, True, False, False]), - (0.0001, 0.0, True, [False, None, True, True, False, False]), - (0.0, 0.1, False, [True, None, False, True, False, False]), - (0.0, 0.001, True, [False, None, True, True, False, False]), - ], -) +@cases_columnar def test_is_close_expr_with_expr( request: pytest.FixtureRequest, constructor: Constructor, @@ -192,8 +188,8 @@ def test_is_close_expr_with_expr( y=nw.when(y != NAN_PLACEHOLDER).then(y).otherwise(y**0.5), ) .with_columns( - x=nw.when(x != NULL_PLACEHOLDER).then(x).otherwise(None), - y=nw.when(y != NULL_PLACEHOLDER).then(y).otherwise(None), + x=nw.when(x != NULL_PLACEHOLDER).then(x), + y=nw.when(y != NULL_PLACEHOLDER).then(y), ) .select( "idx", @@ -206,15 +202,7 @@ def test_is_close_expr_with_expr( assert_equal_data(result, {"idx": data["idx"], "result": expected}) -@pytest.mark.parametrize( - ("other", "abs_tol", "rel_tol", "nans_equal", "expected"), - [ - (1.0, 0.1, 0.0, False, [True, None, False, False, False, False]), - (1.0, 0.0001, 0.0, True, [False, None, False, False, False, False]), - (2.9, 0.0, 0.1, False, [False, None, False, False, True, False]), - (2.9, 0.0, 0.001, True, [False, None, False, False, False, False]), - ], -) +@cases_scalar def test_is_close_expr_with_scalar( request: pytest.FixtureRequest, constructor: Constructor, @@ -239,7 +227,7 @@ def test_is_close_expr_with_scalar( # * Square rooting a negative number will generate a NaN # * Replacing a value with None once the 
dtype is nullable will generate 's .with_columns(y=nw.when(y != NAN_PLACEHOLDER).then(y).otherwise(y**0.5)) - .with_columns(y=nw.when(y != NULL_PLACEHOLDER).then(y).otherwise(None)) + .with_columns(y=nw.when(y != NULL_PLACEHOLDER).then(y)) .select( "idx", result=y.is_close( From 22e22038535b84750adf87b7f65e68bfaf87c28f Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Fri, 8 Aug 2025 22:35:19 +0200 Subject: [PATCH 13/20] fix typing --- narwhals/_compliant/typing.py | 3 --- narwhals/_utils.py | 38 +++++++++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index 630ac16804..07e9ee5391 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -114,9 +114,6 @@ class ScalarKwargs(TypedDict, total=False): bound=CompliantSeriesOrNativeExprAny, covariant=True, ) -CompliantSeriesOrExprT = TypeVar( - "CompliantSeriesOrExprT", bound="CompliantSeriesOrExprAny" -) CompliantFrameT = TypeVar("CompliantFrameT", bound=CompliantFrameAny) CompliantFrameT_co = TypeVar( "CompliantFrameT_co", bound=CompliantFrameAny, covariant=True diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 6b44a40b94..6b3029a3e0 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -72,7 +72,7 @@ NativeFrameT_co, NativeSeriesT_co, ) - from narwhals._compliant.typing import CompliantSeriesOrExprT, EvalNames + from narwhals._compliant.typing import EvalNames from narwhals._namespace import EagerAllowedImplementation, Namespace from narwhals._translate import ArrowStreamExportable, IntoArrowTable, ToNarwhalsT_co from narwhals.dataframe import DataFrame, LazyFrame @@ -2038,20 +2038,42 @@ def deep_getattr(obj: Any, name_1: str, *nested: str) -> Any: return deep_attrgetter(name_1, *nested)(obj) +@overload +def _is_close_impl( + self: CompliantExprT, + other: CompliantExprT | NumericLiteral, + *, + abs_tol: float, + rel_tol: float, + nans_equal: bool, +) -> CompliantExprT: ... 
+ + +@overload +def _is_close_impl( + self: CompliantSeriesT, + other: CompliantSeriesT | NumericLiteral, + *, + abs_tol: float, + rel_tol: float, + nans_equal: bool, +) -> CompliantSeriesT: ... + + def _is_close_impl( - self: CompliantSeriesOrExprT, - other: CompliantSeriesOrExprT | NumericLiteral, + self: CompliantExprT | CompliantSeriesT, + other: CompliantExprT | CompliantSeriesT | NumericLiteral, *, abs_tol: float, rel_tol: float, nans_equal: bool, -) -> CompliantSeriesOrExprT: +) -> CompliantExprT | CompliantSeriesT: from decimal import Decimal - other_abs: CompliantSeriesOrExprT | NumericLiteral - other_is_nan: CompliantSeriesOrExprT | bool - other_is_inf: CompliantSeriesOrExprT | bool - other_is_not_inf: CompliantSeriesOrExprT | bool + other_abs: CompliantExprT | CompliantSeriesT | NumericLiteral + other_is_nan: CompliantExprT | CompliantSeriesT | bool + other_is_inf: CompliantExprT | CompliantSeriesT | bool + other_is_not_inf: CompliantExprT | CompliantSeriesT | bool if isinstance(other, (float, int, Decimal)): from math import isinf, isnan From dccd612145dc6ed4c5c6cf4c32e29f39895bdf8a Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 9 Aug 2025 13:14:25 +0000 Subject: [PATCH 14/20] chore(typing): un-hide typing issues See https://github.com/narwhals-dev/narwhals/pull/2962#issuecomment-3169413348 --- narwhals/_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 6b3029a3e0..25e0ff8e59 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -2078,7 +2078,9 @@ def _is_close_impl( if isinstance(other, (float, int, Decimal)): from math import isinf, isnan - other_abs, other_is_nan, other_is_inf = abs(other), isnan(other), isinf(other) # type: ignore[assignment] + other_abs = abs(other) + other_is_nan = isnan(other) + other_is_inf = isinf(other) # Define the other_is_not_inf variable to prevent triggering the following warning: # 
> DeprecationWarning: Bitwise inversion '~' on bool is deprecated and will be @@ -2093,7 +2095,7 @@ def _is_close_impl( other_is_not_inf = other.is_finite() | other_is_nan other_is_inf = ~other_is_not_inf - rel_threshold = self.abs().clip(lower_bound=other_abs, upper_bound=None) * rel_tol # type: ignore[arg-type] + rel_threshold = self.abs().clip(lower_bound=other_abs, upper_bound=None) * rel_tol tolerance = rel_threshold.clip(lower_bound=abs_tol, upper_bound=None) self_is_nan = self.is_nan() From 0dfaf5a465bd5dbe0af304d88146cfa38ec4485b Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Sat, 9 Aug 2025 17:59:26 +0200 Subject: [PATCH 15/20] Dan's idea 3 in practice --- narwhals/_compliant/expr.py | 17 ++----- narwhals/_compliant/series.py | 15 +----- narwhals/_compliant/utils.py | 87 +++++++++++++++++++++++++++++++++++ narwhals/_utils.py | 84 --------------------------------- 4 files changed, 92 insertions(+), 111 deletions(-) create mode 100644 narwhals/_compliant/utils.py diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py index f6d35aa689..57a790eb9e 100644 --- a/narwhals/_compliant/expr.py +++ b/narwhals/_compliant/expr.py @@ -27,7 +27,8 @@ LazyExprT, NativeExprT, ) -from narwhals._utils import _is_close_impl, _StoresCompliant +from narwhals._compliant.utils import IsClose +from narwhals._utils import _StoresCompliant from narwhals.dependencies import get_numpy, is_numpy_array if TYPE_CHECKING: @@ -75,7 +76,7 @@ def __eq__(self, value: Any, /) -> Self: ... # type: ignore[override] def __ne__(self, value: Any, /) -> Self: ... 
# type: ignore[override] -class CompliantExpr(Protocol[CompliantFrameT, CompliantSeriesOrNativeExprT_co]): +class CompliantExpr(IsClose, Protocol[CompliantFrameT, CompliantSeriesOrNativeExprT_co]): _implementation: Implementation _version: Version _evaluate_output_names: EvalNames[CompliantFrameT] @@ -241,18 +242,6 @@ def _evaluate_aliases( names = self._evaluate_output_names(frame) return alias(names) if (alias := self._alias_output_names) else names - def is_close( - self, - other: Self | NumericLiteral, - *, - abs_tol: float, - rel_tol: float, - nans_equal: bool, - ) -> Self: - return _is_close_impl( - self, other, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal - ) - @property def str(self) -> StringNamespace[Self]: ... @property diff --git a/narwhals/_compliant/series.py b/narwhals/_compliant/series.py index 1691892e79..e8a88590d3 100644 --- a/narwhals/_compliant/series.py +++ b/narwhals/_compliant/series.py @@ -16,10 +16,10 @@ NativeSeriesT, NativeSeriesT_co, ) +from narwhals._compliant.utils import IsClose from narwhals._translate import FromIterable, FromNative, NumpyConvertible, ToNarwhals from narwhals._typing_compat import TypeVar, assert_never from narwhals._utils import ( - _is_close_impl, _StoresCompliant, _StoresNative, is_compliant_series, @@ -79,6 +79,7 @@ class HistData(TypedDict, Generic[NativeSeriesT, "_CountsT_co"]): class CompliantSeries( + IsClose, NumpyConvertible["_1DArray", "Into1DArray"], FromIterable, FromNative[NativeSeriesT], @@ -285,18 +286,6 @@ def hist_from_bin_count( """`Series.hist(bins=None, bin_count=...)`.""" ... - def is_close( - self, - other: Self | NumericLiteral, - *, - abs_tol: float, - rel_tol: float, - nans_equal: bool, - ) -> Self: - return _is_close_impl( - self, other, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal - ) - @property def str(self) -> StringNamespace[Self]: ... 
@property diff --git a/narwhals/_compliant/utils.py b/narwhals/_compliant/utils.py new file mode 100644 index 0000000000..374ffd82b9 --- /dev/null +++ b/narwhals/_compliant/utils.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Protocol + +if TYPE_CHECKING: + from typing_extensions import Self + + from narwhals.typing import NumericLiteral, TemporalLiteral + + +class IsClose(Protocol): + """Every member defined is a dependency of `is_close` method.""" + + def __and__(self, other: Any) -> Self: ... + def __or__(self, other: Any) -> Self: ... + def __invert__(self) -> Self: ... + def __sub__(self, other: Any) -> Self: ... + def __mul__(self, other: Any) -> Self: ... + def __eq__(self, other: Self | Any) -> Self: ... # type: ignore[override] + def __gt__(self, other: Any) -> Self: ... + def __le__(self, other: Any) -> Self: ... + def abs(self) -> Self: ... + def is_nan(self) -> Self: ... + def is_finite(self) -> Self: ... + def clip( + self, + lower_bound: Self | NumericLiteral | TemporalLiteral | None, + upper_bound: Self | NumericLiteral | TemporalLiteral | None, + ) -> Self: ... + def is_close( + self, + other: Self | NumericLiteral, + *, + abs_tol: float, + rel_tol: float, + nans_equal: bool, + ) -> Self: + from decimal import Decimal + + other_abs: Self | NumericLiteral + other_is_nan: Self | bool + other_is_inf: Self | bool + other_is_not_inf: Self | bool + + if isinstance(other, (float, int, Decimal)): + from math import isinf, isnan + + other_abs = other.__abs__() + other_is_nan = isnan(other) + other_is_inf = isinf(other) + + # Define the other_is_not_inf variable to prevent triggering the following warning: + # > DeprecationWarning: Bitwise inversion '~' on bool is deprecated and will be + # > removed in Python 3.16. 
+ other_is_not_inf = not other_is_inf + + else: + other_abs, other_is_nan = other.abs(), other.is_nan() + other_is_not_inf = other.is_finite() | other_is_nan + other_is_inf = ~other_is_not_inf + + rel_threshold = self.abs().clip(lower_bound=other_abs, upper_bound=None) * rel_tol + tolerance = rel_threshold.clip(lower_bound=abs_tol, upper_bound=None) + + self_is_nan = self.is_nan() + self_is_not_inf = self.is_finite() | self_is_nan + + # Values are close if abs_diff <= tolerance, and both finite + is_close = ( + ((self - other).abs() <= tolerance) & self_is_not_inf & other_is_not_inf + ) + + # Handle infinity cases: infinities are close/equal if they have the same sign + self_sign, other_sign = self > 0, other > 0 + is_same_inf = (~self_is_not_inf) & other_is_inf & (self_sign == other_sign) + + # Handle nan cases: + # * If any value is NaN, then False (via `& ~either_nan`) + # * However, if `nans_equals = True` and if _both_ values are NaN, then True + either_nan = self_is_nan | other_is_nan + result = (is_close | is_same_inf) & ~either_nan + + if nans_equal: + both_nan = self_is_nan & other_is_nan + result = result | both_nan + + return result diff --git a/narwhals/_utils.py b/narwhals/_utils.py index 25e0ff8e59..601bf9a0ef 100644 --- a/narwhals/_utils.py +++ b/narwhals/_utils.py @@ -86,7 +86,6 @@ DTypes, IntoSeriesT, MultiIndexSelector, - NumericLiteral, SingleIndexSelector, SizedMultiIndexSelector, SizeUnit, @@ -2036,86 +2035,3 @@ def deep_attrgetter(attr: str, *nested: str) -> attrgetter[Any]: def deep_getattr(obj: Any, name_1: str, *nested: str) -> Any: """Perform a nested attribute lookup on `obj`.""" return deep_attrgetter(name_1, *nested)(obj) - - -@overload -def _is_close_impl( - self: CompliantExprT, - other: CompliantExprT | NumericLiteral, - *, - abs_tol: float, - rel_tol: float, - nans_equal: bool, -) -> CompliantExprT: ... 
- - -@overload -def _is_close_impl( - self: CompliantSeriesT, - other: CompliantSeriesT | NumericLiteral, - *, - abs_tol: float, - rel_tol: float, - nans_equal: bool, -) -> CompliantSeriesT: ... - - -def _is_close_impl( - self: CompliantExprT | CompliantSeriesT, - other: CompliantExprT | CompliantSeriesT | NumericLiteral, - *, - abs_tol: float, - rel_tol: float, - nans_equal: bool, -) -> CompliantExprT | CompliantSeriesT: - from decimal import Decimal - - other_abs: CompliantExprT | CompliantSeriesT | NumericLiteral - other_is_nan: CompliantExprT | CompliantSeriesT | bool - other_is_inf: CompliantExprT | CompliantSeriesT | bool - other_is_not_inf: CompliantExprT | CompliantSeriesT | bool - - if isinstance(other, (float, int, Decimal)): - from math import isinf, isnan - - other_abs = abs(other) - other_is_nan = isnan(other) - other_is_inf = isinf(other) - - # Define the other_is_not_inf variable to prevent triggering the following warning: - # > DeprecationWarning: Bitwise inversion '~' on bool is deprecated and will be - # > removed in Python 3.16. This returns the bitwise inversion of the - # > underlying int object and is usually not what you expect from negating - # > a bool. Use the 'not' operator for boolean negation or ~int(x) if you - # > really want the bitwise inversion of the underlying int. 
- other_is_not_inf = not other_is_inf - - else: - other_abs, other_is_nan = other.abs(), other.is_nan() - other_is_not_inf = other.is_finite() | other_is_nan - other_is_inf = ~other_is_not_inf - - rel_threshold = self.abs().clip(lower_bound=other_abs, upper_bound=None) * rel_tol - tolerance = rel_threshold.clip(lower_bound=abs_tol, upper_bound=None) - - self_is_nan = self.is_nan() - self_is_not_inf = self.is_finite() | self_is_nan - - # Values are close if abs_diff <= tolerance, and both finite - is_close = ((self - other).abs() <= tolerance) & self_is_not_inf & other_is_not_inf - - # Handle infinity cases: infinities are "close" only if they have the same sign - self_sign, other_sign = self > 0, other > 0 - is_same_inf = (~self_is_not_inf) & other_is_inf & (self_sign == other_sign) - - # Handle nan cases: - # * nans_equals = True => if both values are NaN, then True - # * nans_equals = False => if any value is NaN, then False - either_nan = self_is_nan | other_is_nan - result = (is_close | is_same_inf) & ~either_nan - - if nans_equal: - both_nan = self_is_nan & other_is_nan - result = result | both_nan - - return result From 8a1cb1313ee511432cde9e479418c06401204a79 Mon Sep 17 00:00:00 2001 From: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> Date: Sat, 9 Aug 2025 21:23:32 +0200 Subject: [PATCH 16/20] Add note on abs typing Co-authored-by: Dan Redding <125183946+dangotbanned@users.noreply.github.com> --- narwhals/_compliant/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/narwhals/_compliant/utils.py b/narwhals/_compliant/utils.py index 374ffd82b9..3a88e39561 100644 --- a/narwhals/_compliant/utils.py +++ b/narwhals/_compliant/utils.py @@ -45,6 +45,7 @@ def is_close( if isinstance(other, (float, int, Decimal)): from math import isinf, isnan + # NOTE: See https://discuss.python.org/t/inferred-type-of-function-that-calls-dunder-abs-abs/101447 other_abs = other.__abs__() other_is_nan = isnan(other) other_is_inf = isinf(other) From 
df24a41906c6faee3ee8545e81e3ad0f3d38f377 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 11 Aug 2025 19:57:09 +0000 Subject: [PATCH 17/20] fix(typing): resolve polars issues --- narwhals/_polars/series.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 7fb7630b8a..9df7e02f0d 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -493,16 +493,15 @@ def is_close( rel_tol: float, nans_equal: bool, ) -> Self: - other_native = extract_native(other) - if self._backend_version < (1, 32, 0): name = self.name ns = self.__narwhals_namespace__() + other_expr = other._to_expr() if isinstance(other, PolarsSeries) else other result = ( self.to_frame() .select( ns.col(name).is_close( - other=other_native, # type: ignore[arg-type] + other_expr, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal, @@ -512,11 +511,9 @@ def is_close( .native ) else: + other_series = other.native if isinstance(other, PolarsSeries) else other result = self.native.is_close( - other=other_native, # pyright: ignore[reportArgumentType] - abs_tol=abs_tol, - rel_tol=rel_tol, - nans_equal=nans_equal, + other_series, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal ) return self._with_native(result) From 58388268d2c8125ab0b047a4dc390df8a020aad6 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 11 Aug 2025 20:31:56 +0000 Subject: [PATCH 18/20] refactor(suggestion): simplify polars --- narwhals/_polars/expr.py | 35 +++++++++++++++-------------------- narwhals/_polars/series.py | 26 ++++++++------------------ 2 files changed, 23 insertions(+), 38 deletions(-) diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py index 1c4cbafe78..1210ee08e0 100644 --- a/narwhals/_polars/expr.py +++ b/narwhals/_polars/expr.py @@ -240,40 +240,35 @@ def is_close( rel_tol: float, nans_equal: 
bool, ) -> Self: - native_expr = self.native - other_expr = ( - extract_native(other) if isinstance(other, PolarsExpr) else pl.lit(other) - ) + left = self.native + right = other.native if isinstance(other, PolarsExpr) else pl.lit(other) if self._backend_version < (1, 32, 0): - abs_diff = (native_expr - other_expr).abs() - rel_threshold = native_expr.abs().clip(lower_bound=other_expr.abs()) * rel_tol - tolerance = rel_threshold.clip(lower_bound=pl.lit(abs_tol)) - - self_is_inf, other_is_inf = ( - native_expr.is_infinite(), - other_expr.is_infinite(), - ) + lower_bound = right.abs() + tolerance = (left.abs().clip(lower_bound) * rel_tol).clip(abs_tol) # Values are close if abs_diff <= tolerance, and both finite - is_close = (abs_diff <= tolerance) & self_is_inf.not_() & other_is_inf.not_() + abs_diff = (left - right).abs() + all_ = pl.all_horizontal + is_close = all_((abs_diff <= tolerance), left.is_finite(), right.is_finite()) # Handle infinity cases: infinities are "close" only if they have the same sign - self_sign, other_sign = native_expr.sign(), other_expr.sign() - is_same_inf = self_is_inf & other_is_inf & (self_sign == other_sign) + is_same_inf = all_( + left.is_infinite(), right.is_infinite(), (left.sign() == right.sign()) + ) # Handle nan cases: # * nans_equals = True => if both values are NaN, then True # * nans_equals = False => if any value is NaN, then False - either_nan = native_expr.is_nan() | other_expr.is_nan() + left_is_nan, right_is_nan = left.is_nan(), right.is_nan() + either_nan = left_is_nan | right_is_nan result = (is_close | is_same_inf) & either_nan.not_() if nans_equal: - both_nan = native_expr.is_nan() & other_expr.is_nan() - result = result | both_nan + result = result | (left_is_nan & right_is_nan) else: - result = native_expr.is_close( - other=other_expr, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal + result = left.is_close( + right, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal ) return self._with_native(result) 
diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 9df7e02f0d..e94f712534 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -492,29 +492,19 @@ def is_close( abs_tol: float, rel_tol: float, nans_equal: bool, - ) -> Self: + ) -> PolarsSeries: if self._backend_version < (1, 32, 0): name = self.name ns = self.__narwhals_namespace__() other_expr = other._to_expr() if isinstance(other, PolarsSeries) else other - result = ( - self.to_frame() - .select( - ns.col(name).is_close( - other_expr, - abs_tol=abs_tol, - rel_tol=rel_tol, - nans_equal=nans_equal, - ) - ) - .get_column(name) - .native - ) - else: - other_series = other.native if isinstance(other, PolarsSeries) else other - result = self.native.is_close( - other_series, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal + expr = ns.col(name).is_close( + other_expr, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal ) + return self.to_frame().select(expr).get_column(name) + other_series = other.native if isinstance(other, PolarsSeries) else other + result = self.native.is_close( + other_series, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal + ) return self._with_native(result) def hist_from_bins( From e4b7a1eed846ea38c47b65100b7b257bba3c7cac Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Mon, 11 Aug 2025 23:08:01 +0200 Subject: [PATCH 19/20] feedback adjustments --- narwhals/_compliant/typing.py | 1 - narwhals/expr.py | 4 +++- narwhals/series.py | 4 +++- tests/expr_and_series/is_close_test.py | 33 ++++++++++---------------- 4 files changed, 18 insertions(+), 24 deletions(-) diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index 07e9ee5391..0fb3c301e9 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -69,7 +69,6 @@ class ScalarKwargs(TypedDict, total=False): CompliantExprAny: TypeAlias = "CompliantExpr[Any, Any]" CompliantSeriesAny: TypeAlias = "CompliantSeries[Any]" 
CompliantSeriesOrNativeExprAny: TypeAlias = "CompliantSeriesAny | NativeExpr" -CompliantSeriesOrExprAny: TypeAlias = "CompliantSeriesAny | CompliantExprAny" CompliantDataFrameAny: TypeAlias = "CompliantDataFrame[Any, Any, Any, Any]" CompliantLazyFrameAny: TypeAlias = "CompliantLazyFrame[Any, Any, Any]" CompliantFrameAny: TypeAlias = "CompliantDataFrameAny | CompliantLazyFrameAny" diff --git a/narwhals/expr.py b/narwhals/expr.py index 5f19b6eb73..323bed3984 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -2359,7 +2359,9 @@ def is_close( Two values `a` and `b` are considered close if the following condition holds: - $$|a-b| \le max \{ \text{rel_tol} \cdot max \{ |a|, |b| \}, \text{abs_tol} \}$$ + $$ + |a-b| \le max \{ rel\_tol \cdot max \{ |a|, |b| \}, abs\_tol \} + $$ Arguments: other: Values to compare with. diff --git a/narwhals/series.py b/narwhals/series.py index ed1505291b..191b59146e 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -2775,7 +2775,9 @@ def is_close( Two values `a` and `b` are considered close if the following condition holds: - $$|a-b| \le max \{ \text{rel_tol} \cdot max \{ |a|, |b| \}, \text{abs_tol} \}$$ + $$ + |a-b| \le max \{ rel\_tol \cdot max \{ |a|, |b| \}, abs\_tol \} + $$ Arguments: other: Values to compare with. diff --git a/tests/expr_and_series/is_close_test.py b/tests/expr_and_series/is_close_test.py index a4b22d3cd2..e14b623d8b 100644 --- a/tests/expr_and_series/is_close_test.py +++ b/tests/expr_and_series/is_close_test.py @@ -1,3 +1,9 @@ +"""Tricks to generate nan's and null's for pandas with nullable backends. 
+ +* Square rooting a negative number will generate a NaN +* Replacing a value with None once the dtype is nullable will generate <NA>'s +""" + from __future__ import annotations from typing import TYPE_CHECKING @@ -44,23 +50,20 @@ def test_is_close_series_raise_non_numeric(constructor_eager: ConstructorEager) x.is_close(y) -def test_is_close_raise_negative_abs_tol(constructor_eager: ConstructorEager) -> None: +@pytest.mark.parametrize("rel_tol", [1e-09, 999]) +def test_is_close_raise_negative_abs_tol( + constructor_eager: ConstructorEager, rel_tol: float +) -> None: df = nw.from_native(constructor_eager(data), eager_only=True) x, y = df["x"], df["y"] abs_tol = -2 msg = rf"`abs_tol` must be non-negative but got {abs_tol}" with pytest.raises(ComputeError, match=msg): - x.is_close(y, abs_tol=abs_tol) - - with pytest.raises(ComputeError, match=msg): - x.is_close(y, abs_tol=abs_tol, rel_tol=999) - - with pytest.raises(ComputeError, match=msg): - df.select(nw.col("x").is_close(nw.col("y"), abs_tol=abs_tol)) + x.is_close(y, abs_tol=abs_tol, rel_tol=rel_tol) with pytest.raises(ComputeError, match=msg): - df.select(nw.col("x").is_close(nw.col("y"), abs_tol=abs_tol, rel_tol=999)) + df.select(nw.col("x").is_close(nw.col("y"), abs_tol=abs_tol, rel_tol=rel_tol)) @pytest.mark.parametrize("rel_tol", [-0.0001, 1.0, 1.1]) @@ -117,9 +120,6 @@ def test_is_close_series_with_series( dtype=nw.Float64(), backend=df.implementation, ) - 
# Tricks to generate nan's and null's for pandas with nullable backends: - # * Square rooting a negative number will generate a NaN - # * Replacing a value with None once the dtype is nullable will generate 's y = y.zip_with(y != NAN_PLACEHOLDER, y**0.5).zip_with(y != NULL_PLACEHOLDER, nulls) result = y.is_close(other, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal) @@ -180,9 +177,6 @@ def test_is_close_expr_with_expr( x, y = nw.col("x"), nw.col("y") result = ( nw.from_native(constructor(data)) - # Tricks to generate nan's and null's for pandas with nullable backends: - # * Square rooting a negative number will generate a NaN - # * Replacing a value with None once the dtype is nullable will generate 's .with_columns( x=nw.when(x != NAN_PLACEHOLDER).then(x).otherwise(x**0.5), y=nw.when(y != NAN_PLACEHOLDER).then(y).otherwise(y**0.5), @@ -223,9 +217,6 @@ def test_is_close_expr_with_scalar( y = nw.col("y") result = ( nw.from_native(constructor(data)) - # Tricks to generate nan's and null's for pandas with nullable backends: - # * Square rooting a negative number will generate a NaN - # * Replacing a value with None once the dtype is nullable will generate 's .with_columns(y=nw.when(y != NAN_PLACEHOLDER).then(y).otherwise(y**0.5)) .with_columns(y=nw.when(y != NULL_PLACEHOLDER).then(y)) .select( From d61c5c77cba42a549c5a0f00862ebc8d92fd5a7a Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Tue, 12 Aug 2025 09:15:53 +0200 Subject: [PATCH 20/20] fix katex issue with \text --- narwhals/expr.py | 2 +- narwhals/series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/expr.py b/narwhals/expr.py index 323bed3984..73a5324a62 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -2360,7 +2360,7 @@ def is_close( Two values `a` and `b` are considered close if the following condition holds: $$ - |a-b| \le max \{ rel\_tol \cdot max \{ |a|, |b| \}, abs\_tol \} + |a-b| \le max \{ \text{rel\_tol} \cdot max \{ |a|, |b| \}, \text{abs\_tol} \} $$ 
Arguments: diff --git a/narwhals/series.py b/narwhals/series.py index 191b59146e..cc57ca48c7 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -2776,7 +2776,7 @@ def is_close( Two values `a` and `b` are considered close if the following condition holds: $$ - |a-b| \le max \{ rel\_tol \cdot max \{ |a|, |b| \}, abs\_tol \} + |a-b| \le max \{ \text{rel\_tol} \cdot max \{ |a|, |b| \}, \text{abs\_tol} \} $$ Arguments: