From fa82752409d87938d4d254cd3accd47f017579e3 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Wed, 15 Oct 2025 23:07:05 +0200
Subject: [PATCH 01/16] feat: Add testing.assert_frame_equal

---
 narwhals/testing/__init__.py      |   3 +-
 narwhals/testing/asserts/frame.py | 184 ++++++++++++++++++++++++++++++
 narwhals/testing/asserts/utils.py |   6 +
 3 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 narwhals/testing/asserts/frame.py

diff --git a/narwhals/testing/__init__.py b/narwhals/testing/__init__.py
index 6bbab67b64..649463383f 100644
--- a/narwhals/testing/__init__.py
+++ b/narwhals/testing/__init__.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from narwhals.testing.asserts.frame import assert_frame_equal
 from narwhals.testing.asserts.series import assert_series_equal
 
-__all__ = ("assert_series_equal",)
+__all__ = ("assert_frame_equal", "assert_series_equal")
diff --git a/narwhals/testing/asserts/frame.py b/narwhals/testing/asserts/frame.py
new file mode 100644
index 0000000000..3288813f02
--- /dev/null
+++ b/narwhals/testing/asserts/frame.py
@@ -0,0 +1,184 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from narwhals._utils import Implementation, qualified_type_name
+from narwhals.dataframe import DataFrame, LazyFrame
+from narwhals.dependencies import is_narwhals_dataframe, is_narwhals_lazyframe
+from narwhals.testing.asserts.utils import (
+    raise_assertion_error,
+    raise_frame_assertion_error,
+)
+
+if TYPE_CHECKING:
+    from narwhals.typing import DataFrameT, FrameT
+
+GUARANTEES_ROW_ORDER = {
+    Implementation.PANDAS,
+    Implementation.MODIN,
+    Implementation.CUDF,
+    Implementation.PYARROW,
+    Implementation.POLARS,
+    Implementation.DASK,
+}
+
+
+def assert_frame_equal(
+    left: FrameT,
+    right: FrameT,
+    *,
+    check_row_order: bool = True,
+    check_column_order: bool = True,
+    check_dtypes: bool = True,
+    check_exact: bool = False,
+    rel_tol: float = 1e-5,
+    abs_tol: float = 1e-8,
+    categorical_as_str: bool = False,
+) -> None:
+    """Assert that the left and right frames are equal.
+
+    Raises a detailed `AssertionError` if the frames differ.
+    This function is intended for use in unit tests.
+
+    Arguments:
+        left: The first DataFrame or LazyFrame to compare.
+        right: The second DataFrame or LazyFrame to compare.
+        check_row_order: Requires row order to match. This flag is ignored for backends
+            that do not guarantee row order such as DuckDB, Ibis, PySpark, SQLFrame
+        check_column_order: Requires column order to match.
+        check_dtypes: Requires data types to match.
+        check_exact: Requires float values to match exactly. If set to `False`, values are
+            considered equal when within tolerance of each other (see `rel_tol` and `abs_tol`).
+            Only affects columns with a Float data type.
+        rel_tol: Relative tolerance for inexact checking. Fraction of values in `right`.
+        abs_tol: Absolute tolerance for inexact checking.
+        categorical_as_str: Cast categorical columns to string before comparing.
+            Enabling this helps compare columns that do not share the same string cache.
+
+    Examples:
+        TODO(FBruzzesi): ...
+    """
+    __tracebackhide__ = True
+
+    if any(
+        not (is_narwhals_dataframe(obj) or is_narwhals_lazyframe(obj))
+        for obj in (left, right)
+    ):
+        msg = (
+            "Expected `narwhals.DataFrame` or `narwhals.LazyFrame` instance, found:\n"
+            f"[left]: {qualified_type_name(type(left))}\n"
+            f"[right]: {qualified_type_name(type(right))}\n\n"
+            "Hint: Use `nw.from_native(obj, allow_series=False) to convert each native "
+            "object into a `narwhals.DataFrame` or `narwhals.LazyFrame` first."
+        )
+        raise TypeError(msg)
+
+    left_impl, right_impl = left.implementation, right.implementation
+    if left_impl != right_impl:
+        raise_frame_assertion_error("implementation mismatch", left_impl, right_impl)
+
+    left_eager, right_eager = _check_correct_input_type(left, right)
+
+    _assert_dataframe_equal(
+        left=left_eager,
+        right=right_eager,
+        impl=left_impl,
+        check_row_order=check_row_order,
+        check_column_order=check_column_order,
+        check_dtypes=check_dtypes,
+        check_exact=check_exact,
+        rel_tol=rel_tol,
+        abs_tol=abs_tol,
+        categorical_as_str=categorical_as_str,
+    )
+
+
+def _check_correct_input_type(  # noqa: RET503
+    left: DataFrame[Any] | LazyFrame[Any], right: DataFrame[Any] | LazyFrame[Any]
+) -> tuple[DataFrame[Any], DataFrame[Any]]:
+    # Adapted from https://github.com/pola-rs/polars/blob/afdbf3056d1228cf493901e45f536b0905cec8ea/py-polars/src/polars/testing/asserts/frame.py#L15-L17
+    if isinstance(left, DataFrame) and isinstance(right, DataFrame):
+        return left, right
+
+    if isinstance(left, LazyFrame) and isinstance(right, LazyFrame):
+        return left.collect(), right.collect()
+
+    raise_assertion_error(
+        "inputs",
+        "unexpected input types",
+        left=type(left).__name__,
+        right=type(right).__name__,
+    )
+
+
+def _assert_dataframe_equal(
+    left: DataFrameT,
+    right: DataFrameT,
+    impl: Implementation,
+    *,
+    check_row_order: bool,
+    check_column_order: bool,
+    check_dtypes: bool,
+    check_exact: bool,
+    rel_tol: float,
+    abs_tol: float,
+    categorical_as_str: bool,
+) -> None:
+    # Adapted from https://github.com/pola-rs/polars/blob/afdbf3056d1228cf493901e45f536b0905cec8ea/crates/polars-testing/src/asserts/utils.rs#L829
+    _check_schema_equal(
+        left, right, check_dtypes=check_dtypes, check_column_order=check_column_order
+    )
+
+    left_len, right_len = len(left), len(right)
+    if left_len != right_len:
+        raise_frame_assertion_error("height (row count) mismatch", left_len, right_len)
+
+
+def _check_schema_equal(
+    left: DataFrameT, right: DataFrameT, *, check_dtypes: bool, check_column_order: bool
+) -> None:
+    """Compares DataFrame schema based on specified criteria.
+
+    Adapted from https://github.com/pola-rs/polars/blob/afdbf3056d1228cf493901e45f536b0905cec8ea/crates/polars-testing/src/asserts/utils.rs#L667-L698
+    """
+    lschema, rschema = left.schema, right.schema
+
+    # Fast path for equal DataFrames
+    if lschema == rschema:
+        return
+
+    lnames, rnames = lschema.names(), rschema.names()
+    lset, rset = set(lnames), set(rnames)
+
+    if lset != rset:
+        if left_not_in_right := list(lset.difference(rset)):
+            raise_frame_assertion_error(
+                detail=f"{left_not_in_right} in left, but not in right",
+                left=lset,
+                right=rset,
+            )
+        if right_not_in_left := list(rset.difference(lset)):
+            raise_frame_assertion_error(
+                detail=f"{right_not_in_left} in right, but not in left",
+                left=lset,
+                right=rset,
+            )
+
+    if check_column_order and lnames != rnames:
+        raise_frame_assertion_error(
+            detail="columns are not in the same order", left=lnames, right=rnames
+        )
+
+    if check_dtypes:
+        ldtypes = lschema.dtypes()
+        if check_column_order:
+            rdtypes = rschema.dtypes()
+        else:
+            rdtypes = [rschema[col_name] for col_name in lnames]
+
+        if ldtypes != rdtypes:
+            raise_frame_assertion_error(
+                detail="dtypes do not match", left=ldtypes, right=rdtypes
+            )
+
+    return
diff --git a/narwhals/testing/asserts/utils.py b/narwhals/testing/asserts/utils.py
index 13720c2a35..1dd99ee5c0 100644
--- a/narwhals/testing/asserts/utils.py
+++ b/narwhals/testing/asserts/utils.py
@@ -43,3 +43,9 @@ def raise_series_assertion_error(
     detail: SeriesDetail, left: Any, right: Any, *, cause: Exception | None = None
 ) -> Never:
     raise_assertion_error("Series", detail, left, right, cause=cause)
+
+
+def raise_frame_assertion_error(
+    detail: str, left: Any, right: Any, *, cause: Exception | None = None
+) -> Never:
+    raise_assertion_error("DataFrames", detail, left, right, cause=cause)

From e12252c8a41ba0f77cbc0855495635878ab90ff6 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Thu, 16 Oct 2025 09:40:30 +0200
Subject: [PATCH 02/16] it should be it

---
 narwhals/testing/asserts/frame.py | 39 +++++++++++++++++++++++++++----
 1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/narwhals/testing/asserts/frame.py b/narwhals/testing/asserts/frame.py
index 3288813f02..74b70574a7 100644
--- a/narwhals/testing/asserts/frame.py
+++ b/narwhals/testing/asserts/frame.py
@@ -5,6 +5,7 @@
 from narwhals._utils import Implementation, qualified_type_name
 from narwhals.dataframe import DataFrame, LazyFrame
 from narwhals.dependencies import is_narwhals_dataframe, is_narwhals_lazyframe
+from narwhals.testing.asserts.series import assert_series_equal
 from narwhals.testing.asserts.utils import (
     raise_assertion_error,
     raise_frame_assertion_error,
@@ -133,6 +134,35 @@ def _assert_dataframe_equal(
     if left_len != right_len:
         raise_frame_assertion_error("height (row count) mismatch", left_len, right_len)
 
+    left_schema = left.schema
+    if (not check_row_order) or (impl not in GUARANTEES_ROW_ORDER):
+        # NOTE: Sort by all the non-nested dtypes columns.
+        # This might lead to wrong results
+        left_cols = [name for name, dtype in left_schema.items() if not dtype.is_nested()]
+
+        left = left.sort(left_cols)
+        right = right.sort(left_cols)
+
+    for col_name in left_schema.names():
+        _series_left = left.get_column(col_name)
+        _series_right = right.get_column(col_name)
+        try:
+            assert_series_equal(
+                _series_left,
+                _series_right,
+                check_dtypes=check_dtypes,
+                check_names=False,
+                check_order=True,
+                check_exact=check_exact,
+                rel_tol=rel_tol,
+                abs_tol=abs_tol,
+                categorical_as_str=categorical_as_str,
+            )
+        except AssertionError:
+            raise_frame_assertion_error(
+                f'value mismatch for column "{col_name}"', _series_left, _series_right
+            )
+
 
 def _check_schema_equal(
     left: DataFrameT, right: DataFrameT, *, check_dtypes: bool, check_column_order: bool
@@ -171,10 +201,11 @@ def _check_schema_equal(
 
     if check_dtypes:
         ldtypes = lschema.dtypes()
-        if check_column_order:
-            rdtypes = rschema.dtypes()
-        else:
-            rdtypes = [rschema[col_name] for col_name in lnames]
+        rdtypes = (
+            rschema.dtypes()
+            if check_column_order
+            else [rschema[col_name] for col_name in lnames]
+        )
 
         if ldtypes != rdtypes:
             raise_frame_assertion_error(

From d1094488095f58948855547afaf30d1fd4641ff5 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Thu, 16 Oct 2025 17:25:22 +0200
Subject: [PATCH 03/16] WIP: Unit test

---
 tests/testing/assert_frame_equal_test.py | 197 +++++++++++++++++++++++
 1 file changed, 197 insertions(+)
 create mode 100644 tests/testing/assert_frame_equal_test.py

diff --git a/tests/testing/assert_frame_equal_test.py b/tests/testing/assert_frame_equal_test.py
new file mode 100644
index 0000000000..ba7e76e85c
--- /dev/null
+++ b/tests/testing/assert_frame_equal_test.py
@@ -0,0 +1,197 @@
+from __future__ import annotations
+
+import re
+from contextlib import AbstractContextManager, nullcontext as does_not_raise
+from typing import TYPE_CHECKING, Any, Callable
+
+import pytest
+
+import narwhals as nw
+from narwhals.testing import assert_frame_equal
+from tests.utils import PANDAS_VERSION
+
+if TYPE_CHECKING:
+    from typing_extensions import TypeAlias
+
+    from narwhals.typing import IntoSchema
+    from tests.conftest import Data
+    from tests.utils import Constructor, ConstructorEager
+
+    SetupFn: TypeAlias = Callable[[nw.Series[Any]], tuple[nw.Series[Any], nw.Series[Any]]]
+
+
+def _assertion_error(detail: str) -> pytest.RaisesExc:
+    return pytest.raises(
+        AssertionError, match=re.escape(f"DataFrames are different ({detail})")
+    )
+
+
+def test_check_narwhals_objects(constructor: Constructor) -> None:
+    """Test that a type error is raised if the input is not a Narwhals object."""
+    frame = constructor({"a": [1, 2, 3]})
+    msg = re.escape(
+        "Expected `narwhals.DataFrame` or `narwhals.LazyFrame` instance, found"
+    )
+    with pytest.raises(TypeError, match=msg):
+        assert_frame_equal(frame, frame)  # type: ignore[type-var]
+
+
+def test_implementation_mismatch() -> None:
+    """Test that different implementations raise an error."""
+    pytest.importorskip("pandas")
+    pytest.importorskip("pyarrow")
+
+    import pandas as pd
+    import pyarrow as pa
+
+    with _assertion_error("implementation mismatch"):
+        assert_frame_equal(
+            nw.from_native(pd.DataFrame({"a": [1]})),
+            nw.from_native(pa.table({"a": [1]})),  # type: ignore[type-var] # pyright: ignore[reportArgumentType]
+        )
+
+
+def test_check_same_input_type(constructor_eager: ConstructorEager) -> None:
+    """Test that left and right frames are either both eager or both lazy.
+
+    NOTE: Use `constructor_eager` instead of `constructor` so that the roundtrip
+        `.lazy().collect()` preserves the same implementation (and we raise the check after)
+    """
+    frame = nw.from_native(constructor_eager({"a": [1, 2, 3]}))
+
+    msg = re.escape("inputs are different (unexpected input types)")
+    with pytest.raises(AssertionError, match=msg):
+        assert_frame_equal(frame.lazy(), frame.lazy().collect())  # type: ignore[type-var]
+
+
+@pytest.mark.parametrize(
+    ("left_schema", "right_schema", "check_dtypes", "check_column_order", "context"),
+    [
+        # Same order, same dtypes
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"a": nw.Int32(), "b": nw.Float32()},
+            True,
+            True,
+            does_not_raise(),
+        ),
+        # Same order, different dtypes
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"a": nw.Int32(), "b": nw.Float64()},
+            False,
+            True,
+            does_not_raise(),
+        ),
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"a": nw.Int32(), "b": nw.Float64()},
+            True,
+            True,
+            _assertion_error("dtypes do not match"),
+        ),
+        # Different order, same dtype
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"b": nw.Float32(), "a": nw.Int32()},
+            True,
+            False,
+            does_not_raise(),
+        ),
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"b": nw.Float32(), "a": nw.Int32()},
+            True,
+            True,
+            _assertion_error("columns are not in the same order"),
+        ),
+        # Different order, different dtype
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"b": nw.Float64(), "a": nw.Int16()},
+            False,
+            False,
+            does_not_raise(),
+        ),
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"b": nw.Float64(), "a": nw.Int16()},
+            True,
+            False,
+            _assertion_error("dtypes do not match"),
+        ),
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"b": nw.Float64(), "a": nw.Int16()},
+            False,
+            True,
+            _assertion_error("columns are not in the same order"),
+        ),
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"b": nw.Float64(), "a": nw.Int16()},
+            True,
+            True,
+            _assertion_error("columns are not in the same order"),
+        ),
+        # Different columns (left not in right)
+        (
+            {"a": nw.Int32(), "z": nw.String()},
+            {"b": nw.Float64()},
+            True,
+            True,
+            _assertion_error("['a', 'z'] in left, but not in right"),
+        ),
+        # Different columns (right not in left)
+        (
+            {"z": nw.String()},
+            {"z": nw.String(), "b": nw.Float64()},
+            True,
+            True,
+            _assertion_error("['b'] in right, but not in left"),
+        ),
+    ],
+)
+def test_check_schema_mismatch(
+    constructor: Constructor,
+    left_schema: IntoSchema,
+    right_schema: IntoSchema,
+    *,
+    check_dtypes: bool,
+    check_column_order: bool,
+    context: AbstractContextManager[Any],
+) -> None:
+    data = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["foo", "bar", "baz"]}
+    left = nw.from_native(constructor(data)).select(
+        nw.col(name).cast(dtype) for name, dtype in left_schema.items()
+    )
+    right = nw.from_native(constructor(data)).select(
+        nw.col(name).cast(dtype) for name, dtype in right_schema.items()
+    )
+
+    with context:
+        assert_frame_equal(
+            left, right, check_column_order=check_column_order, check_dtypes=check_dtypes
+        )
+
+
+def test_height_mismatch(constructor: Constructor) -> None: ...
+
+
+def test_columns_mismatch(constructor: Constructor) -> None: ...
+
+
+def test_self_equal(constructor: Constructor, data: Data) -> None:
+    """Test that a dataframe is equal to itself, including nested dtypes with nulls.
+
+    We are dropping columns whose type is unsupported by _some_ backend.
+    """
+    cols_to_drop = ("categorical", "enum", "duration", "struct", "time")
+
+    if "pandas" in str(constructor) and PANDAS_VERSION < (2, 2):  # pragma: no cover
+        reason = "Pandas too old for nested dtypes"
+        pytest.skip(reason=reason)
+
+    _data = {k: v for k, v in data.items() if k not in cols_to_drop}
+    df = nw.from_native(constructor(_data))
+    assert_frame_equal(df, df)  # type: ignore[type-var]

From db761a882fd82f02c2de443ad2adcb834ac87123 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Thu, 16 Oct 2025 17:46:19 +0200
Subject: [PATCH 04/16] arguably improve typing(?), raise if only nested dtypes
 are available

---
 narwhals/testing/asserts/frame.py        | 21 +++++++++++++--------
 tests/testing/assert_frame_equal_test.py |  8 ++++----
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/narwhals/testing/asserts/frame.py b/narwhals/testing/asserts/frame.py
index 74b70574a7..af7de28db5 100644
--- a/narwhals/testing/asserts/frame.py
+++ b/narwhals/testing/asserts/frame.py
@@ -12,7 +12,7 @@
 )
 
 if TYPE_CHECKING:
-    from narwhals.typing import DataFrameT, FrameT
+    from narwhals.typing import DataFrameT, LazyFrameT
 
 GUARANTEES_ROW_ORDER = {
     Implementation.PANDAS,
@@ -25,8 +25,8 @@
 
 
 def assert_frame_equal(
-    left: FrameT,
-    right: FrameT,
+    left: DataFrameT | LazyFrameT,
+    right: DataFrameT | LazyFrameT,
     *,
     check_row_order: bool = True,
     check_column_order: bool = True,
@@ -95,7 +95,7 @@ def assert_frame_equal(
 
 
 def _check_correct_input_type(  # noqa: RET503
-    left: DataFrame[Any] | LazyFrame[Any], right: DataFrame[Any] | LazyFrame[Any]
+    left: DataFrameT | LazyFrameT, right: DataFrameT | LazyFrameT
 ) -> tuple[DataFrame[Any], DataFrame[Any]]:
     # Adapted from https://github.com/pola-rs/polars/blob/afdbf3056d1228cf493901e45f536b0905cec8ea/py-polars/src/polars/testing/asserts/frame.py#L15-L17
     if isinstance(left, DataFrame) and isinstance(right, DataFrame):
@@ -137,11 +137,16 @@ def _assert_dataframe_equal(
     left_schema = left.schema
     if (not check_row_order) or (impl not in GUARANTEES_ROW_ORDER):
         # NOTE: Sort by all the non-nested dtypes columns.
-        # This might lead to wrong results
-        left_cols = [name for name, dtype in left_schema.items() if not dtype.is_nested()]
+        # ! This might lead to wrong results.
+        # If only nested dtypes are available, then we raise an exception.
+        sort_by = [name for name, dtype in left_schema.items() if not dtype.is_nested()]
 
-        left = left.sort(left_cols)
-        right = right.sort(left_cols)
+        if not sort_by:
+            msg = "`check_row_order=False` is not supported (yet) with only nested data type."
+            raise NotImplementedError(msg)
+
+        left = left.sort(sort_by)
+        right = right.sort(sort_by)
 
     for col_name in left_schema.names():
         _series_left = left.get_column(col_name)
diff --git a/tests/testing/assert_frame_equal_test.py b/tests/testing/assert_frame_equal_test.py
index ba7e76e85c..cd8ee1c22d 100644
--- a/tests/testing/assert_frame_equal_test.py
+++ b/tests/testing/assert_frame_equal_test.py
@@ -33,7 +33,7 @@ def test_check_narwhals_objects(constructor: Constructor) -> None:
         "Expected `narwhals.DataFrame` or `narwhals.LazyFrame` instance, found"
     )
     with pytest.raises(TypeError, match=msg):
-        assert_frame_equal(frame, frame)  # type: ignore[type-var]
+        assert_frame_equal(frame, frame)  # type: ignore[arg-type]
 
 
 def test_implementation_mismatch() -> None:
@@ -61,7 +61,7 @@ def test_check_same_input_type(constructor_eager: ConstructorEager) -> None:
 
     msg = re.escape("inputs are different (unexpected input types)")
     with pytest.raises(AssertionError, match=msg):
-        assert_frame_equal(frame.lazy(), frame.lazy().collect())  # type: ignore[type-var]
+        assert_frame_equal(frame.lazy(), frame.lazy().collect())
 
 
 @pytest.mark.parametrize(
@@ -178,7 +178,7 @@ def test_check_schema_mismatch(
 def test_height_mismatch(constructor: Constructor) -> None: ...
 
 
-def test_columns_mismatch(constructor: Constructor) -> None: ...
+def test_series_mismatch(constructor: Constructor) -> None: ...
 
 
 def test_self_equal(constructor: Constructor, data: Data) -> None:
@@ -194,4 +194,4 @@ def test_self_equal(constructor: Constructor, data: Data) -> None:
 
     _data = {k: v for k, v in data.items() if k not in cols_to_drop}
     df = nw.from_native(constructor(_data))
-    assert_frame_equal(df, df)  # type: ignore[type-var]
+    assert_frame_equal(df, df)

From 748b971adf771aafc96d8e44ad463731248d8e95 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Fri, 17 Oct 2025 16:53:45 +0200
Subject: [PATCH 05/16] almost there with the testing

---
 narwhals/testing/asserts/frame.py        |  4 +-
 tests/testing/assert_frame_equal_test.py | 60 ++++++++++++++++++++++--
 2 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/narwhals/testing/asserts/frame.py b/narwhals/testing/asserts/frame.py
index af7de28db5..0675c6a444 100644
--- a/narwhals/testing/asserts/frame.py
+++ b/narwhals/testing/asserts/frame.py
@@ -186,13 +186,13 @@ def _check_schema_equal(
     lset, rset = set(lnames), set(rnames)
 
     if lset != rset:
-        if left_not_in_right := list(lset.difference(rset)):
+        if left_not_in_right := sorted(lset.difference(rset)):
             raise_frame_assertion_error(
                 detail=f"{left_not_in_right} in left, but not in right",
                 left=lset,
                 right=rset,
             )
-        if right_not_in_left := list(rset.difference(lset)):
+        if right_not_in_left := sorted(rset.difference(lset)):
             raise_frame_assertion_error(
                 detail=f"{right_not_in_left} in right, but not in left",
                 left=lset,
diff --git a/tests/testing/assert_frame_equal_test.py b/tests/testing/assert_frame_equal_test.py
index cd8ee1c22d..972b2a00a6 100644
--- a/tests/testing/assert_frame_equal_test.py
+++ b/tests/testing/assert_frame_equal_test.py
@@ -8,6 +8,7 @@
 
 import narwhals as nw
 from narwhals.testing import assert_frame_equal
+from narwhals.testing.asserts.frame import GUARANTEES_ROW_ORDER
 from tests.utils import PANDAS_VERSION
 
 if TYPE_CHECKING:
@@ -21,9 +22,8 @@
 
 
 def _assertion_error(detail: str) -> pytest.RaisesExc:
-    return pytest.raises(
-        AssertionError, match=re.escape(f"DataFrames are different ({detail})")
-    )
+    msg = f"DataFrames are different ({detail})"
+    return pytest.raises(AssertionError, match=re.escape(msg))
 
 
 def test_check_narwhals_objects(constructor: Constructor) -> None:
@@ -175,10 +175,60 @@ def test_check_schema_mismatch(
         )
 
 
-def test_height_mismatch(constructor: Constructor) -> None: ...
+def test_height_mismatch(constructor: Constructor) -> None:
+    left = nw.from_native(constructor({"a": [1, 2, 3]}))
+    right = nw.from_native(constructor({"a": [1, 3]}))
+
+    with _assertion_error("height (row count) mismatch"):
+        assert_frame_equal(left, right)
+
+
+@pytest.mark.parametrize("check_row_order", [True, False])
+def test_check_row_order(
+    constructor: Constructor, request: pytest.FixtureRequest, *, check_row_order: bool
+) -> None:
+    if "dask" in str(constructor):
+        reason = "Unsupported List type"
+        request.applymarker(pytest.mark.xfail(reason=reason))
+
+    data = {"a": [1, 2], "b": [["x", "y"], ["x", "z"]]}
+
+    b_expr = nw.col("b").cast(nw.List(nw.String()))
+    left = (
+        nw.from_native(constructor(data)).with_columns(b_expr).sort("a", descending=False)
+    )
+    right = (
+        nw.from_native(constructor(data)).with_columns(b_expr).sort("a", descending=True)
+    )
+
+    context = (
+        _assertion_error('value mismatch for column "a"')
+        if check_row_order and left.implementation in GUARANTEES_ROW_ORDER
+        else does_not_raise()
+    )
+
+    with context:
+        assert_frame_equal(left, right, check_row_order=check_row_order)
+
+
+def test_check_row_order_nested_only(
+    constructor: Constructor, request: pytest.FixtureRequest
+) -> None:
+    if "dask" in str(constructor):
+        reason = "Unsupported List type"
+        request.applymarker(pytest.mark.xfail(reason=reason))
+
+    data = {"b": [["x", "y"], ["x", "z"]]}
+
+    b_expr = nw.col("b").cast(nw.List(nw.String()))
+    left = nw.from_native(constructor(data)).select(b_expr)
+
+    msg = "`check_row_order=False` is not supported (yet) with only nested data type."
+    with pytest.raises(NotImplementedError, match=re.escape(msg)):
+        assert_frame_equal(left, left, check_row_order=False)
 
 
-def test_series_mismatch(constructor: Constructor) -> None: ...
+def test_values_mismatch(constructor: Constructor) -> None: ...
 
 
 def test_self_equal(constructor: Constructor, data: Data) -> None:

From bbf7a2fe9eeccc6ddb0ba25c75e938eae3058e9f Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Sat, 18 Oct 2025 12:29:26 +0200
Subject: [PATCH 06/16] Docstrings, more comments, docs

---
 docs/api-reference/testing.md            |  1 +
 narwhals/testing/asserts/frame.py        | 54 ++++++++++++++++++++++--
 narwhals/testing/asserts/utils.py        | 34 ++++++++++++---
 tests/testing/assert_frame_equal_test.py |  2 +
 4 files changed, 80 insertions(+), 11 deletions(-)

diff --git a/docs/api-reference/testing.md b/docs/api-reference/testing.md
index e73fb4d447..db83c6930e 100644
--- a/docs/api-reference/testing.md
+++ b/docs/api-reference/testing.md
@@ -4,4 +4,5 @@
     handler: python
     options:
       members:
+        - assert_frame_equal
         - assert_series_equal
diff --git a/narwhals/testing/asserts/frame.py b/narwhals/testing/asserts/frame.py
index 0675c6a444..94a9fbced1 100644
--- a/narwhals/testing/asserts/frame.py
+++ b/narwhals/testing/asserts/frame.py
@@ -41,11 +41,16 @@ def assert_frame_equal(
     Raises a detailed `AssertionError` if the frames differ.
     This function is intended for use in unit tests.
 
+    Notes:
+        In the case of backends that do not guarantee the row order, such as DuckDB, Ibis,
+        PySpark, and SQLFrame, the `check_row_order` argument is ignored and the
+        comparands are sorted by all the non-nested columns regardless.
+
     Arguments:
         left: The first DataFrame or LazyFrame to compare.
         right: The second DataFrame or LazyFrame to compare.
         check_row_order: Requires row order to match. This flag is ignored for backends
-            that do not guarantee row order such as DuckDB, Ibis, PySpark, SQLFrame
+            that do not guarantee row order such as DuckDB, Ibis, PySpark, SQLFrame.
         check_column_order: Requires column order to match.
         check_dtypes: Requires data types to match.
         check_exact: Requires float values to match exactly. If set to `False`, values are
@@ -57,7 +62,44 @@ def assert_frame_equal(
             Enabling this helps compare columns that do not share the same string cache.
 
     Examples:
-        TODO(FBruzzesi): ...
+        >>> import duckdb
+        >>> import narwhals as nw
+        >>> from narwhals.testing import assert_frame_equal
+        >>>
+        >>> left_native = duckdb.sql("SELECT * FROM VALUES (1, ), (2, ), (3, ) df(a)")
+        >>> right_native = duckdb.sql("SELECT * FROM VALUES (1, ), (5, ), (3, ) df(a)")
+        >>> left = nw.from_native(left_native)
+        >>> right = nw.from_native(right_native)
+        >>> assert_frame_equal(left, right)  # doctest: +ELLIPSIS
+        Traceback (most recent call last):
+            ...
+        AssertionError: DataFrames are different (value mismatch for column "a")
+        [left]:
+        ┌────────────────────────────────────────────────┐
+        |                Narwhals Series                 |
+        |------------------------------------------------|
+        |<pyarrow.lib.ChunkedArray object at ...
+        |[                                               |
+        |  [                                             |
+        |    1,                                          |
+        |    2,                                          |
+        |    3                                           |
+        |  ]                                             |
+        |]                                               |
+        └────────────────────────────────────────────────┘
+        [right]:
+        ┌────────────────────────────────────────────────┐
+        |                Narwhals Series                 |
+        |------------------------------------------------|
+        |<pyarrow.lib.ChunkedArray object at ...
+        |[                                               |
+        |  [                                             |
+        |    1,                                          |
+        |    3,                                          |
+        |    5                                           |
+        |  ]                                             |
+        |]                                               |
+        └────────────────────────────────────────────────┘
     """
     __tracebackhide__ = True
 
@@ -126,6 +168,8 @@ def _assert_dataframe_equal(
     categorical_as_str: bool,
 ) -> None:
     # Adapted from https://github.com/pola-rs/polars/blob/afdbf3056d1228cf493901e45f536b0905cec8ea/crates/polars-testing/src/asserts/utils.rs#L829
+    # NOTE: Here `impl` comes from the original dataframe, not the `.collect`-ed one, and
+    # it's used to distinguish between backends that do and do not guarantee row order.
     _check_schema_equal(
         left, right, check_dtypes=check_dtypes, check_column_order=check_column_order
     )
@@ -155,7 +199,7 @@ def _assert_dataframe_equal(
             assert_series_equal(
                 _series_left,
                 _series_right,
-                check_dtypes=check_dtypes,
+                check_dtypes=False,
                 check_names=False,
                 check_order=True,
                 check_exact=check_exact,
@@ -192,7 +236,9 @@ def _check_schema_equal(
                 left=lset,
                 right=rset,
             )
-        if right_not_in_left := sorted(rset.difference(lset)):
+        if right_not_in_left := sorted(rset.difference(lset)):  # pragma: no cover
+            # NOTE: the `pragma: no cover` flag is due to a false negative.
+            # The last test in `test_check_schema_mismatch` does cover this case.
             raise_frame_assertion_error(
                 detail=f"{right_not_in_left} in right, but not in left",
                 left=lset,
diff --git a/narwhals/testing/asserts/utils.py b/narwhals/testing/asserts/utils.py
index 1dd99ee5c0..42ba29ba0e 100644
--- a/narwhals/testing/asserts/utils.py
+++ b/narwhals/testing/asserts/utils.py
@@ -1,3 +1,4 @@
+# ruff: noqa: PYI051
 from __future__ import annotations
 
 from typing import TYPE_CHECKING, Any, Literal
@@ -7,23 +8,42 @@
 if TYPE_CHECKING:
     from typing_extensions import Never, TypeAlias
 
-# NOTE: This alias is created to facilitate autocomplete. Feel free to extend it as
-# you please when adding a new feature.
+# NOTE: These aliases are created to facilitate autocompletion.
+# Feel free to extend them as you please when adding new features.
 # See: https://github.com/narwhals-dev/narwhals/pull/2983#discussion_r2337548736
+ObjectName: TypeAlias = Literal["inputs", "Series", "DataFrames"]
 SeriesDetail: TypeAlias = Literal[
+    "dtype mismatch",
+    "exact value mismatch",
     "implementation mismatch",
     "length mismatch",
-    "dtype mismatch",
     "name mismatch",
+    "nested value mismatch",
     "null value mismatch",
-    "exact value mismatch",
     "values not within tolerance",
-    "nested value mismatch",
 ]
+DataFramesDetail: TypeAlias = (
+    Literal[
+        "columns are not in the same order",
+        "dtypes do not match",
+        "height (row count) mismatch",
+        "implementation mismatch",
+    ]
+    | str
+    # NOTE: `| str` makes # This makes the literals above redundant, but they still show
+    # up when typing as autocompletion.
+    # The reason to have `str` is due to the fact that other details are dynamic
+    # and depend upon which columns lead to the assertion error.
+)
 
 
 def raise_assertion_error(
-    objects: str, detail: str, left: Any, right: Any, *, cause: Exception | None = None
+    objects: ObjectName,
+    detail: str,
+    left: Any,
+    right: Any,
+    *,
+    cause: Exception | None = None,
 ) -> Never:
     """Raise a detailed assertion error."""
     __tracebackhide__ = True
@@ -46,6 +66,6 @@ def raise_series_assertion_error(
 
 
 def raise_frame_assertion_error(
-    detail: str, left: Any, right: Any, *, cause: Exception | None = None
+    detail: DataFramesDetail, left: Any, right: Any, *, cause: Exception | None = None
 ) -> Never:
     raise_assertion_error("DataFrames", detail, left, right, cause=cause)
diff --git a/tests/testing/assert_frame_equal_test.py b/tests/testing/assert_frame_equal_test.py
index 972b2a00a6..05e5a722af 100644
--- a/tests/testing/assert_frame_equal_test.py
+++ b/tests/testing/assert_frame_equal_test.py
@@ -228,6 +228,8 @@ def test_check_row_order_nested_only(
         assert_frame_equal(left, left, check_row_order=False)
 
 
+# TODO(FBruzzesi): Do we even need this? What should we check?
+# The best way could be to migrate away from `assert_equal_data` in the test suite
 def test_values_mismatch(constructor: Constructor) -> None: ...
 
 

From 9417b60b9e1c87e9142aec31c94649d989f1295e Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Sat, 18 Oct 2025 12:38:11 +0200
Subject: [PATCH 07/16] try within 'if TYPE_CHECKING'

---
 narwhals/testing/asserts/utils.py | 50 +++++++++++++++----------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/narwhals/testing/asserts/utils.py b/narwhals/testing/asserts/utils.py
index 42ba29ba0e..6f38af95d9 100644
--- a/narwhals/testing/asserts/utils.py
+++ b/narwhals/testing/asserts/utils.py
@@ -8,33 +8,33 @@
 if TYPE_CHECKING:
     from typing_extensions import Never, TypeAlias
 
-# NOTE: These aliases are created to facilitate autocompletion.
-# Feel free to extend them as you please when adding new features.
-# See: https://github.com/narwhals-dev/narwhals/pull/2983#discussion_r2337548736
-ObjectName: TypeAlias = Literal["inputs", "Series", "DataFrames"]
-SeriesDetail: TypeAlias = Literal[
-    "dtype mismatch",
-    "exact value mismatch",
-    "implementation mismatch",
-    "length mismatch",
-    "name mismatch",
-    "nested value mismatch",
-    "null value mismatch",
-    "values not within tolerance",
-]
-DataFramesDetail: TypeAlias = (
-    Literal[
-        "columns are not in the same order",
-        "dtypes do not match",
-        "height (row count) mismatch",
+    # NOTE: These aliases are created to facilitate autocompletion.
+    # Feel free to extend them as you please when adding new features.
+    # See: https://github.com/narwhals-dev/narwhals/pull/2983#discussion_r2337548736
+    ObjectName: TypeAlias = Literal["inputs", "Series", "DataFrames"]
+    SeriesDetail: TypeAlias = Literal[
+        "dtype mismatch",
+        "exact value mismatch",
         "implementation mismatch",
+        "length mismatch",
+        "name mismatch",
+        "nested value mismatch",
+        "null value mismatch",
+        "values not within tolerance",
     ]
-    | str
-    # NOTE: `| str` makes # This makes the literals above redundant, but they still show
-    # up when typing as autocompletion.
-    # The reason to have `str` is due to the fact that other details are dynamic
-    # and depend upon which columns lead to the assertion error.
-)
+    DataFramesDetail: TypeAlias = (
+        Literal[
+            "columns are not in the same order",
+            "dtypes do not match",
+            "height (row count) mismatch",
+            "implementation mismatch",
+        ]
+        | str
+        # NOTE: `| str` makes the literals above redundant, but they still show
+        # up when typing as autocompletion.
+        # The reason to have `str` is due to the fact that other details are dynamic
+        # and depend upon which columns lead to the assertion error.
+    )
 
 
 def raise_assertion_error(

From e2a7c7c138b6a898de234aaa303ef6dd449a62da Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Sat, 18 Oct 2025 12:43:26 +0200
Subject: [PATCH 08/16] skip old pandas nested dtypes

---
 tests/testing/assert_frame_equal_test.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/testing/assert_frame_equal_test.py b/tests/testing/assert_frame_equal_test.py
index 05e5a722af..4b6b2a340e 100644
--- a/tests/testing/assert_frame_equal_test.py
+++ b/tests/testing/assert_frame_equal_test.py
@@ -187,6 +187,10 @@ def test_height_mismatch(constructor: Constructor) -> None:
 def test_check_row_order(
     constructor: Constructor, request: pytest.FixtureRequest, *, check_row_order: bool
 ) -> None:
+    if "pandas" in str(constructor) and PANDAS_VERSION < (2, 2):  # pragma: no cover
+        reason = "Pandas too old for nested dtypes"
+        pytest.skip(reason=reason)
+
     if "dask" in str(constructor):
         reason = "Unsupported List type"
         request.applymarker(pytest.mark.xfail(reason=reason))
@@ -214,6 +218,10 @@ def test_check_row_order(
 def test_check_row_order_nested_only(
     constructor: Constructor, request: pytest.FixtureRequest
 ) -> None:
+    if "pandas" in str(constructor) and PANDAS_VERSION < (2, 2):  # pragma: no cover
+        reason = "Pandas too old for nested dtypes"
+        pytest.skip(reason=reason)
+
     if "dask" in str(constructor):
         reason = "Unsupported List type"
         request.applymarker(pytest.mark.xfail(reason=reason))

From d20fdc4b6b8e8f589ad94ac6c8283f4a0111378f Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Sat, 18 Oct 2025 13:06:38 +0200
Subject: [PATCH 09/16] add missing backtick, improve comment

---
 narwhals/testing/asserts/frame.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/narwhals/testing/asserts/frame.py b/narwhals/testing/asserts/frame.py
index 94a9fbced1..61a7eaa10c 100644
--- a/narwhals/testing/asserts/frame.py
+++ b/narwhals/testing/asserts/frame.py
@@ -111,7 +111,7 @@ def assert_frame_equal(
             "Expected `narwhals.DataFrame` or `narwhals.LazyFrame` instance, found:\n"
             f"[left]: {qualified_type_name(type(left))}\n"
             f"[right]: {qualified_type_name(type(right))}\n\n"
-            "Hint: Use `nw.from_native(obj, allow_series=False) to convert each native "
+            "Hint: Use `nw.from_native(obj, allow_series=False)` to convert each native "
             "object into a `narwhals.DataFrame` or `narwhals.LazyFrame` first."
         )
         raise TypeError(msg)
@@ -177,15 +177,18 @@ def _assert_dataframe_equal(
     left_len, right_len = len(left), len(right)
     if left_len != right_len:
         raise_frame_assertion_error("height (row count) mismatch", left_len, right_len)
+    # TODO(FBruzzesi): Should we return early if row count is zero?
 
     left_schema = left.schema
     if (not check_row_order) or (impl not in GUARANTEES_ROW_ORDER):
         # NOTE: Sort by all the non-nested dtypes columns.
-        # ! This might lead to wrong results.
-        # If only nested dtypes are available, then we raise an exception.
+        # See: https://github.com/narwhals-dev/narwhals/issues/2939
+        # ! This might lead to wrong results if there are duplicate values in the sorting
+        # columns as the final order might still not be fully deterministic.
         sort_by = [name for name, dtype in left_schema.items() if not dtype.is_nested()]
 
         if not sort_by:
+            # If only nested dtypes are available, then we raise an exception.
             msg = "`check_row_order=False` is not supported (yet) with only nested data type."
             raise NotImplementedError(msg)
 
@@ -262,5 +265,3 @@ def _check_schema_equal(
             raise_frame_assertion_error(
                 detail="dtypes do not match", left=ldtypes, right=rdtypes
             )
-
-    return

From d3d4ed841b860b6da1a2b3e0cbce7b4530ebfea7 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Thu, 23 Oct 2025 12:30:03 +0200
Subject: [PATCH 10/16] ci: Test fairlearn using pytest marker

---
 .github/workflows/downstream_tests.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/downstream_tests.yml b/.github/workflows/downstream_tests.yml
index a18d60ffc0..7e47d7c909 100644
--- a/.github/workflows/downstream_tests.yml
+++ b/.github/workflows/downstream_tests.yml
@@ -625,6 +625,7 @@ jobs:
       - name: install-deps
         run: |
           cd fairlearn
+          # TODO(FBruzzesi): Align with fairlearn team to get a minimal requirement to test narwhals features
           uv pip install -e . -r requirements.txt matplotlib polars pyarrow pytest typing-extensions --system
       - name: install-narwhals-dev
         run: |
@@ -635,6 +636,4 @@ jobs:
       - name: run-pytest
         run: |
           cd fairlearn
-          # TODO(FBruzzesi): I hope this will be simplified once there is a decision on
-          # https://github.com/fairlearn/fairlearn/issues/1555
-          pytest test/unit/preprocessing test/unit/metrics
+          pytest test/unit -m narwhals

From 149c9c600e29e1739d5129d8846bc9b1aebdfe7f Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Thu, 23 Oct 2025 15:13:05 +0200
Subject: [PATCH 11/16] use uv run

---
 .github/workflows/downstream_tests.yml | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/downstream_tests.yml b/.github/workflows/downstream_tests.yml
index 7e47d7c909..44f7c2f35e 100644
--- a/.github/workflows/downstream_tests.yml
+++ b/.github/workflows/downstream_tests.yml
@@ -622,18 +622,17 @@ jobs:
           git clone https://github.com/fairlearn/fairlearn.git --depth 1
           cd fairlearn
           git log
-      - name: install-deps
+      - name: run pytest
         run: |
           cd fairlearn
           # TODO(FBruzzesi): Align with fairlearn team to get a minimal requirement to test narwhals features
-          uv pip install -e . -r requirements.txt matplotlib polars pyarrow pytest typing-extensions --system
-      - name: install-narwhals-dev
-        run: |
-          cd fairlearn
-          uv pip uninstall narwhals --system
-          uv pip install -e ./.. --system
-          uv pip freeze
-      - name: run-pytest
-        run: |
-          cd fairlearn
-          pytest test/unit -m narwhals
+          uv run \
+            --with . \
+            --with-requirements requirements.txt \
+            --with matplotlib \
+            --with polars \
+            --with pyarrow \
+            --with pytest \
+            --with typing-extensions \
+            --with ./.. \
+            pytest test/unit -m narwhals

From a27931f3025e7508c66736c6d3520325b4d57fb9 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Fri, 24 Oct 2025 09:25:01 +0200
Subject: [PATCH 12/16] one more try

---
 .github/workflows/downstream_tests.yml | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/downstream_tests.yml b/.github/workflows/downstream_tests.yml
index 44f7c2f35e..b3116ca0d2 100644
--- a/.github/workflows/downstream_tests.yml
+++ b/.github/workflows/downstream_tests.yml
@@ -627,12 +627,10 @@ jobs:
           cd fairlearn
           # TODO(FBruzzesi): Align with fairlearn team to get a minimal requirement to test narwhals features
           uv run \
-            --with . \
-            --with-requirements requirements.txt \
-            --with matplotlib \
-            --with polars \
-            --with pyarrow \
-            --with pytest \
-            --with typing-extensions \
-            --with ./.. \
-            pytest test/unit -m narwhals
+          --with . \
+          --with-requirements requirements.txt \
+          --with matplotlib \
+          --with pytest \
+          --with typing-extensions \
+          --with "./..[polars,pyarrow]"" \
+          pytest test/unit -m narwhals

From c3046b050b9e328e31865a95f8e8e88335dd9d83 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Fri, 24 Oct 2025 09:25:13 +0200
Subject: [PATCH 13/16] one more try

---
 .github/workflows/downstream_tests.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/downstream_tests.yml b/.github/workflows/downstream_tests.yml
index b3116ca0d2..0708bb9e7c 100644
--- a/.github/workflows/downstream_tests.yml
+++ b/.github/workflows/downstream_tests.yml
@@ -632,5 +632,5 @@ jobs:
           --with matplotlib \
           --with pytest \
           --with typing-extensions \
-          --with "./..[polars,pyarrow]"" \
-          pytest test/unit -m narwhals
+          --with "./..[polars,pyarrow]" \
+          pytest test/unit -m "narwhals"

From 6d9528b820c339d6e262674d2548c23f8b72be82 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Fri, 24 Oct 2025 09:38:29 +0200
Subject: [PATCH 14/16] ok use system py

---
 .github/workflows/downstream_tests.yml | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/downstream_tests.yml b/.github/workflows/downstream_tests.yml
index 0708bb9e7c..107b881eec 100644
--- a/.github/workflows/downstream_tests.yml
+++ b/.github/workflows/downstream_tests.yml
@@ -622,15 +622,18 @@ jobs:
           git clone https://github.com/fairlearn/fairlearn.git --depth 1
           cd fairlearn
           git log
-      - name: run pytest
+      - name: install-deps
         run: |
           cd fairlearn
           # TODO(FBruzzesi): Align with fairlearn team to get a minimal requirement to test narwhals features
-          uv run \
-          --with . \
-          --with-requirements requirements.txt \
-          --with matplotlib \
-          --with pytest \
-          --with typing-extensions \
-          --with "./..[polars,pyarrow]" \
+          uv pip install -e . -r requirements.txt matplotlib polars pyarrow pytest typing-extensions --system
+      - name: install-narwhals-dev
+        run: |
+          cd fairlearn
+          uv pip uninstall narwhals --system
+          uv pip install -e ./.. --system
+          uv pip freeze
+      - name: run-pytest
+        run: |
+          cd fairlearn
           pytest test/unit -m "narwhals"

From 1672b53947b975019defb4cb6cabfef436c10172 Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Fri, 24 Oct 2025 12:38:40 +0200
Subject: [PATCH 15/16] something went wrong in merging

---
 .github/workflows/downstream_tests.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/downstream_tests.yml b/.github/workflows/downstream_tests.yml
index 107b881eec..a18d60ffc0 100644
--- a/.github/workflows/downstream_tests.yml
+++ b/.github/workflows/downstream_tests.yml
@@ -625,7 +625,6 @@ jobs:
       - name: install-deps
         run: |
           cd fairlearn
-          # TODO(FBruzzesi): Align with fairlearn team to get a minimal requirement to test narwhals features
           uv pip install -e . -r requirements.txt matplotlib polars pyarrow pytest typing-extensions --system
       - name: install-narwhals-dev
         run: |
@@ -636,4 +635,6 @@ jobs:
       - name: run-pytest
         run: |
           cd fairlearn
-          pytest test/unit -m "narwhals"
+          # TODO(FBruzzesi): I hope this will be simplified once there is a decision on
+          # https://github.com/fairlearn/fairlearn/issues/1555
+          pytest test/unit/preprocessing test/unit/metrics

From 253f3ad223c5ce77cd8e061e7e41dd268dc279ae Mon Sep 17 00:00:00 2001
From: FBruzzesi <francesco.bruzzesi.93@gmail.com>
Date: Thu, 30 Oct 2025 14:40:00 +0100
Subject: [PATCH 16/16] fix docstrings

---
 narwhals/testing/asserts/frame.py | 52 +++++++++++++++----------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/narwhals/testing/asserts/frame.py b/narwhals/testing/asserts/frame.py
index 61a7eaa10c..3062053fdc 100644
--- a/narwhals/testing/asserts/frame.py
+++ b/narwhals/testing/asserts/frame.py
@@ -62,12 +62,12 @@ def assert_frame_equal(
             Enabling this helps compare columns that do not share the same string cache.
 
     Examples:
-        >>> import duckdb
+        >>> import polars as pl
         >>> import narwhals as nw
         >>> from narwhals.testing import assert_frame_equal
         >>>
-        >>> left_native = duckdb.sql("SELECT * FROM VALUES (1, ), (2, ), (3, ) df(a)")
-        >>> right_native = duckdb.sql("SELECT * FROM VALUES (1, ), (5, ), (3, ) df(a)")
+        >>> left_native = pl.LazyFrame({"a": [1, 2, 3]})
+        >>> right_native = pl.LazyFrame({"a": [1, 5, 3]})
         >>> left = nw.from_native(left_native)
         >>> right = nw.from_native(right_native)
         >>> assert_frame_equal(left, right)  # doctest: +ELLIPSIS
@@ -75,31 +75,29 @@ def assert_frame_equal(
             ...
         AssertionError: DataFrames are different (value mismatch for column "a")
         [left]:
-        ┌────────────────────────────────────────────────┐
-        |                Narwhals Series                 |
-        |------------------------------------------------|
-        |<pyarrow.lib.ChunkedArray object at ...
-        |[                                               |
-        |  [                                             |
-        |    1,                                          |
-        |    2,                                          |
-        |    3                                           |
-        |  ]                                             |
-        |]                                               |
-        └────────────────────────────────────────────────┘
+        ┌─────────────────┐
+        | Narwhals Series |
+        |-----------------|
+        |shape: (3,)      |
+        |Series: 'a' [i64]|
+        |[                |
+        |        1        |
+        |        2        |
+        |        3        |
+        |]                |
+        └─────────────────┘
         [right]:
-        ┌────────────────────────────────────────────────┐
-        |                Narwhals Series                 |
-        |------------------------------------------------|
-        |<pyarrow.lib.ChunkedArray object at ...
-        |[                                               |
-        |  [                                             |
-        |    1,                                          |
-        |    3,                                          |
-        |    5                                           |
-        |  ]                                             |
-        |]                                               |
-        └────────────────────────────────────────────────┘
+        ┌─────────────────┐
+        | Narwhals Series |
+        |-----------------|
+        |shape: (3,)      |
+        |Series: 'a' [i64]|
+        |[                |
+        |        1        |
+        |        5        |
+        |        3        |
+        |]                |
+        └─────────────────┘
     """
     __tracebackhide__ = True