diff --git a/docs/api-reference/testing.md b/docs/api-reference/testing.md
index e73fb4d447..db83c6930e 100644
--- a/docs/api-reference/testing.md
+++ b/docs/api-reference/testing.md
@@ -4,4 +4,5 @@
     handler: python
     options:
       members:
+        - assert_frame_equal
         - assert_series_equal
diff --git a/narwhals/testing/__init__.py b/narwhals/testing/__init__.py
index 6bbab67b64..649463383f 100644
--- a/narwhals/testing/__init__.py
+++ b/narwhals/testing/__init__.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from narwhals.testing.asserts.frame import assert_frame_equal
 from narwhals.testing.asserts.series import assert_series_equal
 
-__all__ = ("assert_series_equal",)
+__all__ = ("assert_frame_equal", "assert_series_equal")
diff --git a/narwhals/testing/asserts/frame.py b/narwhals/testing/asserts/frame.py
new file mode 100644
index 0000000000..3062053fdc
--- /dev/null
+++ b/narwhals/testing/asserts/frame.py
@@ -0,0 +1,265 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from narwhals._utils import Implementation, qualified_type_name
+from narwhals.dataframe import DataFrame, LazyFrame
+from narwhals.dependencies import is_narwhals_dataframe, is_narwhals_lazyframe
+from narwhals.testing.asserts.series import assert_series_equal
+from narwhals.testing.asserts.utils import (
+    raise_assertion_error,
+    raise_frame_assertion_error,
+)
+
+if TYPE_CHECKING:
+    from narwhals.typing import DataFrameT, LazyFrameT
+
+GUARANTEES_ROW_ORDER = {  # Backends for which `check_row_order=True` is honoured.
+    Implementation.PANDAS,
+    Implementation.MODIN,
+    Implementation.CUDF,
+    Implementation.PYARROW,
+    Implementation.POLARS,
+    Implementation.DASK,
+}
+
+
+def assert_frame_equal(
+    left: DataFrameT | LazyFrameT,
+    right: DataFrameT | LazyFrameT,
+    *,
+    check_row_order: bool = True,
+    check_column_order: bool = True,
+    check_dtypes: bool = True,
+    check_exact: bool = False,
+    rel_tol: float = 1e-5,
+    abs_tol: float = 1e-8,
+    categorical_as_str: bool = False,
+) -> None:
+    """Assert that the left and right frames are equal.
+
+    Raises a detailed `AssertionError` if the frames differ.
+    This function is intended for use in unit tests.
+
+    Notes:
+        For backends that do not guarantee the row order, such as DuckDB, Ibis, PySpark,
+        and SQLFrame, the `check_row_order` argument is ignored: both frames are always
+        sorted by all their non-nested columns before the values are compared.
+
+    Arguments:
+        left: The first DataFrame or LazyFrame to compare.
+        right: The second DataFrame or LazyFrame to compare.
+        check_row_order: Requires row order to match. This flag is ignored for backends
+            that do not guarantee row order such as DuckDB, Ibis, PySpark, SQLFrame.
+        check_column_order: Requires column order to match.
+        check_dtypes: Requires data types to match.
+        check_exact: Requires float values to match exactly. If set to `False`, values are
+            considered equal when within tolerance of each other (see `rel_tol` and `abs_tol`).
+            Only affects columns with a Float data type.
+        rel_tol: Relative tolerance for inexact checking. Fraction of values in `right`.
+        abs_tol: Absolute tolerance for inexact checking.
+        categorical_as_str: Cast categorical columns to string before comparing.
+            Enabling this helps compare columns that do not share the same string cache.
+
+    Examples:
+        >>> import polars as pl
+        >>> import narwhals as nw
+        >>> from narwhals.testing import assert_frame_equal
+        >>>
+        >>> left_native = pl.LazyFrame({"a": [1, 2, 3]})
+        >>> right_native = pl.LazyFrame({"a": [1, 5, 3]})
+        >>> left = nw.from_native(left_native)
+        >>> right = nw.from_native(right_native)
+        >>> assert_frame_equal(left, right)  # doctest: +ELLIPSIS
+        Traceback (most recent call last):
+            ...
+        AssertionError: DataFrames are different (value mismatch for column "a")
+        [left]:
+        ┌─────────────────┐
+        | Narwhals Series |
+        |-----------------|
+        |shape: (3,)      |
+        |Series: 'a' [i64]|
+        |[                |
+        |        1        |
+        |        2        |
+        |        3        |
+        |]                |
+        └─────────────────┘
+        [right]:
+        ┌─────────────────┐
+        | Narwhals Series |
+        |-----------------|
+        |shape: (3,)      |
+        |Series: 'a' [i64]|
+        |[                |
+        |        1        |
+        |        5        |
+        |        3        |
+        |]                |
+        └─────────────────┘
+    """
+    __tracebackhide__ = True
+
+    if any(
+        not (is_narwhals_dataframe(obj) or is_narwhals_lazyframe(obj))
+        for obj in (left, right)
+    ):
+        msg = (
+            "Expected `narwhals.DataFrame` or `narwhals.LazyFrame` instance, found:\n"
+            f"[left]: {qualified_type_name(type(left))}\n"
+            f"[right]: {qualified_type_name(type(right))}\n\n"
+            "Hint: Use `nw.from_native(obj, allow_series=False)` to convert each native "
+            "object into a `narwhals.DataFrame` or `narwhals.LazyFrame` first."
+        )
+        raise TypeError(msg)
+
+    left_impl, right_impl = left.implementation, right.implementation
+    if left_impl != right_impl:
+        raise_frame_assertion_error("implementation mismatch", left_impl, right_impl)
+
+    left_eager, right_eager = _check_correct_input_type(left, right)
+
+    _assert_dataframe_equal(
+        left=left_eager,
+        right=right_eager,
+        impl=left_impl,
+        check_row_order=check_row_order,
+        check_column_order=check_column_order,
+        check_dtypes=check_dtypes,
+        check_exact=check_exact,
+        rel_tol=rel_tol,
+        abs_tol=abs_tol,
+        categorical_as_str=categorical_as_str,
+    )
+
+
+def _check_correct_input_type(  # noqa: RET503
+    left: DataFrameT | LazyFrameT, right: DataFrameT | LazyFrameT
+) -> tuple[DataFrame[Any], DataFrame[Any]]:
+    # Adapted from https://github.com/pola-rs/polars/blob/afdbf3056d1228cf493901e45f536b0905cec8ea/py-polars/src/polars/testing/asserts/frame.py#L15-L17
+    if isinstance(left, DataFrame) and isinstance(right, DataFrame):
+        return left, right
+    # Two lazy frames are collected so that the comparison runs on eager frames.
+    if isinstance(left, LazyFrame) and isinstance(right, LazyFrame):
+        return left.collect(), right.collect()
+    # Any other pairing (e.g. eager vs lazy) fails; the helper is typed `-> Never`.
+    raise_assertion_error(
+        "inputs",
+        "unexpected input types",
+        left=type(left).__name__,
+        right=type(right).__name__,
+    )
+
+
+def _assert_dataframe_equal(
+    left: DataFrameT,
+    right: DataFrameT,
+    impl: Implementation,
+    *,
+    check_row_order: bool,
+    check_column_order: bool,
+    check_dtypes: bool,
+    check_exact: bool,
+    rel_tol: float,
+    abs_tol: float,
+    categorical_as_str: bool,
+) -> None:
+    # Adapted from https://github.com/pola-rs/polars/blob/afdbf3056d1228cf493901e45f536b0905cec8ea/crates/polars-testing/src/asserts/utils.rs#L829
+    # NOTE: Here `impl` comes from the original dataframe, not the `.collect`-ed one, and
+    # it's used to distinguish between backends that do and do not guarantee row order.
+    _check_schema_equal(
+        left, right, check_dtypes=check_dtypes, check_column_order=check_column_order
+    )
+
+    left_len, right_len = len(left), len(right)
+    if left_len != right_len:
+        raise_frame_assertion_error("height (row count) mismatch", left_len, right_len)
+    # TODO(FBruzzesi): Should we return early if row count is zero?
+
+    left_schema = left.schema
+    if (not check_row_order) or (impl not in GUARANTEES_ROW_ORDER):
+        # NOTE: Sort by all the non-nested dtypes columns.
+        # See: https://github.com/narwhals-dev/narwhals/issues/2939
+        # ! This might lead to wrong results if there are duplicate values in the sorting
+        # columns, as the final order might still not be fully deterministic.
+        sort_by = [name for name, dtype in left_schema.items() if not dtype.is_nested()]
+
+        if not sort_by:
+            # If only nested dtypes are available, then we raise an exception.
+            msg = "`check_row_order=False` is not supported (yet) with only nested data type."
+            raise NotImplementedError(msg)
+
+        left = left.sort(sort_by)
+        right = right.sort(sort_by)
+    # Schema-level checks already ran above, hence `check_dtypes`/`check_names` are off.
+    for col_name in left_schema.names():
+        _series_left = left.get_column(col_name)
+        _series_right = right.get_column(col_name)
+        try:
+            assert_series_equal(
+                _series_left,
+                _series_right,
+                check_dtypes=False,
+                check_names=False,
+                check_order=True,
+                check_exact=check_exact,
+                rel_tol=rel_tol,
+                abs_tol=abs_tol,
+                categorical_as_str=categorical_as_str,
+            )
+        except AssertionError:
+            raise_frame_assertion_error(
+                f'value mismatch for column "{col_name}"', _series_left, _series_right
+            )
+
+
+def _check_schema_equal(
+    left: DataFrameT, right: DataFrameT, *, check_dtypes: bool, check_column_order: bool
+) -> None:
+    """Check names, then optionally column order and dtypes; raise on first mismatch.
+
+    Adapted from https://github.com/pola-rs/polars/blob/afdbf3056d1228cf493901e45f536b0905cec8ea/crates/polars-testing/src/asserts/utils.rs#L667-L698
+    """
+    lschema, rschema = left.schema, right.schema
+
+    # Fast path: identical schemas need no further checks.
+    if lschema == rschema:
+        return
+
+    lnames, rnames = lschema.names(), rschema.names()
+    lset, rset = set(lnames), set(rnames)
+    # Report names as ordered lists: `set` reprs are not stable across interpreter runs.
+    if lset != rset:
+        if left_not_in_right := sorted(lset.difference(rset)):
+            raise_frame_assertion_error(
+                detail=f"{left_not_in_right} in left, but not in right",
+                left=lnames,
+                right=rnames,
+            )
+        if right_not_in_left := sorted(rset.difference(lset)):  # pragma: no cover
+            # NOTE: the `pragma: no cover` flag is due to a false negative.
+            # The last test in `test_check_schema_mismatch` does cover this case.
+            raise_frame_assertion_error(
+                detail=f"{right_not_in_left} in right, but not in left",
+                left=lnames,
+                right=rnames,
+            )
+
+    if check_column_order and lnames != rnames:
+        raise_frame_assertion_error(
+            detail="columns are not in the same order", left=lnames, right=rnames
+        )
+
+    if check_dtypes:
+        ldtypes = lschema.dtypes()
+        rdtypes = (
+            rschema.dtypes()
+            if check_column_order
+            else [rschema[col_name] for col_name in lnames]
+        )
+
+        if ldtypes != rdtypes:
+            raise_frame_assertion_error(
+                detail="dtypes do not match", left=ldtypes, right=rdtypes
+            )
diff --git a/narwhals/testing/asserts/utils.py b/narwhals/testing/asserts/utils.py
index 13720c2a35..1af3e846a0 100644
--- a/narwhals/testing/asserts/utils.py
+++ b/narwhals/testing/asserts/utils.py
@@ -7,23 +7,35 @@
 if TYPE_CHECKING:
     from typing_extensions import Never, TypeAlias
 
-# NOTE: This alias is created to facilitate autocomplete. Feel free to extend it as
-# you please when adding a new feature.
-# See: https://github.com/narwhals-dev/narwhals/pull/2983#discussion_r2337548736
-SeriesDetail: TypeAlias = Literal[
-    "implementation mismatch",
-    "length mismatch",
-    "dtype mismatch",
-    "name mismatch",
-    "null value mismatch",
-    "exact value mismatch",
-    "values not within tolerance",
-    "nested value mismatch",
-]
+    # NOTE: These aliases are created to facilitate autocompletion.
+    # Feel free to extend them as you please when adding new features.
+    # See: https://github.com/narwhals-dev/narwhals/pull/2983#discussion_r2337548736
+    ObjectName: TypeAlias = Literal["inputs", "Series", "DataFrames"]
+    SeriesDetail: TypeAlias = Literal[
+        "dtype mismatch",
+        "exact value mismatch",
+        "implementation mismatch",
+        "length mismatch",
+        "name mismatch",
+        "nested value mismatch",
+        "null value mismatch",
+        "values not within tolerance",
+    ]
+    DataFramesDetail: TypeAlias = Literal[
+        "columns are not in the same order",
+        "dtypes do not match",
+        "height (row count) mismatch",
+        "implementation mismatch",
+    ]
 
 
 def raise_assertion_error(
-    objects: str, detail: str, left: Any, right: Any, *, cause: Exception | None = None
+    objects: ObjectName,
+    detail: str,
+    left: Any,
+    right: Any,
+    *,
+    cause: Exception | None = None,
 ) -> Never:
     """Raise a detailed assertion error."""
     __tracebackhide__ = True
@@ -43,3 +55,17 @@ def raise_series_assertion_error(
     detail: SeriesDetail, left: Any, right: Any, *, cause: Exception | None = None
 ) -> Never:
     raise_assertion_error("Series", detail, left, right, cause=cause)
+
+
+def raise_frame_assertion_error(
+    detail: DataFramesDetail | str,
+    left: Any,
+    right: Any,
+    *,
+    cause: Exception | None = None,
+) -> Never:
+    """Raise a detailed assertion error for a DataFrames comparison."""
+    # NOTE: `DataFramesDetail | str` makes the literal (`DataFramesDetail`) redundant.
+    # Editors still autocomplete its values, while `str` allows the dynamic details
+    # that depend upon which columns lead to the assertion error.
+    raise_assertion_error("DataFrames", detail, left, right, cause=cause)
diff --git a/tests/testing/assert_frame_equal_test.py b/tests/testing/assert_frame_equal_test.py
new file mode 100644
index 0000000000..7278f25e7d
--- /dev/null
+++ b/tests/testing/assert_frame_equal_test.py
@@ -0,0 +1,257 @@
+from __future__ import annotations
+
+import re
+from contextlib import AbstractContextManager, nullcontext as does_not_raise
+from typing import TYPE_CHECKING, Any
+
+import pytest
+
+import narwhals as nw
+from narwhals.testing import assert_frame_equal
+from narwhals.testing.asserts.frame import GUARANTEES_ROW_ORDER
+from tests.utils import PANDAS_VERSION
+
+if TYPE_CHECKING:
+    from narwhals.typing import IntoSchema
+    from tests.conftest import Data
+    from tests.utils import Constructor, ConstructorEager
+
+
+def _assertion_error(detail: str) -> pytest.RaisesExc:
+    pattern = re.escape(f"DataFrames are different ({detail})")
+    return pytest.raises(AssertionError, match=pattern)
+
+
+def test_check_narwhals_objects(constructor: Constructor) -> None:
+    """Native (non-Narwhals) inputs must be rejected with a `TypeError`."""
+    native = constructor({"a": [1, 2, 3]})
+    expected = "Expected `narwhals.DataFrame` or `narwhals.LazyFrame` instance, found"
+
+    # `native` is deliberately not wrapped with `nw.from_native`.
+    with pytest.raises(TypeError, match=re.escape(expected)):
+        assert_frame_equal(native, native)  # type: ignore[arg-type]
+
+
+def test_implementation_mismatch() -> None:
+    """Test that frames from different backends (pandas vs pyarrow) raise an error."""
+    pytest.importorskip("pandas")
+    pytest.importorskip("pyarrow")
+
+    import pandas as pd
+    import pyarrow as pa
+
+    with _assertion_error("implementation mismatch"):
+        assert_frame_equal(
+            nw.from_native(pd.DataFrame({"a": [1]})),
+            nw.from_native(pa.table({"a": [1]})),  # type: ignore[type-var] # pyright: ignore[reportArgumentType]
+        )
+
+
+def test_check_same_input_type(constructor_eager: ConstructorEager) -> None:
+    """Test that left and right frames are either both eager or both lazy.
+
+    NOTE: Use `constructor_eager` instead of `constructor` so that the roundtrip
+        `.lazy().collect()` keeps the implementation, so the input-type check is reached.
+    """
+    frame = nw.from_native(constructor_eager({"a": [1, 2, 3]}))
+
+    msg = re.escape("inputs are different (unexpected input types)")
+    with pytest.raises(AssertionError, match=msg):
+        assert_frame_equal(frame.lazy(), frame.lazy().collect())
+
+
+@pytest.mark.parametrize(
+    ("left_schema", "right_schema", "check_dtypes", "check_column_order", "context"),
+    [
+        # Same order, same dtypes
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"a": nw.Int32(), "b": nw.Float32()},
+            True,
+            True,
+            does_not_raise(),
+        ),
+        # Same order, different dtypes: fails only when `check_dtypes=True`
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"a": nw.Int32(), "b": nw.Float64()},
+            False,
+            True,
+            does_not_raise(),
+        ),
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"a": nw.Int32(), "b": nw.Float64()},
+            True,
+            True,
+            _assertion_error("dtypes do not match"),
+        ),
+        # Different order, same dtypes: fails only when `check_column_order=True`
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"b": nw.Float32(), "a": nw.Int32()},
+            True,
+            False,
+            does_not_raise(),
+        ),
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"b": nw.Float32(), "a": nw.Int32()},
+            True,
+            True,
+            _assertion_error("columns are not in the same order"),
+        ),
+        # Different order, different dtypes: the column order check is reported first
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"b": nw.Float64(), "a": nw.Int16()},
+            False,
+            False,
+            does_not_raise(),
+        ),
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"b": nw.Float64(), "a": nw.Int16()},
+            True,
+            False,
+            _assertion_error("dtypes do not match"),
+        ),
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"b": nw.Float64(), "a": nw.Int16()},
+            False,
+            True,
+            _assertion_error("columns are not in the same order"),
+        ),
+        (
+            {"a": nw.Int32(), "b": nw.Float32()},
+            {"b": nw.Float64(), "a": nw.Int16()},
+            True,
+            True,
+            _assertion_error("columns are not in the same order"),
+        ),
+        # Different columns (left not in right)
+        (
+            {"a": nw.Int32(), "z": nw.String()},
+            {"b": nw.Float64()},
+            True,
+            True,
+            _assertion_error("['a', 'z'] in left, but not in right"),
+        ),
+        # Different columns (right not in left)
+        (
+            {"z": nw.String()},
+            {"z": nw.String(), "b": nw.Float64()},
+            True,
+            True,
+            _assertion_error("['b'] in right, but not in left"),
+        ),
+    ],
+)
+def test_check_schema_mismatch(
+    constructor: Constructor,
+    left_schema: IntoSchema,
+    right_schema: IntoSchema,
+    *,
+    check_dtypes: bool,
+    check_column_order: bool,
+    context: AbstractContextManager[Any],
+) -> None:
+    data = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["foo", "bar", "baz"]}
+    left = nw.from_native(constructor(data)).select(
+        nw.col(name).cast(dtype) for name, dtype in left_schema.items()
+    )
+    right = nw.from_native(constructor(data)).select(
+        nw.col(name).cast(dtype) for name, dtype in right_schema.items()
+    )
+
+    with context:
+        assert_frame_equal(
+            left, right, check_column_order=check_column_order, check_dtypes=check_dtypes
+        )
+
+
+def test_height_mismatch(constructor: Constructor) -> None:
+    """Frames with a different number of rows must fail the height check."""
+    three_rows = nw.from_native(constructor({"a": [1, 2, 3]}))
+    two_rows = nw.from_native(constructor({"a": [1, 3]}))
+    with _assertion_error("height (row count) mismatch"):
+        assert_frame_equal(three_rows, two_rows)
+
+
+@pytest.mark.parametrize("check_row_order", [True, False])
+def test_check_row_order(
+    constructor: Constructor, request: pytest.FixtureRequest, *, check_row_order: bool
+) -> None:
+    if "pandas" in str(constructor):  # pragma: no cover
+        if PANDAS_VERSION < (2, 2):
+            reason = "Pandas too old for nested dtypes"
+            pytest.skip(reason=reason)
+        pytest.importorskip("pyarrow")
+
+    if "dask" in str(constructor):
+        reason = "Unsupported List type"
+        request.applymarker(pytest.mark.xfail(reason=reason))
+
+    data = {"a": [1, 2], "b": [["x", "y"], ["x", "z"]]}
+    # `left` and `right` hold the same rows, sorted by "a" in opposite directions.
+    b_expr = nw.col("b").cast(nw.List(nw.String()))
+    left = (
+        nw.from_native(constructor(data)).with_columns(b_expr).sort("a", descending=False)
+    )
+    right = (
+        nw.from_native(constructor(data)).with_columns(b_expr).sort("a", descending=True)
+    )
+    # A mismatch is expected only if row order is both requested and guaranteed.
+    context = (
+        _assertion_error('value mismatch for column "a"')
+        if check_row_order and left.implementation in GUARANTEES_ROW_ORDER
+        else does_not_raise()
+    )
+
+    with context:
+        assert_frame_equal(left, right, check_row_order=check_row_order)
+
+
+def test_check_row_order_nested_only(
+    constructor: Constructor, request: pytest.FixtureRequest
+) -> None:
+    if "pandas" in str(constructor):  # pragma: no cover
+        if PANDAS_VERSION < (2, 2):
+            reason = "Pandas too old for nested dtypes"
+            pytest.skip(reason=reason)
+        pytest.importorskip("pyarrow")
+
+    if "dask" in str(constructor):
+        reason = "Unsupported List type"
+        request.applymarker(pytest.mark.xfail(reason=reason))
+
+    data = {"b": [["x", "y"], ["x", "z"]]}
+
+    b_expr = nw.col("b").cast(nw.List(nw.String()))
+    left = nw.from_native(constructor(data)).select(b_expr)
+    # With only a nested column there is nothing to sort by, so the check is unsupported.
+    msg = "`check_row_order=False` is not supported (yet) with only nested data type."
+    with pytest.raises(NotImplementedError, match=re.escape(msg)):
+        assert_frame_equal(left, left, check_row_order=False)
+
+
+# TODO(FBruzzesi): Do we even need this? What should we check?
+# The best way could be to migrate away from `assert_equal_data` in the test suite
+def test_values_mismatch(constructor: Constructor) -> None: ...
+
+
+def test_self_equal(constructor: Constructor, testing_data: Data) -> None:
+    """Test that a dataframe is equal to itself, including nested dtypes with nulls.
+
+    We are dropping columns whose type is unsupported by _some_ backend.
+    """
+    cols_to_drop = ("categorical", "enum", "duration", "struct", "time")
+
+    if "pandas" in str(constructor) and PANDAS_VERSION < (2, 2):  # pragma: no cover
+        reason = "Pandas too old for nested dtypes"
+        pytest.skip(reason=reason)
+
+    _data = {k: v for k, v in testing_data.items() if k not in cols_to_drop}
+    df = nw.from_native(constructor(_data))
+    assert_frame_equal(df, df)