From 3d9374686012acafd1593dd4782c7fbee5937c21 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 9 Oct 2025 10:53:25 +0200 Subject: [PATCH 01/30] ci: Unpin (some) dependencies --- .github/workflows/downstream_tests.yml | 9 --------- .github/workflows/extremes.yml | 9 ++++----- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/.github/workflows/downstream_tests.yml b/.github/workflows/downstream_tests.yml index 3ed885be33..eabcf6b655 100644 --- a/.github/workflows/downstream_tests.yml +++ b/.github/workflows/downstream_tests.yml @@ -187,8 +187,6 @@ jobs: run: | cd py-shiny . .venv/bin/activate - # temporary to get CI green - uv pip install "chatlas<0.9.0" make narwhals-install-shiny - name: install-narwhals-dev run: | @@ -242,11 +240,6 @@ jobs: cd tea-tasting uv pip uninstall narwhals uv pip install -e ./.. - - name: temporary pin duckdb # TODO(FBruzzesi): Unpin duckdb - run: | - cd tea-tasting - uv pip uninstall duckdb - uv pip install "duckdb<1.4" - name: show-deps run: | cd tea-tasting @@ -292,8 +285,6 @@ jobs: run: | uv pip uninstall narwhals --system uv pip install -e . --system - # temporary pin to get CI green - uv pip install "polars<1.30" --system - name: show-deps run: uv pip freeze - name: Run pytest diff --git a/.github/workflows/extremes.yml b/.github/workflows/extremes.yml index f4c9488df4..2e4b6c51e7 100644 --- a/.github/workflows/extremes.yml +++ b/.github/workflows/extremes.yml @@ -152,10 +152,8 @@ jobs: uv pip install pandas --pre --index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple --system -U - name: install pyarrow nightly run: | - # commented out nightly whilst it fails to install - uv pip install -U pyarrow --system - # uv pip uninstall pyarrow --system - # uv pip install pyarrow --pre --index https://pypi.fury.io/arrow-nightlies/ --system -U + uv pip uninstall pyarrow --system + uv pip install pyarrow --pre --index https://pypi.fury.io/arrow-nightlies/ --system -U - name: install numpy nightly run: | uv pip uninstall numpy --system @@ -174,9 +172,10 @@ jobs: run: | DEPS=$(uv pip freeze) echo "$DEPS" | grep 'pandas.*dev' - # echo "$DEPS" | grep 'pyarrow.*dev' + echo "$DEPS" | grep 'pyarrow.*dev' echo "$DEPS" | grep 'numpy.*dev' echo "$DEPS" | grep 'dask.*@' + echo "$DEPS" | grep 'duckdb.*dev' - name: Run pytest run: | pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow \ From 5d2600a843b99faa768e6dc2c4f2fbdcef0e3295 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 9 Oct 2025 11:33:54 +0000 Subject: [PATCH 02/30] ci(typing): Re-pin `duckdb==1.4.1` Will close #3188 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index eb1b3bdc4f..c386de3d23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ extra = [ # heavier dependencies we don't necessarily need in every testing job "scikit-learn", ] typing = [ # keep some of these pinned and bump periodically so there's fewer surprises for contributors - "duckdb==1.3.0", + "duckdb==1.4.1", "hypothesis", "pytest", "pandas-stubs==2.3.0.250703", From 875ec84cd47c819fc0f716b1528e041b8ff8a21d Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 9 Oct 2025 12:09:00 +0000 Subject: [PATCH 03/30] chore(typing): Kinda fix `lit`, `lambda_expr` --- narwhals/_duckdb/utils.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index 243c9bf0bc..4bf66c4200 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -1,7 +1,7 @@ from __future__ import annotations from functools import lru_cache -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import duckdb import duckdb.typing as duckdb_dtypes @@ -15,13 +15,15 @@ from collections.abc import Mapping, Sequence from duckdb import DuckDBPyRelation + from typing_extensions import TypeAlias from narwhals._compliant.typing import CompliantLazyFrameAny from narwhals._duckdb.dataframe import DuckDBLazyFrame from narwhals._duckdb.expr import DuckDBExpr from narwhals.dtypes import DType - from narwhals.typing import IntoDType, TimeUnit + from narwhals.typing import IntoDType, NonNestedLiteral, TimeUnit +Incomplete: TypeAlias = Any UNITS_DICT = { "y": "year", @@ -41,8 +43,13 @@ col = duckdb.ColumnExpression """Alias for `duckdb.ColumnExpression`.""" -lit = duckdb.ConstantExpression -"""Alias for `duckdb.ConstantExpression`.""" + +# TODO @dangotbanned: Raise an issue upstream on `Expression | str` too narrow +def lit(value: Expression | NonNestedLiteral | Sequence[Any]) -> Expression: + """Alias for `duckdb.ConstantExpression`.""" + lit_: Incomplete = duckdb.ConstantExpression + return lit_(value) + when = duckdb.CaseExpression """Alias for `duckdb.CaseExpression`.""" @@ -51,6 +58,8 @@ """Alias for `duckdb.FunctionExpression`.""" +# TODO @dangotbanned: Investigate `lhs: Expression | str | tuple[str]` +# Seems incorrect def lambda_expr( params: str | Expression | tuple[Expression, ...], expr: Expression, / ) -> Expression: @@ -64,7 +73,8 @@ def lambda_expr( msg = f"DuckDB>=1.2.0 is required for this operation. Found: DuckDB {duckdb.__version__}" raise NotImplementedError(msg) from exc args = (params,) if isinstance(params, Expression) else params - return LambdaExpression(args, expr) + lambda_expr_: Incomplete = LambdaExpression + return lambda_expr_(args, expr) def concat_str(*exprs: Expression, separator: str = "") -> Expression: From 49e96f11f79da32917be60466e70c0a9cba781a8 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 9 Oct 2025 12:12:23 +0000 Subject: [PATCH 04/30] fix: Always use `fetch_arrow_table` https://github.com/narwhals-dev/narwhals/pull/3015#issuecomment-3207105282 --- narwhals/_duckdb/dataframe.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 4c42a73d2e..8c1a371b5b 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -137,12 +137,8 @@ def collect( if backend is None or backend is Implementation.PYARROW: from narwhals._arrow.dataframe import ArrowDataFrame - if self._backend_version < (1, 4): - ret = self.native.arrow() - else: # pragma: no cover - ret = self.native.fetch_arrow_table() return ArrowDataFrame( - ret, + self.native.fetch_arrow_table(), validate_backend_version=True, version=self._version, validate_column_names=True, From d4df1c0377bd1e972c45d91fe09a5b4657ee6cc0 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 9 Oct 2025 14:03:57 +0000 Subject: [PATCH 05/30] fix(typing): Get `pyright` happy with `DuckDBPyType` https://github.com/narwhals-dev/narwhals/pull/3183#issuecomment-3385707277 --- narwhals/_duckdb/typing.py | 95 ++++++++++++++++++++++++++++++++++++-- narwhals/_duckdb/utils.py | 39 +++++++++------- 2 files changed, 113 insertions(+), 21 deletions(-) diff --git a/narwhals/_duckdb/typing.py b/narwhals/_duckdb/typing.py index cbd8d16847..fd5960ce9d 100644 --- a/narwhals/_duckdb/typing.py +++ b/narwhals/_duckdb/typing.py @@ -1,11 +1,13 @@ from __future__ import annotations -from typing import TYPE_CHECKING, TypedDict +from collections.abc import Sequence +from typing import TYPE_CHECKING, Any, Literal, Protocol, TypedDict, overload -if TYPE_CHECKING: - from collections.abc import Sequence +from narwhals._typing_compat import TypeVar - from duckdb import Expression +if TYPE_CHECKING: + from duckdb import DuckDBPyConnection, Expression + from typing_extensions import TypeAlias, TypeIs class WindowExpressionKwargs(TypedDict, total=False): @@ -16,3 +18,88 @@ class WindowExpressionKwargs(TypedDict, total=False): descending: Sequence[bool] nulls_last: Sequence[bool] ignore_nulls: bool + + +_Children: TypeAlias = Sequence[tuple[str, Any]] +T_co = TypeVar("T_co", covariant=True, bound=_Children, default=_Children) +DTypeT_co = TypeVar("DTypeT_co", covariant=True, bound="BaseType", default="BaseType") +_Child: TypeAlias = tuple[Literal["child"], DTypeT_co] +_Size: TypeAlias = tuple[Literal["size"], int] +_ID_co = TypeVar("_ID_co", bound=str, default=str, covariant=True) + + +class BaseType(Protocol[_ID_co]): + def __eq__(self, other: object) -> bool: ... + def __hash__(self) -> int: ... + @overload + def __init__(self, type_str: str, connection: DuckDBPyConnection) -> None: ... + @overload + def __init__(self, obj: object) -> None: ... + @property + def id(self) -> _ID_co: ... + + +class _ParentType(BaseType[_ID_co], Protocol[_ID_co, T_co]): + @property + def children(self) -> T_co: ... + + +class ArrayType( + _ParentType[Literal["array"], tuple[_Child[DTypeT_co], _Size]], Protocol[DTypeT_co] +): ... + + +class EnumType( + _ParentType[Literal["enum"], tuple[tuple[Literal["values"], list[str]]]], Protocol +): ... + + +class ListType( + _ParentType[Literal["list"], tuple[_Child[DTypeT_co]]], Protocol[DTypeT_co] +): + @property + def child(self) -> DTypeT_co: ... + + +class StructType( + _ParentType[Literal["struct"], Sequence[tuple[str, BaseType]]], Protocol +): + def __getattr__(self, name: str) -> BaseType: ... + def __getitem__(self, name: str) -> BaseType: ... + + +def has_children( + dtype: BaseType | _ParentType[_ID_co, T_co], +) -> TypeIs[_ParentType[_ID_co, T_co]]: + """Using `_hasattr_static` returns `True` on any `DuckDBPyType. + + The only way to be sure is forcing an exception. + """ + import duckdb + + try: + return hasattr(dtype, "children") + except duckdb.InvalidInputException: + return False + + +def is_dtype_array(obj: BaseType) -> TypeIs[ArrayType]: + return obj.id == "array" + + +def is_dtype_struct(obj: BaseType) -> TypeIs[StructType]: + return obj.id == "struct" + + +def is_dtype_list(obj: BaseType) -> TypeIs[ListType]: + return obj.id == "list" + + +def is_dtype_enum(obj: BaseType) -> TypeIs[EnumType]: + return obj.id == "enum" + + +def is_dtype_timestamp_with_time_zone( + obj: BaseType, +) -> TypeIs[BaseType[Literal["timestamp with time zone"]]]: + return obj.id == "timestamp with time zone" diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index 4bf66c4200..dd143d645b 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -8,8 +8,9 @@ from duckdb import Expression from duckdb.typing import DuckDBPyType +import narwhals._duckdb.typing as nw_dd_t from narwhals._utils import Version, isinstance_or_issubclass, zip_strict -from narwhals.exceptions import ColumnNotFoundError +from narwhals.exceptions import ColumnNotFoundError, UnsupportedDTypeError if TYPE_CHECKING: from collections.abc import Mapping, Sequence @@ -141,18 +142,27 @@ def time_zone(self) -> str: def native_to_narwhals_dtype( - duckdb_dtype: DuckDBPyType, version: Version, deferred_time_zone: DeferredTimeZone + duckdb_dtype: nw_dd_t.BaseType, version: Version, deferred_time_zone: DeferredTimeZone +) -> DType: + if nw_dd_t.has_children(duckdb_dtype): + return _nested_native_to_narwhals_dtype(duckdb_dtype, version, deferred_time_zone) + if nw_dd_t.is_dtype_timestamp_with_time_zone(duckdb_dtype): + return version.dtypes.Datetime(time_zone=deferred_time_zone.time_zone) + return _non_nested_native_to_narwhals_dtype(duckdb_dtype.id, version) + + +def _nested_native_to_narwhals_dtype( + duckdb_dtype: nw_dd_t._ParentType, + version: Version, + deferred_time_zone: DeferredTimeZone, ) -> DType: - duckdb_dtype_id = duckdb_dtype.id dtypes = version.dtypes - # Handle nested data types first - if duckdb_dtype_id == "list": + if nw_dd_t.is_dtype_list(duckdb_dtype): return dtypes.List( native_to_narwhals_dtype(duckdb_dtype.child, version, deferred_time_zone) ) - - if duckdb_dtype_id == "struct": + if nw_dd_t.is_dtype_struct(duckdb_dtype): children = duckdb_dtype.children return dtypes.Struct( [ @@ -163,28 +173,23 @@ def native_to_narwhals_dtype( for child in children ] ) - - if duckdb_dtype_id == "array": + if nw_dd_t.is_dtype_array(duckdb_dtype): child, size = duckdb_dtype.children shape: list[int] = [size[1]] - while child[1].id == "array": + while nw_dd_t.is_dtype_array(child[1]): child, size = child[1].children shape.insert(0, size[1]) inner = native_to_narwhals_dtype(child[1], version, deferred_time_zone) return dtypes.Array(inner=inner, shape=tuple(shape)) - - if duckdb_dtype_id == "enum": + if nw_dd_t.is_dtype_enum(duckdb_dtype): if version is Version.V1: return dtypes.Enum() # type: ignore[call-arg] categories = duckdb_dtype.children[0][1] return dtypes.Enum(categories=categories) - - if duckdb_dtype_id == "timestamp with time zone": - return dtypes.Datetime(time_zone=deferred_time_zone.time_zone) - - return _non_nested_native_to_narwhals_dtype(duckdb_dtype_id, version) + # `MAP`, `UNION` + raise UnsupportedDTypeError(duckdb_dtype) def fetch_rel_time_zone(rel: duckdb.DuckDBPyRelation) -> str: From d110f24d60c8545ea618c35425222f0423639e0f Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 9 Oct 2025 14:24:50 +0000 Subject: [PATCH 06/30] ci(typing): Let `mypy` follow `duckdb` imports https://github.com/duckdb/duckdb-python/issues/57 --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c386de3d23..550a01fe83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -321,7 +321,6 @@ module = [ "cupy.*", "dask.*", "dask_expr.*", - "duckdb.*", # https://github.com/ibis-project/ibis/issues/6844 "ibis.*", "joblib.*", From 2e910486ba336df83b5c3a4e49d862305852da62 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 9 Oct 2025 14:28:20 +0000 Subject: [PATCH 07/30] ci(typing): Disable `no-any-return` for `_duckdb` --- narwhals/_duckdb/dataframe.py | 6 +++--- narwhals/_duckdb/series.py | 2 +- narwhals/_duckdb/utils.py | 2 +- pyproject.toml | 1 + 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 8c1a371b5b..03ffe8a11c 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -101,7 +101,7 @@ def to_narwhals( if self._version is Version.V1: from narwhals.stable.v1 import DataFrame as DataFrameV1 - return DataFrameV1(self, level="interchange") # type: ignore[no-any-return] + return DataFrameV1(self, level="interchange") return self._version.lazyframe(self, level="lazy") def __narwhals_dataframe__(self) -> Self: # pragma: no cover @@ -115,7 +115,7 @@ def __narwhals_lazyframe__(self) -> Self: return self def __native_namespace__(self) -> ModuleType: - return get_duckdb() # type: ignore[no-any-return] + return get_duckdb() def __narwhals_namespace__(self) -> DuckDBNamespace: from narwhals._duckdb.namespace import DuckDBNamespace @@ -255,7 +255,7 @@ def to_pandas(self) -> pd.DataFrame: def to_arrow(self) -> pa.Table: # only if version is v1, keep around for backcompat - return self.lazy().collect(Implementation.PYARROW).native # type: ignore[no-any-return] + return self.lazy().collect(Implementation.PYARROW).native def _with_version(self, version: Version) -> Self: return self.__class__(self.native, version=version) diff --git a/narwhals/_duckdb/series.py b/narwhals/_duckdb/series.py index 5b284b95c3..91f3fc7ecb 100644 --- a/narwhals/_duckdb/series.py +++ b/narwhals/_duckdb/series.py @@ -24,7 +24,7 @@ def __narwhals_series__(self) -> Self: return self def __native_namespace__(self) -> ModuleType: - return get_duckdb() # type: ignore[no-any-return] + return get_duckdb() @property def dtype(self) -> DType: diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index dd143d645b..63a1e694b3 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -197,7 +197,7 @@ def fetch_rel_time_zone(rel: duckdb.DuckDBPyRelation) -> str: "duckdb_settings()", "select value from duckdb_settings() where name = 'TimeZone'" ).fetchone() assert result is not None # noqa: S101 - return result[0] # type: ignore[no-any-return] + return result[0] @lru_cache(maxsize=16) diff --git a/pyproject.toml b/pyproject.toml index 550a01fe83..905306b807 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -342,6 +342,7 @@ module = [ "narwhals._arrow.*", "narwhals._dask.*", "narwhals._spark_like.*", + "narwhals._duckdb.*" ] warn_return_any = false From d83ffc7adba619684bc8ef4cf7cc0655040446a8 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 9 Oct 2025 14:47:13 +0000 Subject: [PATCH 08/30] fix: Version branch deprecated `duckdb.typing` module --- narwhals/_duckdb/dataframe.py | 2 +- narwhals/_duckdb/namespace.py | 5 ++++- narwhals/_duckdb/utils.py | 15 ++++++++++++--- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py index 03ffe8a11c..9a26832fde 100644 --- a/narwhals/_duckdb/dataframe.py +++ b/narwhals/_duckdb/dataframe.py @@ -41,7 +41,7 @@ import pandas as pd import pyarrow as pa from duckdb import Expression - from duckdb.typing import DuckDBPyType + from duckdb.sqltypes import DuckDBPyType from typing_extensions import Self, TypeIs from narwhals._compliant.typing import CompliantDataFrameAny diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py index 009404c428..44344f366b 100644 --- a/narwhals/_duckdb/namespace.py +++ b/narwhals/_duckdb/namespace.py @@ -7,7 +7,6 @@ import duckdb from duckdb import CoalesceOperator, Expression -from duckdb.typing import BIGINT, VARCHAR from narwhals._duckdb.dataframe import DuckDBLazyFrame from narwhals._duckdb.expr import DuckDBExpr @@ -16,6 +15,7 @@ DeferredTimeZone, F, concat_str, + duckdb_dtypes, function, lit, narwhals_to_native_dtype, @@ -37,6 +37,9 @@ from narwhals._utils import Version from narwhals.typing import ConcatMethod, IntoDType, NonNestedLiteral +BIGINT = duckdb_dtypes.BIGINT +VARCHAR = duckdb_dtypes.VARCHAR + class DuckDBNamespace( SQLNamespace[DuckDBLazyFrame, DuckDBExpr, "DuckDBPyRelation", Expression] diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index 63a1e694b3..3fce6fb304 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -4,18 +4,17 @@ from typing import TYPE_CHECKING, Any import duckdb -import duckdb.typing as duckdb_dtypes from duckdb import Expression -from duckdb.typing import DuckDBPyType import narwhals._duckdb.typing as nw_dd_t -from narwhals._utils import Version, isinstance_or_issubclass, zip_strict +from narwhals._utils import Implementation, Version, isinstance_or_issubclass, zip_strict from narwhals.exceptions import ColumnNotFoundError, UnsupportedDTypeError if TYPE_CHECKING: from collections.abc import Mapping, Sequence from duckdb import DuckDBPyRelation + from duckdb.sqltypes import DuckDBPyType from typing_extensions import TypeAlias from narwhals._compliant.typing import CompliantLazyFrameAny @@ -26,6 +25,16 @@ Incomplete: TypeAlias = Any +BACKEND_VERSION = Implementation.DUCKDB._backend_version() +"""Static backend version for `duckdb`.""" + +if TYPE_CHECKING or BACKEND_VERSION >= (1, 4): + from duckdb import sqltypes as duckdb_dtypes + from duckdb.sqltypes import DuckDBPyType +else: + from duckdb import typing as duckdb_dtypes + from duckdb.typing import DuckDBPyType + UNITS_DICT = { "y": "year", "q": "quarter", From 6c65d72f2211227a107d8f0795fa27f0ebb6a3e5 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 9 Oct 2025 15:16:32 +0000 Subject: [PATCH 09/30] semi bump `sqlframe` --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 905306b807..c8fccadaee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,7 +71,8 @@ typing = [ # keep some of these pinned and bump periodically so there's fewer s "mypy~=1.15.0", "pyright", "pyarrow-stubs==19.2", - "sqlframe", + # https://github.com/eakmanrq/sqlframe/issues/531 + "sqlframe>=3.43.0", "polars==1.34.0", "uv", "narwhals[ibis]", From 98880d068f3bb24e68bc67f7df0e548a68a0f9f4 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 9 Oct 2025 15:54:31 +0000 Subject: [PATCH 10/30] fix: Exclude `decimal` from `nested` It has children, but we don't use them https://github.com/narwhals-dev/narwhals/actions/runs/18380855813/job/52366638686?pr=3189 --- narwhals/_duckdb/typing.py | 13 +++++++++++++ narwhals/_duckdb/utils.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/narwhals/_duckdb/typing.py b/narwhals/_duckdb/typing.py index fd5960ce9d..d9cb02c2d9 100644 --- a/narwhals/_duckdb/typing.py +++ b/narwhals/_duckdb/typing.py @@ -68,6 +68,15 @@ def __getattr__(self, name: str) -> BaseType: ... def __getitem__(self, name: str) -> BaseType: ... +class DecimalType( + _ParentType[ + Literal["decimal"], + tuple[tuple[Literal["precision"], int], tuple[Literal["scale"], int]], + ], + Protocol, +): ... + + def has_children( dtype: BaseType | _ParentType[_ID_co, T_co], ) -> TypeIs[_ParentType[_ID_co, T_co]]: @@ -103,3 +112,7 @@ def is_dtype_timestamp_with_time_zone( obj: BaseType, ) -> TypeIs[BaseType[Literal["timestamp with time zone"]]]: return obj.id == "timestamp with time zone" + + +def is_dtype_decimal(obj: BaseType) -> TypeIs[DecimalType]: + return obj.id == "decimal" diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index 55f677abe0..5287ef5300 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -151,7 +151,7 @@ def time_zone(self) -> str: def native_to_narwhals_dtype( duckdb_dtype: nw_dd_t.BaseType, version: Version, deferred_time_zone: DeferredTimeZone ) -> DType: - if nw_dd_t.has_children(duckdb_dtype): + if nw_dd_t.has_children(duckdb_dtype) and not nw_dd_t.is_dtype_decimal(duckdb_dtype): return _nested_native_to_narwhals_dtype(duckdb_dtype, version, deferred_time_zone) if nw_dd_t.is_dtype_timestamp_with_time_zone(duckdb_dtype): return version.dtypes.Datetime(time_zone=deferred_time_zone.time_zone) From 54f5030243019734bc72570e37c43c1b20102769 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 9 Oct 2025 15:55:54 +0000 Subject: [PATCH 11/30] fix: Exclude `decimal` from `nested` It has children, but we don't use them https://github.com/narwhals-dev/narwhals/actions/runs/18380855813/job/52366638686?pr=3189 --- narwhals/_duckdb/typing.py | 13 +++++++++++++ narwhals/_duckdb/utils.py | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/narwhals/_duckdb/typing.py b/narwhals/_duckdb/typing.py index fd5960ce9d..d9cb02c2d9 100644 --- a/narwhals/_duckdb/typing.py +++ b/narwhals/_duckdb/typing.py @@ -68,6 +68,15 @@ def __getattr__(self, name: str) -> BaseType: ... def __getitem__(self, name: str) -> BaseType: ... +class DecimalType( + _ParentType[ + Literal["decimal"], + tuple[tuple[Literal["precision"], int], tuple[Literal["scale"], int]], + ], + Protocol, +): ... + + def has_children( dtype: BaseType | _ParentType[_ID_co, T_co], ) -> TypeIs[_ParentType[_ID_co, T_co]]: @@ -103,3 +112,7 @@ def is_dtype_timestamp_with_time_zone( obj: BaseType, ) -> TypeIs[BaseType[Literal["timestamp with time zone"]]]: return obj.id == "timestamp with time zone" + + +def is_dtype_decimal(obj: BaseType) -> TypeIs[DecimalType]: + return obj.id == "decimal" diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index 55f677abe0..94e0ca9b93 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -30,7 +30,7 @@ if TYPE_CHECKING or BACKEND_VERSION >= (1, 4): from duckdb import sqltypes as duckdb_dtypes -else: +else: # pragma: no cover from duckdb import typing as duckdb_dtypes UNITS_DICT = { @@ -151,7 +151,7 @@ def time_zone(self) -> str: def native_to_narwhals_dtype( duckdb_dtype: nw_dd_t.BaseType, version: Version, deferred_time_zone: DeferredTimeZone ) -> DType: - if nw_dd_t.has_children(duckdb_dtype): + if nw_dd_t.has_children(duckdb_dtype) and not nw_dd_t.is_dtype_decimal(duckdb_dtype): return _nested_native_to_narwhals_dtype(duckdb_dtype, version, deferred_time_zone) if nw_dd_t.is_dtype_timestamp_with_time_zone(duckdb_dtype): return version.dtypes.Datetime(time_zone=deferred_time_zone.time_zone) From 863901c742b57f065fbd5c566610c707bac81507 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 9 Oct 2025 16:01:05 +0000 Subject: [PATCH 12/30] refactor: Use the aliases from (d83ffc7adba619684bc8ef4cf7cc0655040446a8) Merge conflict fixup --- narwhals/_duckdb/namespace.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/narwhals/_duckdb/namespace.py b/narwhals/_duckdb/namespace.py index 43ce9c39b7..44344f366b 100644 --- a/narwhals/_duckdb/namespace.py +++ b/narwhals/_duckdb/namespace.py @@ -111,9 +111,9 @@ def func(df: DuckDBLazyFrame) -> list[Expression]: cols_separated = [ y for x in [ - (col.cast(duckdb_dtypes.VARCHAR),) + (col.cast(VARCHAR),) if i == len(cols) - 1 - else (col.cast(duckdb_dtypes.VARCHAR), lit(separator)) + else (col.cast(VARCHAR), lit(separator)) for i, col in enumerate(cols) ] for y in x @@ -133,9 +133,7 @@ def func(cols: Iterable[Expression]) -> Expression: cols = list(cols) return reduce( operator.add, (CoalesceOperator(col, lit(0)) for col in cols) - ) / reduce( - operator.add, (col.isnotnull().cast(duckdb_dtypes.BIGINT) for col in cols) - ) + ) / reduce(operator.add, (col.isnotnull().cast(BIGINT) for col in cols)) return self._expr._from_elementwise_horizontal_op(func, *exprs) From 98d23e9b42acb91022a174ab44a985a76ea8109a Mon Sep 17 00:00:00 2001 From: Dan Redding <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 10 Oct 2025 09:41:58 +0100 Subject: [PATCH 13/30] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 55d85914fd..2dd04fb7fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ typing = [ # keep some of these pinned and bump periodically so there's fewer s "pyright", "pyarrow-stubs==19.2", # https://github.com/eakmanrq/sqlframe/issues/531 - "sqlframe>=3.43.0", + "sqlframe>=3.43.5", "polars==1.34.0", "uv", "narwhals[ibis]", From 3cf4ee43a1148c9519de92be76372c054dc3da52 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 10 Oct 2025 19:49:43 +0000 Subject: [PATCH 14/30] children children children https://github.com/narwhals-dev/narwhals/pull/3189#discussion_r2421820184 --- narwhals/_duckdb/typing.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/narwhals/_duckdb/typing.py b/narwhals/_duckdb/typing.py index d9cb02c2d9..e1fd43282c 100644 --- a/narwhals/_duckdb/typing.py +++ b/narwhals/_duckdb/typing.py @@ -20,8 +20,12 @@ class WindowExpressionKwargs(TypedDict, total=False): ignore_nulls: bool -_Children: TypeAlias = Sequence[tuple[str, Any]] -T_co = TypeVar("T_co", covariant=True, bound=_Children, default=_Children) +_Children_co = TypeVar( + "_Children_co", + covariant=True, + bound=Sequence[tuple[str, Any]], + default=Sequence[tuple[str, Any]], +) DTypeT_co = TypeVar("DTypeT_co", covariant=True, bound="BaseType", default="BaseType") _Child: TypeAlias = tuple[Literal["child"], DTypeT_co] _Size: TypeAlias = tuple[Literal["size"], int] @@ -39,9 +43,9 @@ def __init__(self, obj: object) -> None: ... def id(self) -> _ID_co: ... -class _ParentType(BaseType[_ID_co], Protocol[_ID_co, T_co]): +class _ParentType(BaseType[_ID_co], Protocol[_ID_co, _Children_co]): @property - def children(self) -> T_co: ... + def children(self) -> _Children_co: ... class ArrayType( @@ -78,8 +82,8 @@ class DecimalType( def has_children( - dtype: BaseType | _ParentType[_ID_co, T_co], -) -> TypeIs[_ParentType[_ID_co, T_co]]: + dtype: BaseType | _ParentType[_ID_co, _Children_co], +) -> TypeIs[_ParentType[_ID_co, _Children_co]]: """Using `_hasattr_static` returns `True` on any `DuckDBPyType. The only way to be sure is forcing an exception. From ef2d4ebef8941819430c91dab9d71e83d51790ce Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 10 Oct 2025 19:52:14 +0000 Subject: [PATCH 15/30] make `{Array,Enum,Decimal}Type` aliases They don't add any new members --- narwhals/_duckdb/typing.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/narwhals/_duckdb/typing.py b/narwhals/_duckdb/typing.py index e1fd43282c..b96832e206 100644 --- a/narwhals/_duckdb/typing.py +++ b/narwhals/_duckdb/typing.py @@ -48,14 +48,14 @@ class _ParentType(BaseType[_ID_co], Protocol[_ID_co, _Children_co]): def children(self) -> _Children_co: ... -class ArrayType( - _ParentType[Literal["array"], tuple[_Child[DTypeT_co], _Size]], Protocol[DTypeT_co] -): ... - - -class EnumType( - _ParentType[Literal["enum"], tuple[tuple[Literal["values"], list[str]]]], Protocol -): ... +ArrayType: TypeAlias = _ParentType[Literal["array"], tuple[_Child[DTypeT_co], _Size]] +EnumType: TypeAlias = _ParentType[ + Literal["enum"], tuple[tuple[Literal["values"], list[str]]] +] +DecimalType: TypeAlias = _ParentType[ + Literal["decimal"], + tuple[tuple[Literal["precision"], int], tuple[Literal["scale"], int]], +] class ListType( @@ -72,15 +72,6 @@ def __getattr__(self, name: str) -> BaseType: ... def __getitem__(self, name: str) -> BaseType: ... -class DecimalType( - _ParentType[ - Literal["decimal"], - tuple[tuple[Literal["precision"], int], tuple[Literal["scale"], int]], - ], - Protocol, -): ... - - def has_children( dtype: BaseType | _ParentType[_ID_co, _Children_co], ) -> TypeIs[_ParentType[_ID_co, _Children_co]]: From 9bb26cfbd76ff49430d5173d70642ddcf2448f66 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 10 Oct 2025 19:54:17 +0000 Subject: [PATCH 16/30] refactor: Un-inline an import --- narwhals/_duckdb/typing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/_duckdb/typing.py b/narwhals/_duckdb/typing.py index b96832e206..12820f307b 100644 --- a/narwhals/_duckdb/typing.py +++ b/narwhals/_duckdb/typing.py @@ -3,6 +3,8 @@ from collections.abc import Sequence from typing import TYPE_CHECKING, Any, Literal, Protocol, TypedDict, overload +import duckdb + from narwhals._typing_compat import TypeVar if TYPE_CHECKING: @@ -79,8 +81,6 @@ def has_children( The only way to be sure is forcing an exception. """ - import duckdb - try: return hasattr(dtype, "children") except duckdb.InvalidInputException: From b4e4980b0ea5968a9e834d147085263d656aa118 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 10 Oct 2025 20:42:52 +0000 Subject: [PATCH 17/30] rework into singular `is_dtype`, add docs - https://github.com/narwhals-dev/narwhals/pull/3189#discussion_r2420197664 - https://github.com/narwhals-dev/narwhals/pull/3189#discussion_r2420210095 - https://github.com/narwhals-dev/narwhals/pull/3189#pullrequestreview-3323207941 --- narwhals/_duckdb/typing.py | 102 ++++++++++++++++++++----------------- narwhals/_duckdb/utils.py | 21 ++++---- 2 files changed, 66 insertions(+), 57 deletions(-) diff --git a/narwhals/_duckdb/typing.py b/narwhals/_duckdb/typing.py index 12820f307b..12524e75b1 100644 --- a/narwhals/_duckdb/typing.py +++ b/narwhals/_duckdb/typing.py @@ -11,6 +11,8 @@ from duckdb import DuckDBPyConnection, Expression from typing_extensions import TypeAlias, TypeIs +__all__ = ["BaseType", "WindowExpressionKwargs", "has_children", "is_dtype"] + class WindowExpressionKwargs(TypedDict, total=False): partition_by: Sequence[str | Expression] @@ -32,9 +34,22 @@ class WindowExpressionKwargs(TypedDict, total=False): _Child: TypeAlias = tuple[Literal["child"], DTypeT_co] _Size: TypeAlias = tuple[Literal["size"], int] _ID_co = TypeVar("_ID_co", bound=str, default=str, covariant=True) +_Array: TypeAlias = Literal["array"] +_Struct: TypeAlias = Literal["struct"] +_List: TypeAlias = Literal["list"] +_Enum: TypeAlias = Literal["enum"] +_Decimal: TypeAlias = Literal["decimal"] +_TimestampTZ: TypeAlias = Literal["timestamp with time zone"] class BaseType(Protocol[_ID_co]): + """Structural equivalent to [`DuckDBPyType`]. + + Excludes attributes which are unsafe to use on most types. + + [`DuckDBPyType`]: https://github.com/duckdb/duckdb-python/blob/df7789cbd31b2d2b8d03d012f14331bc3297fb2d/_duckdb-stubs/_sqltypes.pyi#L35-L75 + """ + def __eq__(self, other: object) -> bool: ... def __hash__(self) -> int: ... @overload @@ -45,41 +60,14 @@ def __init__(self, obj: object) -> None: ... def id(self) -> _ID_co: ... -class _ParentType(BaseType[_ID_co], Protocol[_ID_co, _Children_co]): - @property - def children(self) -> _Children_co: ... - - -ArrayType: TypeAlias = _ParentType[Literal["array"], tuple[_Child[DTypeT_co], _Size]] -EnumType: TypeAlias = _ParentType[ - Literal["enum"], tuple[tuple[Literal["values"], list[str]]] -] -DecimalType: TypeAlias = _ParentType[ - Literal["decimal"], - tuple[tuple[Literal["precision"], int], tuple[Literal["scale"], int]], -] - - -class ListType( - _ParentType[Literal["list"], tuple[_Child[DTypeT_co]]], Protocol[DTypeT_co] -): - @property - def child(self) -> DTypeT_co: ... - - -class StructType( - _ParentType[Literal["struct"], Sequence[tuple[str, BaseType]]], Protocol -): - def __getattr__(self, name: str) -> BaseType: ... - def __getitem__(self, name: str) -> BaseType: ... - - def has_children( dtype: BaseType | _ParentType[_ID_co, _Children_co], ) -> TypeIs[_ParentType[_ID_co, _Children_co]]: - """Using `_hasattr_static` returns `True` on any `DuckDBPyType. + """Return True if `dtype.children` can be accessed safely. - The only way to be sure is forcing an exception. + `_hasattr_static` returns True on *any* [`DuckDBPyType`], so the only way to be sure is by forcing an exception. + + [`DuckDBPyType`]: https://github.com/duckdb/duckdb-python/blob/df7789cbd31b2d2b8d03d012f14331bc3297fb2d/_duckdb-stubs/_sqltypes.pyi#L35-L75 """ try: return hasattr(dtype, "children") @@ -87,27 +75,47 @@ def has_children( return False -def is_dtype_array(obj: BaseType) -> TypeIs[ArrayType]: - return obj.id == "array" - - -def is_dtype_struct(obj: BaseType) -> TypeIs[StructType]: - return obj.id == "struct" +@overload +def is_dtype(obj: BaseType, type_id: _Array, /) -> TypeIs[ArrayType]: ... +@overload +def is_dtype(obj: BaseType, type_id: _Struct, /) -> TypeIs[StructType]: ... +@overload +def is_dtype(obj: BaseType, type_id: _List, /) -> TypeIs[ListType]: ... +@overload +def is_dtype(obj: BaseType, type_id: _Enum, /) -> TypeIs[EnumType]: ... +@overload +def is_dtype(obj: BaseType, type_id: _Decimal, /) -> TypeIs[DecimalType]: ... +@overload +def is_dtype( + obj: BaseType, type_id: _TimestampTZ, / +) -> TypeIs[BaseType[_TimestampTZ]]: ... +def is_dtype( + obj: BaseType, type_id: _Array | _Struct | _List | _Enum | _Decimal | _TimestampTZ, / +) -> bool: + """Return True if `obj` is the [`DuckDBPyType`] corresponding with `type_id`. + + [`DuckDBPyType`]: https://github.com/duckdb/duckdb-python/blob/df7789cbd31b2d2b8d03d012f14331bc3297fb2d/_duckdb-stubs/_sqltypes.pyi#L35-L75 + """ + return obj.id == type_id -def is_dtype_list(obj: BaseType) -> TypeIs[ListType]: - return obj.id == "list" +class _ParentType(BaseType[_ID_co], Protocol[_ID_co, _Children_co]): + @property + def children(self) -> _Children_co: ... -def is_dtype_enum(obj: BaseType) -> TypeIs[EnumType]: - return obj.id == "enum" +ArrayType: TypeAlias = _ParentType[_Array, tuple[_Child[DTypeT_co], _Size]] +EnumType: TypeAlias = _ParentType[_Enum, tuple[tuple[Literal["values"], list[str]]]] +DecimalType: TypeAlias = _ParentType[ + _Decimal, tuple[tuple[Literal["precision"], int], tuple[Literal["scale"], int]] +] -def is_dtype_timestamp_with_time_zone( - obj: BaseType, -) -> TypeIs[BaseType[Literal["timestamp with time zone"]]]: - return obj.id == "timestamp with time zone" +class ListType(_ParentType[_List, tuple[_Child[DTypeT_co]]], Protocol[DTypeT_co]): + @property + def child(self) -> DTypeT_co: ... -def is_dtype_decimal(obj: BaseType) -> TypeIs[DecimalType]: - return obj.id == "decimal" +class StructType(_ParentType[_Struct, Sequence[tuple[str, BaseType]]], Protocol): + def __getattr__(self, name: str) -> BaseType: ... + def __getitem__(self, name: str) -> BaseType: ... diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index 94e0ca9b93..5178825833 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -6,7 +6,7 @@ import duckdb from duckdb import Expression -import narwhals._duckdb.typing as nw_dd_t +from narwhals._duckdb.typing import BaseType, has_children, is_dtype from narwhals._utils import Implementation, Version, isinstance_or_issubclass, zip_strict from narwhals.exceptions import ColumnNotFoundError, UnsupportedDTypeError @@ -17,6 +17,7 @@ from duckdb.sqltypes import DuckDBPyType from typing_extensions import TypeAlias + import narwhals._duckdb.typing from narwhals._compliant.typing import CompliantLazyFrameAny from narwhals._duckdb.dataframe import DuckDBLazyFrame from narwhals._duckdb.expr import DuckDBExpr @@ -149,27 +150,27 @@ def time_zone(self) -> str: def native_to_narwhals_dtype( - duckdb_dtype: nw_dd_t.BaseType, version: Version, deferred_time_zone: DeferredTimeZone + duckdb_dtype: BaseType, version: Version, deferred_time_zone: DeferredTimeZone ) -> DType: - if nw_dd_t.has_children(duckdb_dtype) and not nw_dd_t.is_dtype_decimal(duckdb_dtype): + if has_children(duckdb_dtype) and not is_dtype(duckdb_dtype, "decimal"): return _nested_native_to_narwhals_dtype(duckdb_dtype, version, deferred_time_zone) - if nw_dd_t.is_dtype_timestamp_with_time_zone(duckdb_dtype): + if is_dtype(duckdb_dtype, "timestamp with time zone"): return version.dtypes.Datetime(time_zone=deferred_time_zone.time_zone) return _non_nested_native_to_narwhals_dtype(duckdb_dtype.id, version) def _nested_native_to_narwhals_dtype( - duckdb_dtype: nw_dd_t._ParentType, + duckdb_dtype: narwhals._duckdb.typing._ParentType, version: Version, deferred_time_zone: DeferredTimeZone, ) -> DType: dtypes = version.dtypes - if nw_dd_t.is_dtype_list(duckdb_dtype): + if is_dtype(duckdb_dtype, "list"): return dtypes.List( native_to_narwhals_dtype(duckdb_dtype.child, version, deferred_time_zone) ) - if nw_dd_t.is_dtype_struct(duckdb_dtype): + if is_dtype(duckdb_dtype, "struct"): children = duckdb_dtype.children return dtypes.Struct( [ @@ -180,17 +181,17 @@ def _nested_native_to_narwhals_dtype( for child in children ] ) - if nw_dd_t.is_dtype_array(duckdb_dtype): + if is_dtype(duckdb_dtype, "array"): child, size = duckdb_dtype.children shape: list[int] = [size[1]] - while nw_dd_t.is_dtype_array(child[1]): + while is_dtype(child[1], "array"): child, size = child[1].children shape.insert(0, size[1]) inner = native_to_narwhals_dtype(child[1], version, deferred_time_zone) return dtypes.Array(inner=inner, shape=tuple(shape)) - if nw_dd_t.is_dtype_enum(duckdb_dtype): + if is_dtype(duckdb_dtype, "enum"): if version is Version.V1: return dtypes.Enum() # type: ignore[call-arg] categories = duckdb_dtype.children[0][1] From 7b43748f84a3d95ecaf1e9990e127b362ad6ec73 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 10 Oct 2025 20:45:31 +0000 Subject: [PATCH 18/30] excuse me, what? https://github.com/narwhals-dev/narwhals/actions/runs/18416967212/job/52482837824?pr=3189 --- narwhals/_duckdb/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index 5178825833..684f7f8676 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -193,7 +193,7 @@ def _nested_native_to_narwhals_dtype( return dtypes.Array(inner=inner, shape=tuple(shape)) if is_dtype(duckdb_dtype, "enum"): if version is Version.V1: - return dtypes.Enum() # type: ignore[call-arg] + return dtypes.Enum() # pyright: ignore[reportCallIssue] categories = duckdb_dtype.children[0][1] return dtypes.Enum(categories=categories) # `MAP`, `UNION` From 64f6b4f4017d0aca260386e33cab0175d1cd8a41 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 10 Oct 2025 23:08:21 +0000 Subject: [PATCH 19/30] fix: It found a bug!!! https://github.com/narwhals-dev/narwhals/pull/3189#discussion_r2422212545 --- narwhals/_duckdb/expr.py | 3 ++- narwhals/_duckdb/typing.py | 20 ++++++++++++++++++++ narwhals/_duckdb/utils.py | 6 +++--- tests/expr_and_series/is_in_test.py | 12 ++++++++++++ 4 files changed, 37 insertions(+), 4 deletions(-) diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py index f2cd30e630..ec43d8ded1 100644 --- a/narwhals/_duckdb/expr.py +++ b/narwhals/_duckdb/expr.py @@ -258,7 +258,8 @@ def is_finite(self) -> Self: return self._with_elementwise(lambda expr: F("isfinite", expr)) def is_in(self, other: Sequence[Any]) -> Self: - return self._with_elementwise(lambda expr: F("contains", lit(other), expr)) + other_ = tuple(other) if not isinstance(other, (tuple, list)) else other + return self._with_elementwise(lambda expr: F("contains", lit(other_), expr)) def fill_null( self, diff --git a/narwhals/_duckdb/typing.py b/narwhals/_duckdb/typing.py index 12524e75b1..c6ccfdd758 100644 --- a/narwhals/_duckdb/typing.py +++ b/narwhals/_duckdb/typing.py @@ -8,11 +8,31 @@ from narwhals._typing_compat import TypeVar if TYPE_CHECKING: + import uuid + + import numpy as np + import pandas as pd from duckdb import DuckDBPyConnection, Expression from typing_extensions import TypeAlias, TypeIs + from narwhals.typing import Into1DArray, PythonLiteral + + __all__ = ["BaseType", "WindowExpressionKwargs", "has_children", "is_dtype"] +IntoDuckDBLiteral: TypeAlias = """ + PythonLiteral + | dict[Any, Any] + | uuid.UUID + | bytearray + | memoryview + | Into1DArray + | pd.api.typing.NaTType + | pd.api.typing.NAType + | np.ma.MaskedArray + | duckdb.Value + """ + class WindowExpressionKwargs(TypedDict, total=False): partition_by: Sequence[str | Expression] diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index 684f7f8676..36dcf24587 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -6,7 +6,7 @@ import duckdb from duckdb import Expression -from narwhals._duckdb.typing import BaseType, has_children, is_dtype +from narwhals._duckdb.typing import BaseType, IntoDuckDBLiteral, has_children, is_dtype from narwhals._utils import Implementation, Version, isinstance_or_issubclass, zip_strict from narwhals.exceptions import ColumnNotFoundError, UnsupportedDTypeError @@ -22,7 +22,7 @@ from narwhals._duckdb.dataframe import DuckDBLazyFrame from narwhals._duckdb.expr import DuckDBExpr from narwhals.dtypes import DType - from narwhals.typing import IntoDType, NonNestedLiteral, TimeUnit + from narwhals.typing import IntoDType, TimeUnit Incomplete: TypeAlias = Any @@ -54,7 +54,7 @@ # TODO @dangotbanned: Raise an issue upstream on `Expression | str` too narrow -def lit(value: Expression | NonNestedLiteral | Sequence[Any]) -> Expression: +def lit(value: IntoDuckDBLiteral | Expression) -> Expression: """Alias for `duckdb.ConstantExpression`.""" lit_: Incomplete = duckdb.ConstantExpression return lit_(value) diff --git a/tests/expr_and_series/is_in_test.py b/tests/expr_and_series/is_in_test.py index 2ae6cabea5..e91ff2e8c8 100644 --- a/tests/expr_and_series/is_in_test.py +++ b/tests/expr_and_series/is_in_test.py @@ -26,6 +26,18 @@ def test_expr_is_in_empty_list(constructor: Constructor) -> None: assert_equal_data(result, expected) +def test_expr_is_in_iterable( + constructor: Constructor, request: pytest.FixtureRequest +) -> None: + if any(x in str(constructor) for x in ("sqlframe", "polars")): + request.applymarker(pytest.mark.xfail) + df = nw.from_native(constructor(data)) + sequence = 4, 2 + result = df.select(nw.col("a").is_in(iter(sequence))) + expected = {"a": [False, True, True, False]} + assert_equal_data(result, expected) + + def test_ser_is_in(constructor_eager: ConstructorEager) -> None: ser = nw.from_native(constructor_eager(data), eager_only=True)["a"] result = {"a": ser.is_in([4, 5])} From ed6f6b2463296f168efe416a4aeb5085096736a6 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 11 Oct 2025 10:17:38 +0000 Subject: [PATCH 20/30] fix(typing): Align `lambda_expr` to `cpp` implementation --- narwhals/_duckdb/typing.py | 15 ++++++++++++--- narwhals/_duckdb/utils.py | 15 +++++++++++---- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/narwhals/_duckdb/typing.py b/narwhals/_duckdb/typing.py index c6ccfdd758..43ca5a0509 100644 --- a/narwhals/_duckdb/typing.py +++ b/narwhals/_duckdb/typing.py @@ -1,9 +1,10 @@ from __future__ import annotations from collections.abc import Sequence -from typing import TYPE_CHECKING, Any, Literal, Protocol, TypedDict, overload +from typing import TYPE_CHECKING, Any, Literal, Protocol, TypedDict, Union, overload import duckdb +from duckdb import Expression from narwhals._typing_compat import TypeVar @@ -12,13 +13,19 @@ import numpy as np import pandas as pd - from duckdb import DuckDBPyConnection, Expression + from duckdb import DuckDBPyConnection from typing_extensions import TypeAlias, TypeIs from narwhals.typing import Into1DArray, PythonLiteral -__all__ = ["BaseType", "WindowExpressionKwargs", "has_children", "is_dtype"] +__all__ = [ + "BaseType", + "IntoColumnExpr", + "WindowExpressionKwargs", + "has_children", + "is_dtype", +] IntoDuckDBLiteral: TypeAlias = """ PythonLiteral @@ -60,6 +67,8 @@ class WindowExpressionKwargs(TypedDict, total=False): _Enum: TypeAlias = Literal["enum"] _Decimal: TypeAlias = Literal["decimal"] _TimestampTZ: TypeAlias = Literal["timestamp with time zone"] +IntoColumnExpr: TypeAlias = Union[str, Expression] +"""A column name, or the result of calling `duckdb.ColumnExpression`.""" class BaseType(Protocol[_ID_co]): diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index 36dcf24587..a48c81a2a1 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -6,7 +6,13 @@ import duckdb from duckdb import Expression -from narwhals._duckdb.typing import BaseType, IntoDuckDBLiteral, has_children, is_dtype +from narwhals._duckdb.typing import ( + BaseType, + IntoColumnExpr, + IntoDuckDBLiteral, + has_children, + is_dtype, +) from narwhals._utils import Implementation, Version, isinstance_or_issubclass, zip_strict from narwhals.exceptions import ColumnNotFoundError, UnsupportedDTypeError @@ -54,6 +60,7 @@ # TODO @dangotbanned: Raise an issue upstream on `Expression | str` too narrow +# NOTE: https://github.com/duckdb/duckdb-python/blob/df7789cbd31b2d2b8d03d012f14331bc3297fb2d/src/duckdb_py/native/python_conversion.cpp#L916-L1069 def lit(value: IntoDuckDBLiteral | Expression) -> Expression: """Alias for `duckdb.ConstantExpression`.""" lit_: Incomplete = duckdb.ConstantExpression @@ -67,10 +74,10 @@ def lit(value: IntoDuckDBLiteral | Expression) -> Expression: """Alias for `duckdb.FunctionExpression`.""" -# TODO @dangotbanned: Investigate `lhs: Expression | str | tuple[str]` -# Seems incorrect +# TODO @dangotbanned: Raise an issue upstream on `Expression | str | tuple[str` too narrow +# NOTE: https://github.com/duckdb/duckdb-python/blob/df7789cbd31b2d2b8d03d012f14331bc3297fb2d/src/duckdb_py/pyexpression.cpp#L361-L413 def lambda_expr( - params: str | Expression | tuple[Expression, ...], expr: Expression, / + params: IntoColumnExpr | tuple[IntoColumnExpr, ...], expr: Expression, / ) -> Expression: """Wraps [`duckdb.LambdaExpression`]. From eb8e329bde03ceae2247e9c1ffeda3f804123cca Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 11 Oct 2025 10:37:38 +0000 Subject: [PATCH 21/30] chore: Remove outdated comment --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2dd04fb7fd..b82adf7e67 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,7 +71,6 @@ typing = [ # keep some of these pinned and bump periodically so there's fewer s "mypy~=1.15.0", "pyright", "pyarrow-stubs==19.2", - # https://github.com/eakmanrq/sqlframe/issues/531 "sqlframe>=3.43.5", "polars==1.34.0", "uv", From e1fe121b737013a9a288bf89383560028390c92b Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 11 Oct 2025 22:35:30 +0000 Subject: [PATCH 22/30] revert: raising `UnsupportedDTypeError` resolves - https://github.com/narwhals-dev/narwhals/pull/3189#discussion_r2419291102 - https://github.com/akmalsoliev/Validoopsie/issues/30 --- narwhals/_duckdb/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index a047889a9a..159c2787bd 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -20,7 +20,7 @@ isinstance_or_issubclass, zip_strict, ) -from narwhals.exceptions import ColumnNotFoundError, UnsupportedDTypeError +from narwhals.exceptions import ColumnNotFoundError if TYPE_CHECKING: from collections.abc import Mapping, Sequence @@ -210,7 +210,7 @@ def _nested_native_to_narwhals_dtype( categories = duckdb_dtype.children[0][1] return dtypes.Enum(categories=categories) # `MAP`, `UNION` - raise UnsupportedDTypeError(duckdb_dtype) + return dtypes.Unknown() def fetch_rel_time_zone(rel: duckdb.DuckDBPyRelation) -> str: From fd9de79cc109d134676dbce55310cff5e02898f8 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 12 Oct 2025 12:27:42 +0000 Subject: [PATCH 23/30] ci: Install `ibis` from git Suggested by @MarcoGorelli in https://github.com/narwhals-dev/narwhals/pull/3186#issuecomment-3385725260 --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index b82adf7e67..3e4b7aca5f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,7 @@ typing = [ # keep some of these pinned and bump periodically so there's fewer s "polars==1.34.0", "uv", "narwhals[ibis]", + "ibis-framework", # remove after `ibis>10.8.0` available ] typing-ci = [ "narwhals[dask,modin]", @@ -116,6 +117,10 @@ local-dev = [ {include-group = "docs"} ] +[tool.uv.sources] +# https://github.com/ibis-project/ibis/pull/11666 +ibis-framework = {git = "https://github.com/ibis-project/ibis", rev = "f9f23635fb2eea6bc7bc8e2b3b667033c1b20aed"} + [tool.hatch.build.targets.sdist] include = [ "narwhals/*", From f84344cbfe4c62ff7e6c194f36973c2c5bd4f211 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 12 Oct 2025 12:29:47 +0000 Subject: [PATCH 24/30] fix: mkdocs as well https://github.com/narwhals-dev/narwhals/actions/runs/18443926780/job/52548020591?pr=3189 --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 3e4b7aca5f..4c3c4bcba9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,6 +86,7 @@ docs = [ "jinja2", "duckdb", "narwhals[ibis]", + "ibis-framework", # remove after `ibis>10.8.0` available "markdown-exec[ansi]", "mkdocs", "mkdocs-autorefs", From aa9e43a54ba53275f3cc5502bc7da4fc1dafa624 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 12 Oct 2025 12:40:04 +0000 Subject: [PATCH 25/30] ci: maybe fix https://github.com/narwhals-dev/narwhals/actions/runs/18443952251/job/52548075953?pr=3189 --- .github/workflows/pytest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index f217b4824c..cbd757c2b9 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -54,7 +54,7 @@ jobs: - name: install-reqs # we are not testing pyspark on Windows here because it is very slow # TODO(FBruzzesi): Unpin duckdb version once ibis makes a new release - run: uv pip install -e ".[dask, modin, ibis]" --group core-tests --group extra "duckdb<1.4" --system + run: uv pip install -e ".[dask, modin, ibis]" ibis-framework --group core-tests --group --system - name: show-deps run: uv pip freeze - name: Run pytest @@ -85,7 +85,7 @@ jobs: cache-dependency-glob: "pyproject.toml" - name: install-reqs # TODO(FBruzzesi): Unpin duckdb version once ibis makes a new release - run: uv pip install -e ".[dask, modin, ibis]" --group core-tests --group extra "duckdb<1.4" --system + run: uv pip install -e ".[dask, modin, ibis]" ibis-framework --group core-tests --group --system - name: show-deps run: uv pip freeze - name: Run pytest From 4637e0a1f934c07fe921208cb5296917de5ca4d1 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 12 Oct 2025 12:42:47 +0000 Subject: [PATCH 26/30] why do we have a group named extra --- .github/workflows/pytest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index cbd757c2b9..1d396d694d 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -54,7 +54,7 @@ jobs: - name: install-reqs # we are not testing pyspark on Windows here because it is very slow # TODO(FBruzzesi): Unpin duckdb version once ibis makes a new release - run: uv pip install -e ".[dask, modin, ibis]" ibis-framework --group core-tests --group --system + run: uv pip install -e ".[dask, modin, ibis]" ibis-framework --group core-tests --group extra --system - name: show-deps run: uv pip freeze - name: Run pytest @@ -85,7 +85,7 @@ jobs: cache-dependency-glob: "pyproject.toml" - name: install-reqs # TODO(FBruzzesi): Unpin duckdb version once ibis makes a new release - run: uv pip install -e ".[dask, modin, ibis]" ibis-framework --group core-tests --group --system + run: uv pip install -e ".[dask, modin, ibis]" ibis-framework --group core-tests --group extra --system - name: show-deps run: uv pip freeze - name: Run pytest From 5e2f7dacc6fdd388d8770a1b699dc79212acb284 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 12 Oct 2025 13:20:39 +0000 Subject: [PATCH 27/30] cov: leave todo for (#3197) --- narwhals/_duckdb/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py index 159c2787bd..e5b1ae1533 100644 --- a/narwhals/_duckdb/utils.py +++ b/narwhals/_duckdb/utils.py @@ -209,8 +209,9 @@ def _nested_native_to_narwhals_dtype( return dtypes.Enum() # pyright: ignore[reportCallIssue] categories = duckdb_dtype.children[0][1] return dtypes.Enum(categories=categories) + # TODO @dangotbanned: Get coverage during https://github.com/narwhals-dev/narwhals/issues/3197 # `MAP`, `UNION` - return dtypes.Unknown() + return dtypes.Unknown() # pragma: no cover def fetch_rel_time_zone(rel: duckdb.DuckDBPyRelation) -> str: From a58e4ce17619a5819f804aae55879c8380c9fd9f Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 12 Oct 2025 14:14:40 +0000 Subject: [PATCH 28/30] test: fix `ibis`, `sqlframe` xpass https://github.com/narwhals-dev/narwhals/pull/3189#issuecomment-3394351773 --- tests/expr_and_series/reduction_test.py | 9 +-------- tests/frame/collect_test.py | 9 ++------- tests/frame/unique_test.py | 20 ++++++++++++++++---- tests/utils.py | 24 ++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 19 deletions(-) diff --git a/tests/expr_and_series/reduction_test.py b/tests/expr_and_series/reduction_test.py index 2c672ce341..2e5da2213f 100644 --- a/tests/expr_and_series/reduction_test.py +++ b/tests/expr_and_series/reduction_test.py @@ -91,16 +91,9 @@ def test_empty_scalar_reduction_select(constructor: Constructor) -> None: assert_equal_data(result, expected) -def test_empty_scalar_reduction_with_columns( - constructor: Constructor, request: pytest.FixtureRequest -) -> None: +def test_empty_scalar_reduction_with_columns(constructor: Constructor) -> None: if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): pytest.skip() - if any(x in str(constructor) for x in ("sqlframe", "ibis")) and DUCKDB_VERSION >= ( - 1, - 4, - ): - request.applymarker(pytest.mark.xfail) from itertools import chain data = { diff --git a/tests/frame/collect_test.py b/tests/frame/collect_test.py index 135529ced9..3cc0973d19 100644 --- a/tests/frame/collect_test.py +++ b/tests/frame/collect_test.py @@ -7,7 +7,7 @@ import narwhals as nw from narwhals._utils import Implementation from narwhals.dependencies import get_cudf, get_modin, get_polars -from tests.utils import DUCKDB_VERSION, POLARS_VERSION, Constructor, assert_equal_data +from tests.utils import POLARS_VERSION, Constructor, assert_equal_data if TYPE_CHECKING: from narwhals._typing import Arrow, Dask, IntoBackend, Modin, Pandas, Polars @@ -163,12 +163,7 @@ def test_collect_with_kwargs(constructor: Constructor) -> None: assert_equal_data(result, expected) -def test_collect_empty(constructor: Constructor, request: pytest.FixtureRequest) -> None: - if any(x in str(constructor) for x in ("sqlframe", "ibis")) and DUCKDB_VERSION >= ( - 1, - 4, - ): - request.applymarker(pytest.mark.xfail) +def test_collect_empty(constructor: Constructor) -> None: df = nw.from_native(constructor({"a": [1, 2, 3]})) lf = df.filter(nw.col("a").is_null()).with_columns(b=nw.lit(None)).lazy() result = lf.collect() diff --git a/tests/frame/unique_test.py b/tests/frame/unique_test.py index 0aa3f175f0..9bb57e8ee3 100644 --- a/tests/frame/unique_test.py +++ b/tests/frame/unique_test.py @@ -6,7 +6,14 @@ import narwhals as nw from narwhals.exceptions import ColumnNotFoundError, InvalidOperationError -from tests.utils import DUCKDB_VERSION, Constructor, ConstructorEager, assert_equal_data +from tests.utils import ( + DUCKDB_VERSION, + IBIS_IS_GIT_SOURCE, + IBIS_VERSION, + Constructor, + ConstructorEager, + assert_equal_data, +) data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} @@ -157,9 +164,14 @@ def test_unique_none(constructor: Constructor) -> None: def test_unique_3069(constructor: Constructor, request: pytest.FixtureRequest) -> None: - if "ibis" in str(constructor): - # https://github.com/ibis-project/ibis/issues/11591 - request.applymarker(pytest.mark.xfail) + request.applymarker( + pytest.mark.xfail( + "ibis" in str(constructor) + and IBIS_VERSION <= (10, 8) + and not IBIS_IS_GIT_SOURCE, + reason="https://github.com/ibis-project/ibis/issues/11591", + ) + ) if "duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3): pytest.skip() data = {"name": ["a", "b", "c"], "group": ["d", "e", "f"], "value": [1, 2, 3]} diff --git a/tests/utils.py b/tests/utils.py index 4e06c35063..d35577ec55 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,10 +1,12 @@ from __future__ import annotations +import json import math import os import sys import warnings from datetime import date, datetime +from importlib.metadata import PackageNotFoundError, distribution from pathlib import Path from typing import TYPE_CHECKING, Any, Callable, cast @@ -33,7 +35,29 @@ def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: return (0, 0, 0) +def is_git_source(distribution_name: str, /) -> bool: # pragma: no cover + """Return True if `distribution_name` is installed and originated from a [git source]. + + Use to distinguish `__version__` which refers to the prior release, instead of a pre-release. + + [git source]: https://docs.astral.sh/uv/concepts/projects/dependencies/#git + """ + try: + dist = distribution(distribution_name) + except PackageNotFoundError: + return False + return bool( + (direct_url := dist.read_text("direct_url.json")) + and (backported_3_13_origin := json.loads(direct_url)) + # https://docs.python.org/3.13/library/importlib.metadata.html#importlib.metadata.Distribution + # https://github.com/python/cpython/pull/113175#issuecomment-1858138341 + and (vcs_info := backported_3_13_origin.get("vcs_info")) + and vcs_info.get("vcs") == "git" + ) + + IBIS_VERSION: tuple[int, ...] = get_module_version_as_tuple("ibis") +IBIS_IS_GIT_SOURCE: bool = is_git_source("ibis-framework") NUMPY_VERSION: tuple[int, ...] = get_module_version_as_tuple("numpy") PANDAS_VERSION: tuple[int, ...] = get_module_version_as_tuple("pandas") DUCKDB_VERSION: tuple[int, ...] = get_module_version_as_tuple("duckdb") From a98a3d9063e46ca7ccb96e9fcc50313533bd5303 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 15 Oct 2025 14:41:54 +0000 Subject: [PATCH 29/30] ci: Bump ibis https://github.com/ibis-project/ibis/releases/tag/11.0.0 --- .github/workflows/pytest.yml | 6 ++---- pyproject.toml | 6 +----- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 1d396d694d..b584ad2d6a 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -53,8 +53,7 @@ jobs: cache-dependency-glob: "pyproject.toml" - name: install-reqs # we are not testing pyspark on Windows here because it is very slow - # TODO(FBruzzesi): Unpin duckdb version once ibis makes a new release - run: uv pip install -e ".[dask, modin, ibis]" ibis-framework --group core-tests --group extra --system + run: uv pip install -e ".[dask, modin, ibis]" --group core-tests --group extra --system - name: show-deps run: uv pip freeze - name: Run pytest @@ -84,8 +83,7 @@ jobs: cache-suffix: pytest-full-coverage-${{ matrix.python-version }} cache-dependency-glob: "pyproject.toml" - name: install-reqs - # TODO(FBruzzesi): Unpin duckdb version once ibis makes a new release - run: uv pip install -e ".[dask, modin, ibis]" ibis-framework --group core-tests --group extra --system + run: uv pip install -e ".[dask, modin, ibis]" --group core-tests --group extra --system - name: show-deps run: uv pip freeze - name: Run pytest diff --git a/pyproject.toml b/pyproject.toml index c7352c50f1..51f22dc5ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,7 @@ typing = [ # keep some of these pinned and bump periodically so there's fewer s "polars==1.34.0", "uv", "narwhals[ibis]", - "ibis-framework", # remove after `ibis>10.8.0` available + "ibis-framework==11.0.0", ] typing-ci = [ "narwhals[dask,modin]", @@ -86,7 +86,6 @@ docs = [ "jinja2", "duckdb", "narwhals[ibis]", - "ibis-framework", # remove after `ibis>10.8.0` available "markdown-exec[ansi]", "mkdocs", "mkdocs-autorefs", @@ -118,9 +117,6 @@ local-dev = [ {include-group = "docs"} ] -[tool.uv.sources] -# https://github.com/ibis-project/ibis/pull/11666 -ibis-framework = {git = "https://github.com/ibis-project/ibis", rev = "f9f23635fb2eea6bc7bc8e2b3b667033c1b20aed"} [tool.hatch.build.targets.sdist] include = [ From 140951407bd5940c44cf1ab01898925489f2a72f Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 15 Oct 2025 14:44:33 +0000 Subject: [PATCH 30/30] test: remove workarounds https://github.com/narwhals-dev/narwhals/pull/3189#issuecomment-3394351773 --- tests/frame/unique_test.py | 5 +---- tests/utils.py | 24 ------------------------ 2 files changed, 1 insertion(+), 28 deletions(-) diff --git a/tests/frame/unique_test.py b/tests/frame/unique_test.py index 9bb57e8ee3..870b4a1ac6 100644 --- a/tests/frame/unique_test.py +++ b/tests/frame/unique_test.py @@ -8,7 +8,6 @@ from narwhals.exceptions import ColumnNotFoundError, InvalidOperationError from tests.utils import ( DUCKDB_VERSION, - IBIS_IS_GIT_SOURCE, IBIS_VERSION, Constructor, ConstructorEager, @@ -166,9 +165,7 @@ def test_unique_none(constructor: Constructor) -> None: def test_unique_3069(constructor: Constructor, request: pytest.FixtureRequest) -> None: request.applymarker( pytest.mark.xfail( - "ibis" in str(constructor) - and IBIS_VERSION <= (10, 8) - and not IBIS_IS_GIT_SOURCE, + "ibis" in str(constructor) and IBIS_VERSION < (11,), reason="https://github.com/ibis-project/ibis/issues/11591", ) ) diff --git a/tests/utils.py b/tests/utils.py index 9b5f324f62..4fc11492a1 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,12 +1,10 @@ from __future__ import annotations -import json import math import os import sys import warnings from datetime import date, datetime -from importlib.metadata import PackageNotFoundError, distribution from pathlib import Path from typing import TYPE_CHECKING, Any, Callable, cast @@ -35,29 +33,7 @@ def get_module_version_as_tuple(module_name: str) -> tuple[int, ...]: return (0, 0, 0) -def is_git_source(distribution_name: str, /) -> bool: # pragma: no cover - """Return True if `distribution_name` is installed and originated from a [git source]. - - Use to distinguish `__version__` which refers to the prior release, instead of a pre-release. - - [git source]: https://docs.astral.sh/uv/concepts/projects/dependencies/#git - """ - try: - dist = distribution(distribution_name) - except PackageNotFoundError: - return False - return bool( - (direct_url := dist.read_text("direct_url.json")) - and (backported_3_13_origin := json.loads(direct_url)) - # https://docs.python.org/3.13/library/importlib.metadata.html#importlib.metadata.Distribution - # https://github.com/python/cpython/pull/113175#issuecomment-1858138341 - and (vcs_info := backported_3_13_origin.get("vcs_info")) - and vcs_info.get("vcs") == "git" - ) - - IBIS_VERSION: tuple[int, ...] = get_module_version_as_tuple("ibis") -IBIS_IS_GIT_SOURCE: bool = is_git_source("ibis-framework") NUMPY_VERSION: tuple[int, ...] = get_module_version_as_tuple("numpy") PANDAS_VERSION: tuple[int, ...] = get_module_version_as_tuple("pandas") DUCKDB_VERSION: tuple[int, ...] = get_module_version_as_tuple("duckdb")