Skip to content

Commit 669a258

Browse files
cmp0xff and Dr-Irv authored
fix(series): arithmetics for Series[Any] (#1343)
* fix(series): arithmetics for Series[Any]
* fix(comment): #1343 (comment)
* chore(typing): update mypy and ty
* fix(comment): without str #1343 (comment)
* handle str ops
* feat(series): implement the proposal
* fix(comment): reduce with pytest.raises(AssertionError)
* doc(comment): #1343 (review) #1343 (comment)
* fix for nightly numpy
* fix: Never
* chore: typo
---------
Co-authored-by: Irv Lustig <irv@princeton.com>
1 parent d20612e commit 669a258

File tree

11 files changed

+569
-379
lines changed

11 files changed

+569
-379
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ repos:
1111
hooks:
1212
- id: isort
1313
- repo: https://github.com/astral-sh/ruff-pre-commit
14-
rev: v0.12.3
14+
rev: v0.12.10
1515
hooks:
1616
- id: ruff-check
1717
args: [

docs/philosophy.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,43 @@ The type `TimestampSeries` is the result of creating a series from `pd.to_dateti
6161
the type `TimedeltaSeries` is the result of subtracting two `TimestampSeries` as well as
6262
the result of `pd.to_timedelta()`.
6363

64+
### Generic Series have restricted arithmetic
65+
66+
Consider the following Series from a DataFrame:
67+
68+
```python
69+
import pandas as pd
70+
from typing_extensions import reveal_type
71+
from typing import TYPE_CHECKING, cast
72+
73+
if TYPE_CHECKING:
74+
from pandas.core.series import TimestampSeries # noqa: F401
75+
76+
77+
frame = pd.DataFrame({"timestamp": [pd.Timestamp(2025, 8, 26)], "tag": ["one"], "value": [1.0]})
78+
values = frame["value"]
79+
reveal_type(values) # type checker: Series[Any], runtime: Series
80+
new_values = values + 2
81+
82+
timestamps = frame["timestamp"]
83+
reveal_type(timestamps) # type checker: Series[Any], runtime: Series
84+
reveal_type(timestamps - pd.Timestamp(2025, 7, 12)) # type checker: Unknown and error, runtime: Series
85+
reveal_type(cast("TimestampSeries", timestamps) - pd.Timestamp(2025, 7, 12)) # type checker: TimedeltaSeries, runtime: Series
86+
87+
tags = frame["tag"]
88+
reveal_type("suffix" + tags) # type checker: Never, runtime: Series
89+
```
90+
91+
Since they are taken from a DataFrame, all three of them, `values`, `timestamps`
92+
and `tags`, are recognized by type checkers as `Series[Any]`. The code snippet
93+
runs fine at runtime. In the stub for type checking, however, we restrict
94+
generic Series to perform arithmetic operations only with numeric types, and
95+
give `Series[Any]` for the results. For `Timedelta`, `Timestamp`, `str`, etc.,
96+
arithmetic with a generic `Series[Any]` is restricted, and the result is either undefined,
97+
showing `Unknown` and errors, or `Never`. Users are encouraged to cast such
98+
generic Series to ones with concrete types, so that type checkers can provide
99+
meaningful results.
100+
64101
### Interval is Generic
65102

66103
A pandas `Interval` can be a time interval, an interval of integers, or an interval of

pandas-stubs/core/series.pyi

Lines changed: 69 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,6 @@ from pandas._typing import (
186186
np_ndarray_anyint,
187187
np_ndarray_bool,
188188
np_ndarray_complex,
189-
np_ndarray_dt,
190189
np_ndarray_float,
191190
np_ndarray_str,
192191
np_ndarray_td,
@@ -261,9 +260,20 @@ class _LocIndexerSeries(_LocIndexer, Generic[S1]):
261260
value: S1 | ArrayLike | Series[S1] | None,
262261
) -> None: ...
263262

264-
_ListLike: TypeAlias = (
263+
_ListLike: TypeAlias = ArrayLike | dict[_str, np.ndarray] | SequenceNotStr[S1]
264+
_ListLikeS1: TypeAlias = (
265265
ArrayLike | dict[_str, np.ndarray] | Sequence[S1] | IndexOpsMixin[S1]
266266
)
267+
_NumListLike: TypeAlias = (
268+
ExtensionArray
269+
| np_ndarray_bool
270+
| np_ndarray_anyint
271+
| np_ndarray_float
272+
| np_ndarray_complex
273+
| dict[_str, np.ndarray]
274+
| Sequence[complex]
275+
| IndexOpsMixin[complex]
276+
)
267277

268278
class Series(IndexOpsMixin[S1], NDFrame):
269279
# Define __index__ because mypy thinks Series follows protocol `SupportsIndex` https://github.com/pandas-dev/pandas-stubs/pull/1332#discussion_r2285648790
@@ -419,7 +429,9 @@ class Series(IndexOpsMixin[S1], NDFrame):
419429
@overload
420430
def __new__(
421431
cls,
422-
data: S1 | _ListLike[S1] | dict[HashableT1, S1] | KeysView[S1] | ValuesView[S1],
432+
data: (
433+
S1 | _ListLikeS1[S1] | dict[HashableT1, S1] | KeysView[S1] | ValuesView[S1]
434+
),
423435
index: AxesData | None = ...,
424436
dtype: Dtype = ...,
425437
name: Hashable = ...,
@@ -1619,7 +1631,9 @@ class Series(IndexOpsMixin[S1], NDFrame):
16191631
# just failed to generate these so I couldn't match
16201632
# them up.
16211633
@overload
1622-
def __add__(self: Series[Never], other: Scalar | _ListLike | Series) -> Series: ...
1634+
def __add__(self: Series[Never], other: _str) -> Never: ...
1635+
@overload
1636+
def __add__(self: Series[Never], other: complex | _ListLike | Series) -> Series: ...
16231637
@overload
16241638
def __add__(self, other: Series[Never]) -> Series: ...
16251639
@overload
@@ -1697,7 +1711,15 @@ class Series(IndexOpsMixin[S1], NDFrame):
16971711
@overload
16981712
def add(
16991713
self: Series[Never],
1700-
other: Scalar | _ListLike | Series,
1714+
other: _str,
1715+
level: Level | None = None,
1716+
fill_value: float | None = None,
1717+
axis: int = 0,
1718+
) -> Never: ...
1719+
@overload
1720+
def add(
1721+
self: Series[Never],
1722+
other: complex | _ListLike | Series,
17011723
level: Level | None = None,
17021724
fill_value: float | None = None,
17031725
axis: int = 0,
@@ -1840,7 +1862,11 @@ class Series(IndexOpsMixin[S1], NDFrame):
18401862
axis: int = 0,
18411863
) -> Series[_str]: ...
18421864
@overload # type: ignore[override]
1843-
def __radd__(self: Series[Never], other: Scalar | _ListLike) -> Series: ...
1865+
def __radd__(self: Series[Never], other: _str) -> Never: ...
1866+
@overload
1867+
def __radd__(
1868+
self: Series[Never], other: complex | _ListLike | Series
1869+
) -> Series: ...
18441870
@overload
18451871
def __radd__(
18461872
self: Series[bool],
@@ -1912,7 +1938,23 @@ class Series(IndexOpsMixin[S1], NDFrame):
19121938
@overload
19131939
def radd(
19141940
self: Series[Never],
1915-
other: Scalar | _ListLike | Series,
1941+
other: _str,
1942+
level: Level | None = None,
1943+
fill_value: float | None = None,
1944+
axis: int = 0,
1945+
) -> Never: ...
1946+
@overload
1947+
def radd(
1948+
self: Series[Never],
1949+
other: complex | _ListLike | Series,
1950+
level: Level | None = None,
1951+
fill_value: float | None = None,
1952+
axis: int = 0,
1953+
) -> Series: ...
1954+
@overload
1955+
def radd(
1956+
self: Series[S1],
1957+
other: Series[Never],
19161958
level: Level | None = None,
19171959
fill_value: float | None = None,
19181960
axis: int = 0,
@@ -2051,7 +2093,9 @@ class Series(IndexOpsMixin[S1], NDFrame):
20512093
self, other: S1 | _ListLike | Series[S1] | datetime | timedelta | date
20522094
) -> Series[_bool]: ...
20532095
@overload
2054-
def __mul__(self: Series[Never], other: complex | _ListLike | Series) -> Series: ...
2096+
def __mul__(
2097+
self: Series[Never], other: complex | _NumListLike | Series
2098+
) -> Series: ...
20552099
@overload
20562100
def __mul__(self, other: Series[Never]) -> Series: ... # type: ignore[overload-overlap]
20572101
@overload
@@ -2246,7 +2290,7 @@ class Series(IndexOpsMixin[S1], NDFrame):
22462290
) -> TimedeltaSeries: ...
22472291
@overload
22482292
def __rmul__(
2249-
self: Series[Never], other: complex | _ListLike | Series
2293+
self: Series[Never], other: complex | _NumListLike | Series
22502294
) -> Series: ...
22512295
@overload
22522296
def __rmul__(self, other: Series[Never]) -> Series: ... # type: ignore[overload-overlap]
@@ -2475,12 +2519,11 @@ class Series(IndexOpsMixin[S1], NDFrame):
24752519
@overload
24762520
def __rxor__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...
24772521
@overload
2478-
def __sub__(
2479-
self: Series[Never],
2480-
other: datetime | np.datetime64 | np_ndarray_dt | TimestampSeries,
2481-
) -> TimedeltaSeries: ...
2522+
def __sub__(self: Series[Never], other: TimestampSeries) -> Never: ...
24822523
@overload
2483-
def __sub__(self: Series[Never], other: complex | _ListLike | Series) -> Series: ...
2524+
def __sub__(
2525+
self: Series[Never], other: complex | _NumListLike | Series
2526+
) -> Series: ...
24842527
@overload
24852528
def __sub__(self, other: Series[Never]) -> Series: ... # type: ignore[overload-overlap]
24862529
@overload
@@ -2571,15 +2614,15 @@ class Series(IndexOpsMixin[S1], NDFrame):
25712614
@overload
25722615
def sub(
25732616
self: Series[Never],
2574-
other: datetime | np.datetime64 | np_ndarray_dt | TimestampSeries,
2617+
other: TimestampSeries,
25752618
level: Level | None = None,
25762619
fill_value: float | None = None,
25772620
axis: int = 0,
2578-
) -> TimedeltaSeries: ...
2621+
) -> Never: ...
25792622
@overload
25802623
def sub(
25812624
self: Series[Never],
2582-
other: complex | _ListLike | Series,
2625+
other: complex | _NumListLike | Series,
25832626
level: Level | None = None,
25842627
fill_value: float | None = None,
25852628
axis: int = 0,
@@ -2705,13 +2748,10 @@ class Series(IndexOpsMixin[S1], NDFrame):
27052748
axis: int = 0,
27062749
) -> TimedeltaSeries: ...
27072750
@overload
2708-
def __rsub__( # type: ignore[misc]
2709-
self: Series[Never],
2710-
other: datetime | np.datetime64 | np_ndarray_dt | TimestampSeries,
2711-
) -> TimedeltaSeries: ...
2751+
def __rsub__(self: Series[Never], other: TimestampSeries) -> Never: ... # type: ignore[misc]
27122752
@overload
27132753
def __rsub__(
2714-
self: Series[Never], other: complex | _ListLike | Series
2754+
self: Series[Never], other: complex | _NumListLike | Series
27152755
) -> Series: ...
27162756
@overload
27172757
def __rsub__(self, other: Series[Never]) -> Series: ...
@@ -2781,15 +2821,15 @@ class Series(IndexOpsMixin[S1], NDFrame):
27812821
@overload
27822822
def rsub(
27832823
self: Series[Never],
2784-
other: datetime | np.datetime64 | np_ndarray_dt | TimestampSeries,
2824+
other: TimestampSeries,
27852825
level: Level | None = None,
27862826
fill_value: float | None = None,
27872827
axis: int = 0,
2788-
) -> TimedeltaSeries: ...
2828+
) -> Never: ...
27892829
@overload
27902830
def rsub(
27912831
self: Series[Never],
2792-
other: complex | _ListLike | Series,
2832+
other: complex | _NumListLike | Series,
27932833
level: Level | None = None,
27942834
fill_value: float | None = None,
27952835
axis: int = 0,
@@ -2887,8 +2927,8 @@ class Series(IndexOpsMixin[S1], NDFrame):
28872927
axis: int = 0,
28882928
) -> Series[complex]: ...
28892929
@overload
2890-
def __truediv__(
2891-
self: Series[Never], other: complex | _ListLike | Series
2930+
def __truediv__( # type:ignore[overload-overlap]
2931+
self: Series[Never], other: complex | _NumListLike | Series
28922932
) -> Series: ...
28932933
@overload
28942934
def __truediv__(self, other: Series[Never]) -> Series: ...
@@ -3083,8 +3123,8 @@ class Series(IndexOpsMixin[S1], NDFrame):
30833123
) -> Series: ...
30843124
div = truediv
30853125
@overload
3086-
def __rtruediv__(
3087-
self: Series[Never], other: complex | _ListLike | Series
3126+
def __rtruediv__( # type:ignore[overload-overlap]
3127+
self: Series[Never], other: complex | _NumListLike | Series
30883128
) -> Series: ...
30893129
@overload
30903130
def __rtruediv__(self, other: Series[Never]) -> Series: ...

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,12 @@ types-pytz = ">= 2022.1.1"
3535
numpy = ">= 1.23.5"
3636

3737
[tool.poetry.group.dev.dependencies]
38-
mypy = "1.17.0"
38+
mypy = "1.17.1"
3939
pandas = "2.3.1"
4040
pyarrow = ">=10.0.1"
4141
pytest = ">=7.1.2"
4242
pyright = ">=1.1.404"
43-
ty = "^0.0.1a8"
43+
ty = "^0.0.1a9"
4444
pyrefly = "^0.21.0"
4545
poethepoet = ">=0.16.5"
4646
loguru = ">=0.6.0"

tests/series/arithmetic/str/test_add.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919

2020
def test_add_py_scalar() -> None:
21-
"""Testpd.Series[str]+ Python native 'scalar's"""
21+
"""Test pd.Series[str] + Python native 'scalar's"""
2222
i = 4
2323
r0 = "right"
2424

@@ -35,12 +35,12 @@ def test_add_py_scalar() -> None:
3535
check(assert_type(left.add(r0), "pd.Series[str]"), pd.Series, str)
3636

3737
if TYPE_CHECKING_INVALID_USAGE:
38-
left.radd(i) # type: ignore[call-overload] # pyright: ignore[reportArgumentType]
38+
left.radd(i) # type: ignore[call-overload] # pyright: ignore[reportArgumentType, reportCallIssue]
3939
check(assert_type(left.radd(r0), "pd.Series[str]"), pd.Series, str)
4040

4141

4242
def test_add_py_sequence() -> None:
43-
"""Testpd.Series[str]+ Python native sequence"""
43+
"""Test pd.Series[str] + Python native sequence"""
4444
i = [3, 5, 8]
4545
r0 = ["a", "bc", "def"]
4646
r1 = tuple(r0)
@@ -61,13 +61,13 @@ def test_add_py_sequence() -> None:
6161
check(assert_type(left.add(r1), "pd.Series[str]"), pd.Series, str)
6262

6363
if TYPE_CHECKING_INVALID_USAGE:
64-
left.radd(i) # type: ignore[arg-type] # pyright: ignore[reportArgumentType]
64+
left.radd(i) # type: ignore[arg-type] # pyright: ignore[reportArgumentType,reportCallIssue]
6565
check(assert_type(left.radd(r0), "pd.Series[str]"), pd.Series, str)
6666
check(assert_type(left.radd(r1), "pd.Series[str]"), pd.Series, str)
6767

6868

6969
def test_add_numpy_array() -> None:
70-
"""Testpd.Series[str]+ numpy array"""
70+
"""Test pd.Series[str] + numpy array"""
7171
i = np.array([3, 5, 8], np.int64)
7272
r0 = np.array(["a", "bc", "def"], np.str_)
7373

@@ -96,12 +96,12 @@ def test_add_numpy_array() -> None:
9696
check(assert_type(left.add(r0), "pd.Series[str]"), pd.Series, str)
9797

9898
if TYPE_CHECKING_INVALID_USAGE:
99-
left.radd(i) # type: ignore[arg-type] # pyright: ignore[reportArgumentType]
99+
left.radd(i) # type: ignore[arg-type] # pyright: ignore[reportArgumentType, reportCallIssue]
100100
check(assert_type(left.radd(r0), "pd.Series[str]"), pd.Series, str)
101101

102102

103103
def test_add_pd_series() -> None:
104-
"""Testpd.Series[str]+ pandas series"""
104+
"""Test pd.Series[str] + pandas series"""
105105
i = pd.Series([3, 5, 8])
106106
r0 = pd.Series(["a", "bc", "def"])
107107

@@ -118,5 +118,5 @@ def test_add_pd_series() -> None:
118118
check(assert_type(left.add(r0), "pd.Series[str]"), pd.Series, str)
119119

120120
if TYPE_CHECKING_INVALID_USAGE:
121-
left.radd(i) # type: ignore[arg-type] # pyright: ignore[reportArgumentType]
121+
left.radd(i) # type: ignore[arg-type] # pyright: ignore[reportArgumentType, reportCallIssue]
122122
check(assert_type(left.radd(r0), "pd.Series[str]"), pd.Series, str)

0 commit comments

Comments
 (0)