From a9df51b017f02bd005972b2bdaf18a5e3c383541 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Jul 2025 00:46:10 +0200 Subject: [PATCH 1/7] CoW: add readonly flag to ExtensionArrays, return read-only EA/ndarray in .array/EA.to_numpy() --- pandas/core/arrays/_mixins.py | 3 ++ pandas/core/arrays/arrow/array.py | 3 ++ pandas/core/arrays/base.py | 65 +++++++++++++++++++++++++ pandas/core/arrays/datetimelike.py | 7 ++- pandas/core/arrays/interval.py | 3 ++ pandas/core/arrays/masked.py | 14 +++++- pandas/core/arrays/numpy_.py | 23 +++++++-- pandas/core/arrays/period.py | 6 ++- pandas/core/arrays/sparse/array.py | 8 ++- pandas/core/arrays/string_.py | 3 ++ pandas/core/dtypes/astype.py | 11 +++++ pandas/core/indexes/base.py | 2 + pandas/core/internals/blocks.py | 6 ++- pandas/core/series.py | 5 +- pandas/tests/extension/base/setitem.py | 57 ++++++++++++++++++++++ pandas/tests/extension/date/array.py | 3 ++ pandas/tests/extension/decimal/array.py | 3 ++ pandas/tests/extension/json/array.py | 3 ++ 18 files changed, 215 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 26585e7bab8e3..2eb235f87d231 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -252,6 +252,9 @@ def shift(self, periods: int = 1, fill_value=None) -> Self: return self._from_backing_data(new_values) def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + key = check_array_indexer(self, key) value = self._validate_setitem_value(value) self._ndarray[key] = value diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 919453b29b7f9..61082f920a4ab 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1967,6 +1967,9 @@ def __setitem__(self, key, value) -> None: ------- None """ + if self._readonly: + raise ValueError("Cannot modify readonly array") + # GH50085: unwrap 1D indexers if isinstance(key, tuple) and len(key) == 1: key = key[0] diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index d11e2271f9574..5d32e7f025416 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -40,6 +40,7 @@ validate_insert_loc, ) +from pandas.core.dtypes.astype import astype_is_view from pandas.core.dtypes.cast import maybe_cast_pointwise_result from pandas.core.dtypes.common import ( is_list_like, @@ -269,6 +270,8 @@ class ExtensionArray: # strictly less than 2000 to be below Index.__pandas_priority__. __pandas_priority__ = 1000 + _readonly = False + # ------------------------------------------------------------------------ # Constructors # ------------------------------------------------------------------------ @@ -482,6 +485,11 @@ def __setitem__(self, key, value) -> None: Returns ------- None + + Raises + ------ + ValueError + If the array is readonly and modification is attempted. """ # Some notes to the ExtensionArray implementer who may have ended up # here. While this method is not required for the interface, if you @@ -501,8 +509,59 @@ def __setitem__(self, key, value) -> None: # __init__ method coerces that value, then so should __setitem__ # Note, also, that Series/DataFrame.where internally use __setitem__ # on a copy of the data. + # Check if the array is readonly + if self._readonly: + raise ValueError("Cannot modify readonly array") + raise NotImplementedError(f"{type(self)} does not implement __setitem__.") + @property + def readonly(self) -> bool: + """ + Whether the array is readonly. + + If True, attempts to modify the array via __setitem__ will raise + a ValueError. + + Returns + ------- + bool + True if the array is readonly, False otherwise. + + Examples + -------- + >>> arr = pd.array([1, 2, 3]) + >>> arr.readonly + False + >>> arr.readonly = True + >>> arr[0] = 5 + Traceback (most recent call last): + ... + ValueError: Cannot modify readonly ExtensionArray + """ + return getattr(self, "_readonly", False) + + @readonly.setter + def readonly(self, value: bool) -> None: + """ + Set the readonly state of the array. + + Parameters + ---------- + value : bool + True to make the array readonly, False to make it writable. + + Examples + -------- + >>> arr = pd.array([1, 2, 3]) + >>> arr.readonly = True + >>> arr.readonly + True + """ + if not isinstance(value, bool): + raise TypeError("readonly must be a boolean") + self._readonly = value + def __len__(self) -> int: """ Length of this array @@ -595,8 +654,14 @@ def to_numpy( result = np.asarray(self, dtype=dtype) if copy or na_value is not lib.no_default: result = result.copy() + elif self._readonly and astype_is_view(self.dtype, result.dtype): + # If the ExtensionArray is readonly, make the numpy array readonly too + result = result.view() + result.flags.writeable = False + if na_value is not lib.no_default: result[self.isna()] = na_value # type: ignore[index] + return result # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index d5e654c95577e..91f6646d07c21 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -368,7 +368,12 @@ def __array__( if copy is True: return np.array(self._ndarray, dtype=dtype) - return self._ndarray + + result = self._ndarray + if self._readonly: + result = result.view() + result.flags.writeable = False + return result @overload def __getitem__(self, key: ScalarIndexer) -> DTScalarOrNaT: ... diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 4bcbe2eedee47..9996eb93ea8f0 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -729,6 +729,9 @@ def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA: return self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type] def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + value_left, value_right = self._validate_setitem_value(value) key = check_array_indexer(self, key) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index fefd70fef35c9..59cbda6fd1e64 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -23,6 +23,7 @@ from pandas.errors import AbstractMethodError from pandas.util._decorators import doc +from pandas.core.dtypes.astype import astype_is_view from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.common import ( is_bool, @@ -290,6 +291,9 @@ def _validate_setitem_value(self, value): raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'") def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + key = check_array_indexer(self, key) if is_scalar(value): @@ -520,6 +524,9 @@ def to_numpy( with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) data = self._data.astype(dtype, copy=copy) + if self._readonly and astype_is_view(self.dtype, dtype): + data = data.view() + data.flags.writeable = False return data @doc(ExtensionArray.tolist) @@ -596,7 +603,12 @@ def __array__( if copy is False: if not self._hasna: # special case, here we can simply return the underlying data - return np.array(self._data, dtype=dtype, copy=copy) + result = np.array(self._data, dtype=dtype, copy=copy) + # If the ExtensionArray is readonly, make the numpy array readonly too + if self._readonly: + result = result.view() + result.flags.writeable = False + return result raise ValueError( "Unable to avoid copy while creating an array as requested." ) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index fd2c8c9d63362..ebede913e77fc 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -12,7 +12,10 @@ from pandas._libs.tslibs import is_supported_dtype from pandas.compat.numpy import function as nv -from pandas.core.dtypes.astype import astype_array +from pandas.core.dtypes.astype import ( + astype_array, + astype_is_view, +) from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.dtypes import NumpyEADtype @@ -160,8 +163,19 @@ def __array__( ) -> np.ndarray: if copy is not None: # Note: branch avoids `copy=None` for NumPy 1.x support - return np.array(self._ndarray, dtype=dtype, copy=copy) - return np.asarray(self._ndarray, dtype=dtype) + result = np.array(self._ndarray, dtype=dtype, copy=copy) + else: + result = np.asarray(self._ndarray, dtype=dtype) + + if ( + self._readonly + and not copy + and (dtype is None or astype_is_view(self.dtype, dtype)) + ): + result = result.view() + result.flags.writeable = False + + return result def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): # Lightly modified version of @@ -512,6 +526,9 @@ def to_numpy( result[mask] = na_value else: result = self._ndarray + if not copy and self._readonly: + result = result.view() + result.flags.writeable = False result = np.asarray(result, dtype=dtype) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index ae92e17332c76..d2b3448a59896 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -393,7 +393,11 @@ def __array__( # For NumPy 1.x compatibility we cannot use copy=None. And # `copy=False` has the meaning of `copy=None` here: if not copy: - return np.asarray(self.asi8, dtype=dtype) + result = np.asarray(self.asi8, dtype=dtype) + if self._readonly: + result = result.view() + result.flags.writeable = False + return result else: return np.array(self.asi8, dtype=dtype) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 137dbb6e4d139..fbf457e414c13 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -562,7 +562,11 @@ def __array__( if copy is True: return np.array(self.sp_values) else: - return self.sp_values + result = self.sp_values + if self._readonly: + result = result.view() + result.flags.writeable = False + return result if copy is False: raise ValueError( @@ -591,6 +595,8 @@ def __array__( return out def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") # I suppose we could allow setting of non-fill_value elements. # TODO(SparseArray.__setitem__): remove special cases in # ExtensionBlock.where diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index f52b709a59de9..abf77b57e2a49 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -769,6 +769,9 @@ def _maybe_convert_setitem_value(self, value): return value def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + value = self._maybe_convert_setitem_value(value) key = check_array_indexer(self, key) diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 086f7d2da6640..f9cdb50254730 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -24,8 +24,11 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ( + CategoricalDtype, ExtensionDtype, + IntervalDtype, NumpyEADtype, + PeriodDtype, ) if TYPE_CHECKING: @@ -283,6 +286,14 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool: new_dtype = getattr(new_dtype, "numpy_dtype", new_dtype) return getattr(dtype, "unit", None) == getattr(new_dtype, "unit", None) + elif new_dtype == object and isinstance(dtype, (PeriodDtype, IntervalDtype)): + return False + + elif isinstance(dtype, CategoricalDtype) and not isinstance( + new_dtype, CategoricalDtype + ): + return False + numpy_dtype = getattr(dtype, "numpy_dtype", None) new_numpy_dtype = getattr(new_dtype, "numpy_dtype", None) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e8c5a03a6de50..4172e159b0d90 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4972,6 +4972,8 @@ def array(self) -> ExtensionArray: from pandas.core.arrays.numpy_ import NumpyExtensionArray array = NumpyExtensionArray(array) + array = array.view() + array._readonly = True return array @property diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6aa5062b8ed86..f778cf618d077 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2359,7 +2359,9 @@ def external_values(values: ArrayLike) -> ArrayLike: if isinstance(values, np.ndarray): values = values.view() values.flags.writeable = False - - # TODO(CoW) we should also mark our ExtensionArrays as read-only + else: + # ExtensionArrays + values = values.view() + values._readonly = True return values diff --git a/pandas/core/series.py b/pandas/core/series.py index ce5b2e5ed8de5..1bd46f779831b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -820,7 +820,10 @@ def _references(self) -> BlockValuesRefs: @Appender(base.IndexOpsMixin.array.__doc__) # type: ignore[prop-decorator] @property def array(self) -> ExtensionArray: - return self._mgr.array_values() + arr = self._mgr.array_values() + arr = arr.view() + arr._readonly = True + return arr def __len__(self) -> int: """ diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 185d6d750cace..3c81ff4d457ea 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -454,3 +454,60 @@ def test_setitem_2d_values(self, data): df.loc[[0, 1], :] = df.loc[[1, 0], :].values assert (df.loc[0, :] == original[1]).all() assert (df.loc[1, :] == original[0]).all() + + def test_readonly_property(self, data): + assert data.readonly is False + + data.readonly = True + assert data.readonly is True + + data_orig = data.copy() + assert data_orig.readonly is False + + with pytest.raises(ValueError, match="Cannot modify readonly array"): + data[0] = data[1] + + with pytest.raises(ValueError, match="Cannot modify readonly array"): + data[0:3] = data[1] + + with pytest.raises(ValueError, match="Cannot modify readonly array"): + data[np.array([True] * len(data))] = data[1] + + tm.assert_extension_array_equal(data, data_orig) + + def test_readonly_propagates_to_numpy_array(self, data): + data.readonly = True + + # when we ask for a copy, the result should never be readonly + arr = np.array(data) + assert arr.flags.writeable + + # when we don't ask for a copy -> if the conversion is zero-copy, + # the result should be readonly + arr1 = np.asarray(data) + arr2 = np.asarray(data) + if np.shares_memory(arr1, arr2): + assert not arr1.flags.writeable + else: + assert arr1.flags.writeable + + def test_readonly_propagates_to_numpy_array_method(self, data): + data.readonly = True + + # when we ask for a copy, the result should never be readonly + arr = data.to_numpy(copy=True) + assert arr.flags.writeable + + # when we don't ask for a copy -> if the conversion is zero-copy, + # the result should be readonly + arr1 = data.to_numpy(copy=False) + arr2 = data.to_numpy(copy=False) + if np.shares_memory(arr1, arr2): + assert not arr1.flags.writeable + else: + assert arr1.flags.writeable + + # non-NA fill value should always result in a copy + if data.isna().any(): + arr = data.to_numpy(copy=False, na_value=data[0]) + assert arr.flags.writeable diff --git a/pandas/tests/extension/date/array.py b/pandas/tests/extension/date/array.py index 0c51570189a7c..e6a4441bca97c 100644 --- a/pandas/tests/extension/date/array.py +++ b/pandas/tests/extension/date/array.py @@ -149,6 +149,9 @@ def __getitem__(self, item: PositionalIndexer): raise NotImplementedError("only ints are supported as indexes") def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + if not isinstance(key, int): raise NotImplementedError("only ints are supported as indexes") diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 2ee6a73ec4054..de8e45679ea59 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -195,6 +195,9 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy=copy) def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + if is_list_like(value): if is_scalar(key): raise ValueError("setting an array element with a sequence.") diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index b110911bda400..7c01bdef88c0a 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -120,6 +120,9 @@ def __getitem__(self, item): return type(self)([self.data[i] for i in item]) def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + if isinstance(key, numbers.Integral): self.data[key] = value else: From 9cd6e4f39fc109ed5326606cb11c6905d8b34716 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Jul 2025 01:16:39 +0200 Subject: [PATCH 2/7] cleanup --- pandas/core/arrays/base.py | 47 -------------------------------------- 1 file changed, 47 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 5d32e7f025416..5c488b6aa0906 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -515,53 +515,6 @@ def __setitem__(self, key, value) -> None: raise NotImplementedError(f"{type(self)} does not implement __setitem__.") - @property - def readonly(self) -> bool: - """ - Whether the array is readonly. - - If True, attempts to modify the array via __setitem__ will raise - a ValueError. - - Returns - ------- - bool - True if the array is readonly, False otherwise. - - Examples - -------- - >>> arr = pd.array([1, 2, 3]) - >>> arr.readonly - False - >>> arr.readonly = True - >>> arr[0] = 5 - Traceback (most recent call last): - ... - ValueError: Cannot modify readonly ExtensionArray - """ - return getattr(self, "_readonly", False) - - @readonly.setter - def readonly(self, value: bool) -> None: - """ - Set the readonly state of the array. - - Parameters - ---------- - value : bool - True to make the array readonly, False to make it writable. - - Examples - -------- - >>> arr = pd.array([1, 2, 3]) - >>> arr.readonly = True - >>> arr.readonly - True - """ - if not isinstance(value, bool): - raise TypeError("readonly must be a boolean") - self._readonly = value - def __len__(self) -> int: """ Length of this array From c6f37d1f0cfd50c70aa729da03c355eb2f17e24b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Jul 2025 01:36:38 +0200 Subject: [PATCH 3/7] fixup attribute name in tests --- pandas/tests/extension/base/setitem.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 3c81ff4d457ea..853b314e76a8e 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -456,13 +456,13 @@ def test_setitem_2d_values(self, data): assert (df.loc[1, :] == original[0]).all() def test_readonly_property(self, data): - assert data.readonly is False + assert data._readonly is False - data.readonly = True - assert data.readonly is True + data._readonly = True + assert data._readonly is True data_orig = data.copy() - assert data_orig.readonly is False + assert data_orig._readonly is False with pytest.raises(ValueError, match="Cannot modify readonly array"): data[0] = data[1] @@ -476,7 +476,7 @@ def test_readonly_property(self, data): tm.assert_extension_array_equal(data, data_orig) def test_readonly_propagates_to_numpy_array(self, data): - data.readonly = True + data._readonly = True # when we ask for a copy, the result should never be readonly arr = np.array(data) @@ -492,7 +492,7 @@ def test_readonly_propagates_to_numpy_array(self, data): assert arr1.flags.writeable def test_readonly_propagates_to_numpy_array_method(self, data): - data.readonly = True + data._readonly = True # when we ask for a copy, the result should never be readonly arr = data.to_numpy(copy=True) From 8058d9ad53ef933ec3fb650c6bd8caf597dc1da8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Jul 2025 08:49:59 +0200 Subject: [PATCH 4/7] fix tests --- pandas/_libs/ops.pyx | 4 ++-- pandas/tests/arrays/integer/test_indexing.py | 2 +- pandas/tests/arrays/numpy_/test_numpy.py | 7 ------- pandas/tests/arrays/test_datetimelike.py | 4 ++-- pandas/tests/extension/test_common.py | 2 +- pandas/tests/extension/test_numpy.py | 2 +- pandas/tests/internals/test_internals.py | 2 +- pandas/tests/series/test_constructors.py | 4 ++-- 8 files changed, 10 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index 567bfc02a2950..8b53e842a7988 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -177,7 +177,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarr @cython.wraparound(False) @cython.boundscheck(False) -def scalar_binop(object[:] values, object val, object op) -> ndarray: +def scalar_binop(ndarray[object] values, object val, object op) -> ndarray: """ Apply the given binary operator `op` between each element of the array `values` and the scalar `val`. @@ -214,7 +214,7 @@ def scalar_binop(object[:] values, object val, object op) -> ndarray: @cython.wraparound(False) @cython.boundscheck(False) -def vec_binop(object[:] left, object[:] right, object op) -> ndarray: +def vec_binop(ndarray[object] left, ndarray[object] right, object op) -> ndarray: """ Apply the given binary operator `op` pointwise to the elements of arrays `left` and `right`. diff --git a/pandas/tests/arrays/integer/test_indexing.py b/pandas/tests/arrays/integer/test_indexing.py index 4b953d699108b..ce801db5cb58d 100644 --- a/pandas/tests/arrays/integer/test_indexing.py +++ b/pandas/tests/arrays/integer/test_indexing.py @@ -12,7 +12,7 @@ def test_array_setitem_nullable_boolean_mask(): def test_array_setitem(): # GH 31446 - arr = pd.Series([1, 2], dtype="Int64").array + arr = pd.array([1, 2], dtype="Int64") arr[arr > 1] = 1 expected = pd.array([1, 1], dtype="Int64") diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index 620a553d5a731..af3a793a3ef77 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -159,13 +159,6 @@ def test_to_numpy(): # Setitem -def test_setitem_series(): - ser = pd.Series([1, 2, 3]) - ser.array[0] = 10 - expected = pd.Series([10, 2, 3]) - tm.assert_series_equal(ser, expected) - - def test_setitem(any_numpy_array): nparr = any_numpy_array arr = NumpyExtensionArray(nparr, copy=True) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index d1ef29b0bf8a0..a2eeee1447111 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1248,8 +1248,8 @@ def test_invalid_nat_setitem_array(arr, non_casting_nats): @pytest.mark.parametrize( "arr", [ - pd.date_range("2000", periods=4).array, - pd.timedelta_range("2000", periods=4).array, + pd.date_range("2000", periods=4).array.copy(), + pd.timedelta_range("2000", periods=4).array.copy(), ], ) def test_to_numpy_extra(arr): diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index 5eda0f00f54ca..9f782770a02e4 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -101,5 +101,5 @@ def test_ellipsis_index(): # String comparison because there's no native way to compare slices. # Before the fix for GH#42430, last_item_arg would get set to the 2D slice # (Ellipsis, slice(None, 1, None)) - out = df["col1"].array.last_item_arg + out = df["col1"]._values.last_item_arg assert str(out) == "slice(None, 1, None)" diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 79cfb736941d6..e6301d9a23604 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -77,7 +77,7 @@ def allow_in_pandas(monkeypatch): @pytest.fixture def data(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": - return pd.Series([(i,) for i in range(100)]).array + return pd.Series([(i,) for i in range(100)]).array.copy() return NumpyExtensionArray(np.arange(1, 101, dtype=dtype._dtype)) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 11e6b99204aee..2622310aaacd6 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1347,7 +1347,7 @@ def check_series_setitem(self, elem, index: Index, inplace: bool): ser[: len(elem)] = elem if inplace: - assert ser.array is arr # i.e. setting was done inplace + assert ser._values is arr # i.e. setting was done inplace else: assert ser.dtype == object diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index f82451a2be84d..f8af23de89297 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -525,9 +525,9 @@ def test_categorical_sideeffects_free(self): # so this WILL change values cat = Categorical(["a", "b", "c", "a"]) s = Series(cat, copy=False) - assert s.values is cat + assert s._values is cat s = s.cat.rename_categories([1, 2, 3]) - assert s.values is not cat + assert s._values is not cat exp_s = np.array([1, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s) From 91465ee00540b109892c38b40520cbe1ef112b44 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Jul 2025 10:05:00 +0200 Subject: [PATCH 5/7] more test fixes --- pandas/core/arrays/sparse/array.py | 2 ++ pandas/core/dtypes/astype.py | 5 ++++- pandas/tests/base/test_conversion.py | 2 +- pandas/tests/extension/base/setitem.py | 7 ++++++- pandas/tests/extension/json/array.py | 1 - pandas/tests/extension/json/test_json.py | 6 ++++++ 6 files changed, 19 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index fbf457e414c13..2912444a5185e 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -975,6 +975,8 @@ def __getitem__( # _NestedSequence[Union[bool, int]]], ...]]" data_slice = self.to_dense()[key] # type: ignore[index] elif isinstance(key, slice): + if key == slice(None): + return type(self)._simple_new(self.sp_values, self.sp_index, self.dtype) # Avoid densifying when handling contiguous slices if key.step is None or key.step == 1: start = 0 if key.start is None else key.start diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index f9cdb50254730..75464cbb2b5f9 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -25,6 +25,7 @@ ) from pandas.core.dtypes.dtypes import ( CategoricalDtype, + DatetimeTZDtype, ExtensionDtype, IntervalDtype, NumpyEADtype, @@ -286,7 +287,9 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool: new_dtype = getattr(new_dtype, "numpy_dtype", new_dtype) return getattr(dtype, "unit", None) == getattr(new_dtype, "unit", None) - elif new_dtype == object and isinstance(dtype, (PeriodDtype, IntervalDtype)): + elif new_dtype == object and isinstance( + dtype, (DatetimeTZDtype, PeriodDtype, IntervalDtype) + ): return False elif isinstance(dtype, CategoricalDtype) and not isinstance( diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index e3a821519c638..bddd28d29ba1c 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -285,7 +285,7 @@ def test_array(arr, attr, index_or_series): arr = getattr(arr, attr) result = getattr(result, attr) - assert result is arr + assert np.shares_memory(result, arr) def test_array_multiindex_raises(): diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 853b314e76a8e..e9389a4e57549 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -510,4 +510,9 @@ def test_readonly_propagates_to_numpy_array_method(self, data): # non-NA fill value should always result in a copy if data.isna().any(): arr = data.to_numpy(copy=False, na_value=data[0]) - assert arr.flags.writeable + if isinstance(data.dtype, pd.ArrowDtype) and data.dtype.kind == "f": + # for float dtype, after the fillna, the conversion from pyarrow to + # numpy is zero-copy, and pyarrow will mark the array as readonly + assert not arr.flags.writeable + else: + assert arr.flags.writeable diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 7c01bdef88c0a..7e224b2416e10 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -155,7 +155,6 @@ def __array__(self, dtype=None, copy=None): raise ValueError( "Unable to avoid copy while creating an array as requested." ) - if dtype is None: dtype = object if dtype == object: diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 4bc9562f1895d..895d5df8810ad 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -418,6 +418,12 @@ def test_setitem_slice_array(self, data): def test_setitem_invalid(self, data, invalid_scalar): super().test_setitem_invalid(data, invalid_scalar) + @pytest.mark.xfail( + reason="result readonly flag is incorrect and does not support na_value" + ) + def test_readonly_propagates_to_numpy_array_method(self, data): + super().test_readonly_propagates_to_numpy_array_method(data) + @pytest.mark.xfail(reason="only integer scalar arrays can be converted") def test_setitem_2d_values(self, data): super().test_setitem_2d_values(data) From 856dc0242d2f67c7caa52ec2a1d34729fb100a47 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 23 Jul 2025 10:21:30 +0200 Subject: [PATCH 6/7] add tests for .array being readonly --- pandas/tests/copy_view/test_array.py | 37 +++++++++++++++++++--------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/pandas/tests/copy_view/test_array.py b/pandas/tests/copy_view/test_array.py index 2b3ef9201d918..81b77bb4abdee 100644 --- a/pandas/tests/copy_view/test_array.py +++ b/pandas/tests/copy_view/test_array.py @@ -19,10 +19,11 @@ "method", [ lambda ser: ser.values, + lambda ser: np.asarray(ser.array), lambda ser: np.asarray(ser), lambda ser: np.array(ser, copy=False), ], - ids=["values", "asarray", "array"], + ids=["values", "array", "np.asarray", "np.array"], ) def test_series_values(method): ser = Series([1, 2, 3], name="name") @@ -105,24 +106,38 @@ def test_series_to_numpy(): assert arr.flags.writeable is True -def test_series_array_ea_dtypes(): +@pytest.mark.parametrize( + "method", + [ + lambda ser: np.asarray(ser.array), + lambda ser: np.asarray(ser), + lambda ser: np.asarray(ser, dtype="int64"), + lambda ser: np.array(ser, copy=False), + ], + ids=["array", "np.asarray", "np.asarray-dtype", "np.array"], +) +def test_series_values_ea_dtypes(method): ser = Series([1, 2, 3], dtype="Int64") - arr = np.asarray(ser, dtype="int64") - assert np.shares_memory(arr, get_array(ser)) - assert arr.flags.writeable is False + arr = method(ser) - arr = np.asarray(ser) assert np.shares_memory(arr, get_array(ser)) assert arr.flags.writeable is False -def test_dataframe_array_ea_dtypes(): +@pytest.mark.parametrize( + "method", + [ + lambda df: df.values, + lambda df: np.asarray(df), + lambda df: np.asarray(df, dtype="int64"), + lambda df: np.array(df, copy=False), + ], + ids=["values", "np.asarray", "np.asarray-dtype", "np.array"], +) +def test_dataframe_array_ea_dtypes(method): df = DataFrame({"a": [1, 2, 3]}, dtype="Int64") - arr = np.asarray(df, dtype="int64") - assert np.shares_memory(arr, get_array(df, "a")) - assert arr.flags.writeable is False + arr = method(df) - arr = np.asarray(df) assert np.shares_memory(arr, get_array(df, "a")) assert arr.flags.writeable is False From ee1ed6ed56f8ebfa1068fe9e5ae60694fa11001e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 3 Aug 2025 11:48:12 +0200 Subject: [PATCH 7/7] typing --- pandas/core/arrays/_mixins.py | 6 ++++++ pandas/core/arrays/_utils.py | 2 +- pandas/core/arrays/base.py | 6 ++++++ pandas/core/arrays/numpy_.py | 2 +- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 2eb235f87d231..87eed4d8f32cc 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -115,6 +115,12 @@ def _validate_scalar(self, value): # ------------------------------------------------------------------------ + @overload + def view(self) -> Self: ... + + @overload + def view(self, dtype: Dtype | None = ...) -> ArrayLike: ... + def view(self, dtype: Dtype | None = None) -> ArrayLike: # We handle datetime64, datetime64tz, timedelta64, and period # dtypes here. Everything else we pass through to the underlying diff --git a/pandas/core/arrays/_utils.py b/pandas/core/arrays/_utils.py index 6b46396d5efdf..5a5ac6cb48039 100644 --- a/pandas/core/arrays/_utils.py +++ b/pandas/core/arrays/_utils.py @@ -22,7 +22,7 @@ def to_numpy_dtype_inference( arr: ArrayLike, dtype: npt.DTypeLike | None, na_value, hasna: bool -) -> tuple[npt.DTypeLike, Any]: +) -> tuple[np.dtype | None, Any]: if dtype is None and is_numeric_dtype(arr.dtype): dtype_given = False if hasna: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 9a1bd9c453b21..4ac48716b9561 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1853,6 +1853,12 @@ def copy(self) -> Self: """ raise AbstractMethodError(self) + @overload + def view(self) -> Self: ... + + @overload + def view(self, dtype: Dtype | None = ...) -> ArrayLike: ... + def view(self, dtype: Dtype | None = None) -> ArrayLike: """ Return a view on the array. diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index ebede913e77fc..37c3e574124ed 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -159,7 +159,7 @@ def dtype(self) -> NumpyEADtype: # NumPy Array Interface def __array__( - self, dtype: NpDtype | None = None, copy: bool | None = None + self, dtype: np.dtype | None = None, copy: bool | None = None ) -> np.ndarray: if copy is not None: # Note: branch avoids `copy=None` for NumPy 1.x support