diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index 567bfc02a2950..8b53e842a7988 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -177,7 +177,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarr @cython.wraparound(False) @cython.boundscheck(False) -def scalar_binop(object[:] values, object val, object op) -> ndarray: +def scalar_binop(ndarray[object] values, object val, object op) -> ndarray: """ Apply the given binary operator `op` between each element of the array `values` and the scalar `val`. @@ -214,7 +214,7 @@ def scalar_binop(object[:] values, object val, object op) -> ndarray: @cython.wraparound(False) @cython.boundscheck(False) -def vec_binop(object[:] left, object[:] right, object op) -> ndarray: +def vec_binop(ndarray[object] left, ndarray[object] right, object op) -> ndarray: """ Apply the given binary operator `op` pointwise to the elements of arrays `left` and `right`. diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 26585e7bab8e3..2eb235f87d231 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -252,6 +252,9 @@ def shift(self, periods: int = 1, fill_value=None) -> Self: return self._from_backing_data(new_values) def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + key = check_array_indexer(self, key) value = self._validate_setitem_value(value) self._ndarray[key] = value diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 919453b29b7f9..61082f920a4ab 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1967,6 +1967,9 @@ def __setitem__(self, key, value) -> None: ------- None """ + if self._readonly: + raise ValueError("Cannot modify readonly array") + # GH50085: unwrap 1D indexers if isinstance(key, tuple) and len(key) == 1: key = key[0] diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index d11e2271f9574..5c488b6aa0906 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -40,6 +40,7 @@ validate_insert_loc, ) +from pandas.core.dtypes.astype import astype_is_view from pandas.core.dtypes.cast import maybe_cast_pointwise_result from pandas.core.dtypes.common import ( is_list_like, @@ -269,6 +270,8 @@ class ExtensionArray: # strictly less than 2000 to be below Index.__pandas_priority__. __pandas_priority__ = 1000 + _readonly = False + # ------------------------------------------------------------------------ # Constructors # ------------------------------------------------------------------------ @@ -482,6 +485,11 @@ def __setitem__(self, key, value) -> None: Returns ------- None + + Raises + ------ + ValueError + If the array is readonly and modification is attempted. """ # Some notes to the ExtensionArray implementer who may have ended up # here. While this method is not required for the interface, if you @@ -501,6 +509,10 @@ def __setitem__(self, key, value) -> None: # __init__ method coerces that value, then so should __setitem__ # Note, also, that Series/DataFrame.where internally use __setitem__ # on a copy of the data. + # Check if the array is readonly + if self._readonly: + raise ValueError("Cannot modify readonly array") + raise NotImplementedError(f"{type(self)} does not implement __setitem__.") def __len__(self) -> int: @@ -595,8 +607,14 @@ def to_numpy( result = np.asarray(self, dtype=dtype) if copy or na_value is not lib.no_default: result = result.copy() + elif self._readonly and astype_is_view(self.dtype, result.dtype): + # If the ExtensionArray is readonly, make the numpy array readonly too + result = result.view() + result.flags.writeable = False + if na_value is not lib.no_default: result[self.isna()] = na_value # type: ignore[index] + return result # ------------------------------------------------------------------------ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index d5e654c95577e..91f6646d07c21 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -368,7 +368,12 @@ def __array__( if copy is True: return np.array(self._ndarray, dtype=dtype) - return self._ndarray + + result = self._ndarray + if self._readonly: + result = result.view() + result.flags.writeable = False + return result @overload def __getitem__(self, key: ScalarIndexer) -> DTScalarOrNaT: ... diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 4bcbe2eedee47..9996eb93ea8f0 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -729,6 +729,9 @@ def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA: return self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type] def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + value_left, value_right = self._validate_setitem_value(value) key = check_array_indexer(self, key) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index fefd70fef35c9..59cbda6fd1e64 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -23,6 +23,7 @@ from pandas.errors import AbstractMethodError from pandas.util._decorators import doc +from pandas.core.dtypes.astype import astype_is_view from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.common import ( is_bool, @@ -290,6 +291,9 @@ def _validate_setitem_value(self, value): raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'") def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + key = check_array_indexer(self, key) if is_scalar(value): @@ -520,6 +524,9 @@ def to_numpy( with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=RuntimeWarning) data = self._data.astype(dtype, copy=copy) + if self._readonly and astype_is_view(self.dtype, dtype): + data = data.view() + data.flags.writeable = False return data @doc(ExtensionArray.tolist) @@ -596,7 +603,12 @@ def __array__( if copy is False: if not self._hasna: # special case, here we can simply return the underlying data - return np.array(self._data, dtype=dtype, copy=copy) + result = np.array(self._data, dtype=dtype, copy=copy) + # If the ExtensionArray is readonly, make the numpy array readonly too + if self._readonly: + result = result.view() + result.flags.writeable = False + return result raise ValueError( "Unable to avoid copy while creating an array as requested." ) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index fd2c8c9d63362..ebede913e77fc 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -12,7 +12,10 @@ from pandas._libs.tslibs import is_supported_dtype from pandas.compat.numpy import function as nv -from pandas.core.dtypes.astype import astype_array +from pandas.core.dtypes.astype import ( + astype_array, + astype_is_view, +) from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.dtypes import NumpyEADtype @@ -160,8 +163,19 @@ def __array__( ) -> np.ndarray: if copy is not None: # Note: branch avoids `copy=None` for NumPy 1.x support - return np.array(self._ndarray, dtype=dtype, copy=copy) - return np.asarray(self._ndarray, dtype=dtype) + result = np.array(self._ndarray, dtype=dtype, copy=copy) + else: + result = np.asarray(self._ndarray, dtype=dtype) + + if ( + self._readonly + and not copy + and (dtype is None or astype_is_view(self.dtype, dtype)) + ): + result = result.view() + result.flags.writeable = False + + return result def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): # Lightly modified version of @@ -512,6 +526,9 @@ def to_numpy( result[mask] = na_value else: result = self._ndarray + if not copy and self._readonly: + result = result.view() + result.flags.writeable = False result = np.asarray(result, dtype=dtype) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index ae92e17332c76..d2b3448a59896 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -393,7 +393,11 @@ def __array__( # For NumPy 1.x compatibility we cannot use copy=None. And # `copy=False` has the meaning of `copy=None` here: if not copy: - return np.asarray(self.asi8, dtype=dtype) + result = np.asarray(self.asi8, dtype=dtype) + if self._readonly: + result = result.view() + result.flags.writeable = False + return result else: return np.array(self.asi8, dtype=dtype) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 137dbb6e4d139..2912444a5185e 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -562,7 +562,11 @@ def __array__( if copy is True: return np.array(self.sp_values) else: - return self.sp_values + result = self.sp_values + if self._readonly: + result = result.view() + result.flags.writeable = False + return result if copy is False: raise ValueError( @@ -591,6 +595,8 @@ def __array__( return out def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") # I suppose we could allow setting of non-fill_value elements. # TODO(SparseArray.__setitem__): remove special cases in # ExtensionBlock.where @@ -969,6 +975,8 @@ def __getitem__( # _NestedSequence[Union[bool, int]]], ...]]" data_slice = self.to_dense()[key] # type: ignore[index] elif isinstance(key, slice): + if key == slice(None): + return type(self)._simple_new(self.sp_values, self.sp_index, self.dtype) # Avoid densifying when handling contiguous slices if key.step is None or key.step == 1: start = 0 if key.start is None else key.start diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index f52b709a59de9..abf77b57e2a49 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -769,6 +769,9 @@ def _maybe_convert_setitem_value(self, value): return value def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + value = self._maybe_convert_setitem_value(value) key = check_array_indexer(self, key) diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 086f7d2da6640..75464cbb2b5f9 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -24,8 +24,12 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, ExtensionDtype, + IntervalDtype, NumpyEADtype, + PeriodDtype, ) if TYPE_CHECKING: @@ -283,6 +287,16 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool: new_dtype = getattr(new_dtype, "numpy_dtype", new_dtype) return getattr(dtype, "unit", None) == getattr(new_dtype, "unit", None) + elif new_dtype == object and isinstance( + dtype, (DatetimeTZDtype, PeriodDtype, IntervalDtype) + ): + return False + + elif isinstance(dtype, CategoricalDtype) and not isinstance( + new_dtype, CategoricalDtype + ): + return False + numpy_dtype = getattr(dtype, "numpy_dtype", None) new_numpy_dtype = getattr(new_dtype, "numpy_dtype", None) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e8c5a03a6de50..4172e159b0d90 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4972,6 +4972,8 @@ def array(self) -> ExtensionArray: from pandas.core.arrays.numpy_ import NumpyExtensionArray array = NumpyExtensionArray(array) + array = array.view() + array._readonly = True return array @property diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6aa5062b8ed86..f778cf618d077 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2359,7 +2359,9 @@ def external_values(values: ArrayLike) -> ArrayLike: if isinstance(values, np.ndarray): values = values.view() values.flags.writeable = False - - # TODO(CoW) we should also mark our ExtensionArrays as read-only + else: + # ExtensionArrays + values = values.view() + values._readonly = True return values diff --git a/pandas/core/series.py b/pandas/core/series.py index ce5b2e5ed8de5..1bd46f779831b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -820,7 +820,10 @@ def _references(self) -> BlockValuesRefs: @Appender(base.IndexOpsMixin.array.__doc__) # type: ignore[prop-decorator] @property def array(self) -> ExtensionArray: - return self._mgr.array_values() + arr = self._mgr.array_values() + arr = arr.view() + arr._readonly = True + return arr def __len__(self) -> int: """ diff --git a/pandas/tests/arrays/integer/test_indexing.py b/pandas/tests/arrays/integer/test_indexing.py index 4b953d699108b..ce801db5cb58d 100644 --- a/pandas/tests/arrays/integer/test_indexing.py +++ b/pandas/tests/arrays/integer/test_indexing.py @@ -12,7 +12,7 @@ def test_array_setitem_nullable_boolean_mask(): def test_array_setitem(): # GH 31446 - arr = pd.Series([1, 2], dtype="Int64").array + arr = pd.array([1, 2], dtype="Int64") arr[arr > 1] = 1 expected = pd.array([1, 1], dtype="Int64") diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py index 620a553d5a731..af3a793a3ef77 100644 --- a/pandas/tests/arrays/numpy_/test_numpy.py +++ b/pandas/tests/arrays/numpy_/test_numpy.py @@ -159,13 +159,6 @@ def test_to_numpy(): # Setitem -def test_setitem_series(): - ser = pd.Series([1, 2, 3]) - ser.array[0] = 10 - expected = pd.Series([10, 2, 3]) - tm.assert_series_equal(ser, expected) - - def test_setitem(any_numpy_array): nparr = any_numpy_array arr = NumpyExtensionArray(nparr, copy=True) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index d1ef29b0bf8a0..a2eeee1447111 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1248,8 +1248,8 @@ def test_invalid_nat_setitem_array(arr, non_casting_nats): @pytest.mark.parametrize( "arr", [ - pd.date_range("2000", periods=4).array, - pd.timedelta_range("2000", periods=4).array, + pd.date_range("2000", periods=4).array.copy(), + pd.timedelta_range("2000", periods=4).array.copy(), ], ) def test_to_numpy_extra(arr): diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index e3a821519c638..bddd28d29ba1c 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -285,7 +285,7 @@ def test_array(arr, attr, index_or_series): arr = getattr(arr, attr) result = getattr(result, attr) - assert result is arr + assert np.shares_memory(result, arr) def test_array_multiindex_raises(): diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 185d6d750cace..e9389a4e57549 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -454,3 +454,65 @@ def test_setitem_2d_values(self, data): df.loc[[0, 1], :] = df.loc[[1, 0], :].values assert (df.loc[0, :] == original[1]).all() assert (df.loc[1, :] == original[0]).all() + + def test_readonly_property(self, data): + assert data._readonly is False + + data._readonly = True + assert data._readonly is True + + data_orig = data.copy() + assert data_orig._readonly is False + + with pytest.raises(ValueError, match="Cannot modify readonly array"): + data[0] = data[1] + + with pytest.raises(ValueError, match="Cannot modify readonly array"): + data[0:3] = data[1] + + with pytest.raises(ValueError, match="Cannot modify readonly array"): + data[np.array([True] * len(data))] = data[1] + + tm.assert_extension_array_equal(data, data_orig) + + def test_readonly_propagates_to_numpy_array(self, data): + data._readonly = True + + # when we ask for a copy, the result should never be readonly + arr = np.array(data) + assert arr.flags.writeable + + # when we don't ask for a copy -> if the conversion is zero-copy, + # the result should be readonly + arr1 = np.asarray(data) + arr2 = np.asarray(data) + if np.shares_memory(arr1, arr2): + assert not arr1.flags.writeable + else: + assert arr1.flags.writeable + + def test_readonly_propagates_to_numpy_array_method(self, data): + data._readonly = True + + # when we ask for a copy, the result should never be readonly + arr = data.to_numpy(copy=True) + assert arr.flags.writeable + + # when we don't ask for a copy -> if the conversion is zero-copy, + # the result should be readonly + arr1 = data.to_numpy(copy=False) + arr2 = data.to_numpy(copy=False) + if np.shares_memory(arr1, arr2): + assert not arr1.flags.writeable + else: + assert arr1.flags.writeable + + # non-NA fill value should always result in a copy + if data.isna().any(): + arr = data.to_numpy(copy=False, na_value=data[0]) + if isinstance(data.dtype, pd.ArrowDtype) and data.dtype.kind == "f": + # for float dtype, after the fillna, the conversion from pyarrow to + # numpy is zero-copy, and pyarrow will mark the array as readonly + assert not arr.flags.writeable + else: + assert arr.flags.writeable diff --git a/pandas/tests/extension/date/array.py b/pandas/tests/extension/date/array.py index 0c51570189a7c..e6a4441bca97c 100644 --- a/pandas/tests/extension/date/array.py +++ b/pandas/tests/extension/date/array.py @@ -149,6 +149,9 @@ def __getitem__(self, item: PositionalIndexer): raise NotImplementedError("only ints are supported as indexes") def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + if not isinstance(key, int): raise NotImplementedError("only ints are supported as indexes") diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 2ee6a73ec4054..de8e45679ea59 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -195,6 +195,9 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy=copy) def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + if is_list_like(value): if is_scalar(key): raise ValueError("setting an array element with a sequence.") diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index b110911bda400..7e224b2416e10 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -120,6 +120,9 @@ def __getitem__(self, item): return type(self)([self.data[i] for i in item]) def __setitem__(self, key, value) -> None: + if self._readonly: + raise ValueError("Cannot modify readonly array") + if isinstance(key, numbers.Integral): self.data[key] = value else: @@ -152,7 +155,6 @@ def __array__(self, dtype=None, copy=None): raise ValueError( "Unable to avoid copy while creating an array as requested." ) - if dtype is None: dtype = object if dtype == object: diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 4bc9562f1895d..895d5df8810ad 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -418,6 +418,12 @@ def test_setitem_slice_array(self, data): def test_setitem_invalid(self, data, invalid_scalar): super().test_setitem_invalid(data, invalid_scalar) + @pytest.mark.xfail( + reason="result readonly flag is incorrect and does not support na_value" + ) + def test_readonly_propagates_to_numpy_array_method(self, data): + super().test_readonly_propagates_to_numpy_array_method(data) + @pytest.mark.xfail(reason="only integer scalar arrays can be converted") def test_setitem_2d_values(self, data): super().test_setitem_2d_values(data) diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index 5eda0f00f54ca..9f782770a02e4 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -101,5 +101,5 @@ def test_ellipsis_index(): # String comparison because there's no native way to compare slices. # Before the fix for GH#42430, last_item_arg would get set to the 2D slice # (Ellipsis, slice(None, 1, None)) - out = df["col1"].array.last_item_arg + out = df["col1"]._values.last_item_arg assert str(out) == "slice(None, 1, None)" diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 79cfb736941d6..e6301d9a23604 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -77,7 +77,7 @@ def allow_in_pandas(monkeypatch): @pytest.fixture def data(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": - return pd.Series([(i,) for i in range(100)]).array + return pd.Series([(i,) for i in range(100)]).array.copy() return NumpyExtensionArray(np.arange(1, 101, dtype=dtype._dtype)) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 11e6b99204aee..2622310aaacd6 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1347,7 +1347,7 @@ def check_series_setitem(self, elem, index: Index, inplace: bool): ser[: len(elem)] = elem if inplace: - assert ser.array is arr # i.e. setting was done inplace + assert ser._values is arr # i.e. setting was done inplace else: assert ser.dtype == object diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index f82451a2be84d..f8af23de89297 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -525,9 +525,9 @@ def test_categorical_sideeffects_free(self): # so this WILL change values cat = Categorical(["a", "b", "c", "a"]) s = Series(cat, copy=False) - assert s.values is cat + assert s._values is cat s = s.cat.rename_categories([1, 2, 3]) - assert s.values is not cat + assert s._values is not cat exp_s = np.array([1, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s)