Skip to content

CoW: add readonly flag to ExtensionArrays, return read-only EA/ndarray in .array/EA.to_numpy() #61925

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/_libs/ops.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarr

@cython.wraparound(False)
@cython.boundscheck(False)
def scalar_binop(object[:] values, object val, object op) -> ndarray:
def scalar_binop(ndarray[object] values, object val, object op) -> ndarray:
"""
Apply the given binary operator `op` between each element of the array
`values` and the scalar `val`.
Expand Down Expand Up @@ -214,7 +214,7 @@ def scalar_binop(object[:] values, object val, object op) -> ndarray:

@cython.wraparound(False)
@cython.boundscheck(False)
def vec_binop(object[:] left, object[:] right, object op) -> ndarray:
def vec_binop(ndarray[object] left, ndarray[object] right, object op) -> ndarray:
"""
Apply the given binary operator `op` pointwise to the elements of
arrays `left` and `right`.
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,9 @@ def shift(self, periods: int = 1, fill_value=None) -> Self:
return self._from_backing_data(new_values)

def __setitem__(self, key, value) -> None:
if self._readonly:
raise ValueError("Cannot modify readonly array")

key = check_array_indexer(self, key)
value = self._validate_setitem_value(value)
self._ndarray[key] = value
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1967,6 +1967,9 @@ def __setitem__(self, key, value) -> None:
-------
None
"""
if self._readonly:
raise ValueError("Cannot modify readonly array")

# GH50085: unwrap 1D indexers
if isinstance(key, tuple) and len(key) == 1:
key = key[0]
Expand Down
18 changes: 18 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
validate_insert_loc,
)

from pandas.core.dtypes.astype import astype_is_view
from pandas.core.dtypes.cast import maybe_cast_pointwise_result
from pandas.core.dtypes.common import (
is_list_like,
Expand Down Expand Up @@ -269,6 +270,8 @@ class ExtensionArray:
# strictly less than 2000 to be below Index.__pandas_priority__.
__pandas_priority__ = 1000

_readonly = False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not use arr.flags.writeable to be consistent with numpy?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because this was easier for a quick POC ;)
It would indeed keep it more consistent in usage, so that might be a reason to add a flags attribute, so code that needs to work with both ndarray or EA can use one code path. But I don't think we would ever add any of the other flags that numpy has, so not sure it would then be worth to add a nested attribute for this.


# ------------------------------------------------------------------------
# Constructors
# ------------------------------------------------------------------------
Expand Down Expand Up @@ -482,6 +485,11 @@ def __setitem__(self, key, value) -> None:
Returns
-------
None

Raises
------
ValueError
If the array is readonly and modification is attempted.
"""
# Some notes to the ExtensionArray implementer who may have ended up
# here. While this method is not required for the interface, if you
Expand All @@ -501,6 +509,10 @@ def __setitem__(self, key, value) -> None:
# __init__ method coerces that value, then so should __setitem__
# Note, also, that Series/DataFrame.where internally use __setitem__
# on a copy of the data.
# Check if the array is readonly
if self._readonly:
raise ValueError("Cannot modify readonly array")

raise NotImplementedError(f"{type(self)} does not implement __setitem__.")

def __len__(self) -> int:
Expand Down Expand Up @@ -595,8 +607,14 @@ def to_numpy(
result = np.asarray(self, dtype=dtype)
if copy or na_value is not lib.no_default:
result = result.copy()
elif self._readonly and astype_is_view(self.dtype, result.dtype):
# If the ExtensionArray is readonly, make the numpy array readonly too
result = result.view()
result.flags.writeable = False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be done below the setting of na_value on L616?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so, because in that case the result array is already a copy, so no need to take a read-only view in that case


if na_value is not lib.no_default:
result[self.isna()] = na_value # type: ignore[index]

return result

# ------------------------------------------------------------------------
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,12 @@ def __array__(

if copy is True:
return np.array(self._ndarray, dtype=dtype)
return self._ndarray

result = self._ndarray
if self._readonly:
result = result.view()
result.flags.writeable = False
return result

@overload
def __getitem__(self, key: ScalarIndexer) -> DTScalarOrNaT: ...
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,9 @@ def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA:
return self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type]

def __setitem__(self, key, value) -> None:
if self._readonly:
raise ValueError("Cannot modify readonly array")

value_left, value_right = self._validate_setitem_value(value)
key = check_array_indexer(self, key)

Expand Down
14 changes: 13 additions & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc

from pandas.core.dtypes.astype import astype_is_view
from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.common import (
is_bool,
Expand Down Expand Up @@ -290,6 +291,9 @@ def _validate_setitem_value(self, value):
raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'")

def __setitem__(self, key, value) -> None:
if self._readonly:
raise ValueError("Cannot modify readonly array")

key = check_array_indexer(self, key)

if is_scalar(value):
Expand Down Expand Up @@ -520,6 +524,9 @@ def to_numpy(
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning)
data = self._data.astype(dtype, copy=copy)
if self._readonly and astype_is_view(self.dtype, dtype):
data = data.view()
data.flags.writeable = False
return data

@doc(ExtensionArray.tolist)
Expand Down Expand Up @@ -596,7 +603,12 @@ def __array__(
if copy is False:
if not self._hasna:
# special case, here we can simply return the underlying data
return np.array(self._data, dtype=dtype, copy=copy)
result = np.array(self._data, dtype=dtype, copy=copy)
# If the ExtensionArray is readonly, make the numpy array readonly too
if self._readonly:
result = result.view()
result.flags.writeable = False
return result
raise ValueError(
"Unable to avoid copy while creating an array as requested."
)
Expand Down
23 changes: 20 additions & 3 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
from pandas._libs.tslibs import is_supported_dtype
from pandas.compat.numpy import function as nv

from pandas.core.dtypes.astype import astype_array
from pandas.core.dtypes.astype import (
astype_array,
astype_is_view,
)
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import pandas_dtype
from pandas.core.dtypes.dtypes import NumpyEADtype
Expand Down Expand Up @@ -160,8 +163,19 @@ def __array__(
) -> np.ndarray:
if copy is not None:
# Note: branch avoids `copy=None` for NumPy 1.x support
return np.array(self._ndarray, dtype=dtype, copy=copy)
return np.asarray(self._ndarray, dtype=dtype)
result = np.array(self._ndarray, dtype=dtype, copy=copy)
else:
result = np.asarray(self._ndarray, dtype=dtype)

if (
self._readonly
and not copy
and (dtype is None or astype_is_view(self.dtype, dtype))
):
result = result.view()
result.flags.writeable = False

return result

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
# Lightly modified version of
Expand Down Expand Up @@ -512,6 +526,9 @@ def to_numpy(
result[mask] = na_value
else:
result = self._ndarray
if not copy and self._readonly:
result = result.view()
result.flags.writeable = False

result = np.asarray(result, dtype=dtype)

Expand Down
6 changes: 5 additions & 1 deletion pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,11 @@ def __array__(
# For NumPy 1.x compatibility we cannot use copy=None. And
# `copy=False` has the meaning of `copy=None` here:
if not copy:
return np.asarray(self.asi8, dtype=dtype)
result = np.asarray(self.asi8, dtype=dtype)
if self._readonly:
result = result.view()
result.flags.writeable = False
return result
else:
return np.array(self.asi8, dtype=dtype)

Expand Down
10 changes: 9 additions & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,11 @@ def __array__(
if copy is True:
return np.array(self.sp_values)
else:
return self.sp_values
result = self.sp_values
if self._readonly:
result = result.view()
result.flags.writeable = False
return result

if copy is False:
raise ValueError(
Expand Down Expand Up @@ -591,6 +595,8 @@ def __array__(
return out

def __setitem__(self, key, value) -> None:
if self._readonly:
raise ValueError("Cannot modify readonly array")
# I suppose we could allow setting of non-fill_value elements.
# TODO(SparseArray.__setitem__): remove special cases in
# ExtensionBlock.where
Expand Down Expand Up @@ -969,6 +975,8 @@ def __getitem__(
# _NestedSequence[Union[bool, int]]], ...]]"
data_slice = self.to_dense()[key] # type: ignore[index]
elif isinstance(key, slice):
if key == slice(None):
return type(self)._simple_new(self.sp_values, self.sp_index, self.dtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this special case needed?

Copy link
Member Author

@jorisvandenbossche jorisvandenbossche Jul 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To avoid that arr[:] makes a copy, and I got there because the default EA.view() implementation uses that.

But can add a comment to clarify. There is a comment just below about "# Avoid densifying when handling contiguous slices", but that does not actually avoid making a copy in its current implementation because it translates the slice in integer indices. While for the special case of a full slice, that should not even be needed.

# Avoid densifying when handling contiguous slices
if key.step is None or key.step == 1:
start = 0 if key.start is None else key.start
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,9 @@ def _maybe_convert_setitem_value(self, value):
return value

def __setitem__(self, key, value) -> None:
if self._readonly:
raise ValueError("Cannot modify readonly array")

value = self._maybe_convert_setitem_value(value)

key = check_array_indexer(self, key)
Expand Down
14 changes: 14 additions & 0 deletions pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,12 @@
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
DatetimeTZDtype,
ExtensionDtype,
IntervalDtype,
NumpyEADtype,
PeriodDtype,
)

if TYPE_CHECKING:
Expand Down Expand Up @@ -283,6 +287,16 @@ def astype_is_view(dtype: DtypeObj, new_dtype: DtypeObj) -> bool:
new_dtype = getattr(new_dtype, "numpy_dtype", new_dtype)
return getattr(dtype, "unit", None) == getattr(new_dtype, "unit", None)

elif new_dtype == object and isinstance(
dtype, (DatetimeTZDtype, PeriodDtype, IntervalDtype)
):
return False

elif isinstance(dtype, CategoricalDtype) and not isinstance(
new_dtype, CategoricalDtype
):
return False

numpy_dtype = getattr(dtype, "numpy_dtype", None)
new_numpy_dtype = getattr(new_dtype, "numpy_dtype", None)

Expand Down
2 changes: 2 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4972,6 +4972,8 @@ def array(self) -> ExtensionArray:
from pandas.core.arrays.numpy_ import NumpyExtensionArray

array = NumpyExtensionArray(array)
array = array.view()
array._readonly = True
return array

@property
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2359,7 +2359,9 @@ def external_values(values: ArrayLike) -> ArrayLike:
if isinstance(values, np.ndarray):
values = values.view()
values.flags.writeable = False

# TODO(CoW) we should also mark our ExtensionArrays as read-only
else:
# ExtensionArrays
values = values.view()
values._readonly = True

return values
5 changes: 4 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,10 @@ def _references(self) -> BlockValuesRefs:
@Appender(base.IndexOpsMixin.array.__doc__) # type: ignore[prop-decorator]
@property
def array(self) -> ExtensionArray:
return self._mgr.array_values()
arr = self._mgr.array_values()
arr = arr.view()
arr._readonly = True
return arr

def __len__(self) -> int:
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/integer/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def test_array_setitem_nullable_boolean_mask():

def test_array_setitem():
# GH 31446
arr = pd.Series([1, 2], dtype="Int64").array
arr = pd.array([1, 2], dtype="Int64")
arr[arr > 1] = 1

expected = pd.array([1, 1], dtype="Int64")
Expand Down
7 changes: 0 additions & 7 deletions pandas/tests/arrays/numpy_/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,13 +159,6 @@ def test_to_numpy():
# Setitem


def test_setitem_series():
ser = pd.Series([1, 2, 3])
ser.array[0] = 10
expected = pd.Series([10, 2, 3])
tm.assert_series_equal(ser, expected)


def test_setitem(any_numpy_array):
nparr = any_numpy_array
arr = NumpyExtensionArray(nparr, copy=True)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1248,8 +1248,8 @@ def test_invalid_nat_setitem_array(arr, non_casting_nats):
@pytest.mark.parametrize(
"arr",
[
pd.date_range("2000", periods=4).array,
pd.timedelta_range("2000", periods=4).array,
pd.date_range("2000", periods=4).array.copy(),
pd.timedelta_range("2000", periods=4).array.copy(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

._values?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it seems that my test updates are a bit of a mix of both .array/values.copy() or _values. Will more consistently use _values

],
)
def test_to_numpy_extra(arr):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/base/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def test_array(arr, attr, index_or_series):
arr = getattr(arr, attr)
result = getattr(result, attr)

assert result is arr
assert np.shares_memory(result, arr)


def test_array_multiindex_raises():
Expand Down
Loading
Loading