-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
CoW: add readonly flag to ExtensionArrays, return read-only EA/ndarray in .array/EA.to_numpy() #61925
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
CoW: add readonly flag to ExtensionArrays, return read-only EA/ndarray in .array/EA.to_numpy() #61925
Changes from all commits
a9df51b
9cd6e4f
c6f37d1
8058d9a
91465ee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,6 +40,7 @@ | |
validate_insert_loc, | ||
) | ||
|
||
from pandas.core.dtypes.astype import astype_is_view | ||
from pandas.core.dtypes.cast import maybe_cast_pointwise_result | ||
from pandas.core.dtypes.common import ( | ||
is_list_like, | ||
|
@@ -269,6 +270,8 @@ class ExtensionArray: | |
# strictly less than 2000 to be below Index.__pandas_priority__. | ||
__pandas_priority__ = 1000 | ||
|
||
_readonly = False | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not use arr.flags.writeable to be consistent with numpy? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because this was easier for a quick POC ;) |
||
|
||
# ------------------------------------------------------------------------ | ||
# Constructors | ||
# ------------------------------------------------------------------------ | ||
|
@@ -482,6 +485,11 @@ def __setitem__(self, key, value) -> None: | |
Returns | ||
------- | ||
None | ||
|
||
Raises | ||
------ | ||
ValueError | ||
If the array is readonly and modification is attempted. | ||
""" | ||
# Some notes to the ExtensionArray implementer who may have ended up | ||
# here. While this method is not required for the interface, if you | ||
|
@@ -501,6 +509,10 @@ def __setitem__(self, key, value) -> None: | |
# __init__ method coerces that value, then so should __setitem__ | ||
# Note, also, that Series/DataFrame.where internally use __setitem__ | ||
# on a copy of the data. | ||
# Check if the array is readonly | ||
if self._readonly: | ||
raise ValueError("Cannot modify readonly array") | ||
|
||
raise NotImplementedError(f"{type(self)} does not implement __setitem__.") | ||
|
||
def __len__(self) -> int: | ||
|
@@ -595,8 +607,14 @@ def to_numpy( | |
result = np.asarray(self, dtype=dtype) | ||
if copy or na_value is not lib.no_default: | ||
result = result.copy() | ||
elif self._readonly and astype_is_view(self.dtype, result.dtype): | ||
# If the ExtensionArray is readonly, make the numpy array readonly too | ||
result = result.view() | ||
result.flags.writeable = False | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this be done below the setting of na_value on L616? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think so, because in that case the |
||
|
||
if na_value is not lib.no_default: | ||
result[self.isna()] = na_value # type: ignore[index] | ||
|
||
return result | ||
|
||
# ------------------------------------------------------------------------ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -562,7 +562,11 @@ def __array__( | |
if copy is True: | ||
return np.array(self.sp_values) | ||
else: | ||
return self.sp_values | ||
result = self.sp_values | ||
if self._readonly: | ||
result = result.view() | ||
result.flags.writeable = False | ||
return result | ||
|
||
if copy is False: | ||
raise ValueError( | ||
|
@@ -591,6 +595,8 @@ def __array__( | |
return out | ||
|
||
def __setitem__(self, key, value) -> None: | ||
if self._readonly: | ||
raise ValueError("Cannot modify readonly array") | ||
# I suppose we could allow setting of non-fill_value elements. | ||
# TODO(SparseArray.__setitem__): remove special cases in | ||
# ExtensionBlock.where | ||
|
@@ -969,6 +975,8 @@ def __getitem__( | |
# _NestedSequence[Union[bool, int]]], ...]]" | ||
data_slice = self.to_dense()[key] # type: ignore[index] | ||
elif isinstance(key, slice): | ||
if key == slice(None): | ||
return type(self)._simple_new(self.sp_values, self.sp_index, self.dtype) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this special case needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To avoid that But can add a comment to clarify. There is a comment just below about "# Avoid densifying when handling contiguous slices", but that does not actually avoid making a copy in its current implementation because it translates the slice in integer indices. While for the special case of a full slice, that should not even be needed. |
||
# Avoid densifying when handling contiguous slices | ||
if key.step is None or key.step == 1: | ||
start = 0 if key.start is None else key.start | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1248,8 +1248,8 @@ def test_invalid_nat_setitem_array(arr, non_casting_nats): | |
@pytest.mark.parametrize( | ||
"arr", | ||
[ | ||
pd.date_range("2000", periods=4).array, | ||
pd.timedelta_range("2000", periods=4).array, | ||
pd.date_range("2000", periods=4).array.copy(), | ||
pd.timedelta_range("2000", periods=4).array.copy(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ._values? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, it seems that my test updates are a bit of a mix of both |
||
], | ||
) | ||
def test_to_numpy_extra(arr): | ||
|
Uh oh!
There was an error while loading. Please reload this page.