Skip to content

Commit f84ba18

Browse files
authored
REF: get rid of ArrowStringArrayNumpySemantics (#62165)
1 parent 551a8e8 commit f84ba18

File tree

12 files changed

+219
-177
lines changed

12 files changed

+219
-177
lines changed

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 26 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,9 @@ class ArrowStringArrayMixin:
3434
def __init__(self, *args, **kwargs) -> None:
3535
raise NotImplementedError
3636

37+
def _from_pyarrow_array(self, pa_array) -> Self:
38+
raise NotImplementedError
39+
3740
def _convert_bool_result(self, result, na=lib.no_default, method_name=None):
3841
# Convert a bool-dtype result to the appropriate result type
3942
raise NotImplementedError
@@ -50,31 +53,31 @@ def _str_len(self):
5053
return self._convert_int_result(result)
5154

5255
def _str_lower(self) -> Self:
53-
return type(self)(pc.utf8_lower(self._pa_array))
56+
return self._from_pyarrow_array(pc.utf8_lower(self._pa_array))
5457

5558
def _str_upper(self) -> Self:
56-
return type(self)(pc.utf8_upper(self._pa_array))
59+
return self._from_pyarrow_array(pc.utf8_upper(self._pa_array))
5760

5861
def _str_strip(self, to_strip=None) -> Self:
5962
if to_strip is None:
6063
result = pc.utf8_trim_whitespace(self._pa_array)
6164
else:
6265
result = pc.utf8_trim(self._pa_array, characters=to_strip)
63-
return type(self)(result)
66+
return self._from_pyarrow_array(result)
6467

6568
def _str_lstrip(self, to_strip=None) -> Self:
6669
if to_strip is None:
6770
result = pc.utf8_ltrim_whitespace(self._pa_array)
6871
else:
6972
result = pc.utf8_ltrim(self._pa_array, characters=to_strip)
70-
return type(self)(result)
73+
return self._from_pyarrow_array(result)
7174

7275
def _str_rstrip(self, to_strip=None) -> Self:
7376
if to_strip is None:
7477
result = pc.utf8_rtrim_whitespace(self._pa_array)
7578
else:
7679
result = pc.utf8_rtrim(self._pa_array, characters=to_strip)
77-
return type(self)(result)
80+
return self._from_pyarrow_array(result)
7881

7982
def _str_pad(
8083
self,
@@ -104,7 +107,9 @@ def _str_pad(
104107
raise ValueError(
105108
f"Invalid side: {side}. Side must be one of 'left', 'right', 'both'"
106109
)
107-
return type(self)(pa_pad(self._pa_array, width=width, padding=fillchar))
110+
return self._from_pyarrow_array(
111+
pa_pad(self._pa_array, width=width, padding=fillchar)
112+
)
108113

109114
def _str_get(self, i: int) -> Self:
110115
lengths = pc.utf8_length(self._pa_array)
@@ -124,15 +129,17 @@ def _str_get(self, i: int) -> Self:
124129
)
125130
null_value = pa.scalar(None, type=self._pa_array.type)
126131
result = pc.if_else(not_out_of_bounds, selected, null_value)
127-
return type(self)(result)
132+
return self._from_pyarrow_array(result)
128133

129134
def _str_slice(
130135
self, start: int | None = None, stop: int | None = None, step: int | None = None
131136
) -> Self:
132137
if pa_version_under13p0:
133138
# GH#59724
134139
result = self._apply_elementwise(lambda val: val[start:stop:step])
135-
return type(self)(pa.chunked_array(result, type=self._pa_array.type))
140+
return self._from_pyarrow_array(
141+
pa.chunked_array(result, type=self._pa_array.type)
142+
)
136143
if start is None:
137144
if step is not None and step < 0:
138145
# GH#59710
@@ -141,7 +148,7 @@ def _str_slice(
141148
start = 0
142149
if step is None:
143150
step = 1
144-
return type(self)(
151+
return self._from_pyarrow_array(
145152
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
146153
)
147154

@@ -154,7 +161,9 @@ def _str_slice_replace(
154161
start = 0
155162
if stop is None:
156163
stop = np.iinfo(np.int64).max
157-
return type(self)(pc.utf8_replace_slice(self._pa_array, start, stop, repl))
164+
return self._from_pyarrow_array(
165+
pc.utf8_replace_slice(self._pa_array, start, stop, repl)
166+
)
158167

159168
def _str_replace(
160169
self,
@@ -181,32 +190,32 @@ def _str_replace(
181190
replacement=repl,
182191
max_replacements=pa_max_replacements,
183192
)
184-
return type(self)(result)
193+
return self._from_pyarrow_array(result)
185194

186195
def _str_capitalize(self) -> Self:
187-
return type(self)(pc.utf8_capitalize(self._pa_array))
196+
return self._from_pyarrow_array(pc.utf8_capitalize(self._pa_array))
188197

189198
def _str_title(self) -> Self:
190-
return type(self)(pc.utf8_title(self._pa_array))
199+
return self._from_pyarrow_array(pc.utf8_title(self._pa_array))
191200

192201
def _str_swapcase(self) -> Self:
193-
return type(self)(pc.utf8_swapcase(self._pa_array))
202+
return self._from_pyarrow_array(pc.utf8_swapcase(self._pa_array))
194203

195204
def _str_removeprefix(self, prefix: str):
196205
if not pa_version_under13p0:
197206
starts_with = pc.starts_with(self._pa_array, pattern=prefix)
198207
removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
199208
result = pc.if_else(starts_with, removed, self._pa_array)
200-
return type(self)(result)
209+
return self._from_pyarrow_array(result)
201210
predicate = lambda val: val.removeprefix(prefix)
202211
result = self._apply_elementwise(predicate)
203-
return type(self)(pa.chunked_array(result))
212+
return self._from_pyarrow_array(pa.chunked_array(result))
204213

205214
def _str_removesuffix(self, suffix: str):
206215
ends_with = pc.ends_with(self._pa_array, pattern=suffix)
207216
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
208217
result = pc.if_else(ends_with, removed, self._pa_array)
209-
return type(self)(result)
218+
return self._from_pyarrow_array(result)
210219

211220
def _str_startswith(
212221
self, pat: str | tuple[str, ...], na: Scalar | lib.NoDefault = lib.no_default

0 commit comments

Comments
 (0)