@@ -34,6 +34,9 @@ class ArrowStringArrayMixin:
34
34
def __init__ (self , * args , ** kwargs ) -> None :
35
35
raise NotImplementedError
36
36
37
+ def _from_pyarrow_array (self , pa_array ) -> Self :
38
+ raise NotImplementedError
39
+
37
40
def _convert_bool_result (self , result , na = lib .no_default , method_name = None ):
38
41
# Convert a bool-dtype result to the appropriate result type
39
42
raise NotImplementedError
@@ -50,31 +53,31 @@ def _str_len(self):
50
53
return self ._convert_int_result (result )
51
54
52
55
def _str_lower(self) -> Self:
    """Apply ``pc.utf8_lower`` to the backing pyarrow array and wrap the result."""
    lowered = pc.utf8_lower(self._pa_array)
    return self._from_pyarrow_array(lowered)
54
57
55
58
def _str_upper(self) -> Self:
    """Apply ``pc.utf8_upper`` to the backing pyarrow array and wrap the result."""
    uppered = pc.utf8_upper(self._pa_array)
    return self._from_pyarrow_array(uppered)
57
60
58
61
def _str_strip(self, to_strip=None) -> Self:
    """Trim both ends of every string in the backing pyarrow array.

    Parameters
    ----------
    to_strip
        When ``None`` (the default), ``pc.utf8_trim_whitespace`` is used.
        Otherwise the value is forwarded as the ``characters`` argument of
        ``pc.utf8_trim``.
    """
    values = self._pa_array
    trimmed = (
        pc.utf8_trim_whitespace(values)
        if to_strip is None
        else pc.utf8_trim(values, characters=to_strip)
    )
    return self._from_pyarrow_array(trimmed)
64
67
65
68
def _str_lstrip(self, to_strip=None) -> Self:
    """Trim the left end of every string in the backing pyarrow array.

    Parameters
    ----------
    to_strip
        When ``None`` (the default), ``pc.utf8_ltrim_whitespace`` is used.
        Otherwise the value is forwarded as the ``characters`` argument of
        ``pc.utf8_ltrim``.
    """
    values = self._pa_array
    trimmed = (
        pc.utf8_ltrim_whitespace(values)
        if to_strip is None
        else pc.utf8_ltrim(values, characters=to_strip)
    )
    return self._from_pyarrow_array(trimmed)
71
74
72
75
def _str_rstrip(self, to_strip=None) -> Self:
    """Trim the right end of every string in the backing pyarrow array.

    Parameters
    ----------
    to_strip
        When ``None`` (the default), ``pc.utf8_rtrim_whitespace`` is used.
        Otherwise the value is forwarded as the ``characters`` argument of
        ``pc.utf8_rtrim``.
    """
    values = self._pa_array
    trimmed = (
        pc.utf8_rtrim_whitespace(values)
        if to_strip is None
        else pc.utf8_rtrim(values, characters=to_strip)
    )
    return self._from_pyarrow_array(trimmed)
78
81
79
82
def _str_pad (
80
83
self ,
@@ -104,7 +107,9 @@ def _str_pad(
104
107
raise ValueError (
105
108
f"Invalid side: { side } . Side must be one of 'left', 'right', 'both'"
106
109
)
107
- return type (self )(pa_pad (self ._pa_array , width = width , padding = fillchar ))
110
+ return self ._from_pyarrow_array (
111
+ pa_pad (self ._pa_array , width = width , padding = fillchar )
112
+ )
108
113
109
114
def _str_get (self , i : int ) -> Self :
110
115
lengths = pc .utf8_length (self ._pa_array )
@@ -124,15 +129,17 @@ def _str_get(self, i: int) -> Self:
124
129
)
125
130
null_value = pa .scalar (None , type = self ._pa_array .type )
126
131
result = pc .if_else (not_out_of_bounds , selected , null_value )
127
- return type ( self ) (result )
132
+ return self . _from_pyarrow_array (result )
128
133
129
134
def _str_slice (
130
135
self , start : int | None = None , stop : int | None = None , step : int | None = None
131
136
) -> Self :
132
137
if pa_version_under13p0 :
133
138
# GH#59724
134
139
result = self ._apply_elementwise (lambda val : val [start :stop :step ])
135
- return type (self )(pa .chunked_array (result , type = self ._pa_array .type ))
140
+ return self ._from_pyarrow_array (
141
+ pa .chunked_array (result , type = self ._pa_array .type )
142
+ )
136
143
if start is None :
137
144
if step is not None and step < 0 :
138
145
# GH#59710
@@ -141,7 +148,7 @@ def _str_slice(
141
148
start = 0
142
149
if step is None :
143
150
step = 1
144
- return type ( self ) (
151
+ return self . _from_pyarrow_array (
145
152
pc .utf8_slice_codeunits (self ._pa_array , start = start , stop = stop , step = step )
146
153
)
147
154
@@ -154,7 +161,9 @@ def _str_slice_replace(
154
161
start = 0
155
162
if stop is None :
156
163
stop = np .iinfo (np .int64 ).max
157
- return type (self )(pc .utf8_replace_slice (self ._pa_array , start , stop , repl ))
164
+ return self ._from_pyarrow_array (
165
+ pc .utf8_replace_slice (self ._pa_array , start , stop , repl )
166
+ )
158
167
159
168
def _str_replace (
160
169
self ,
@@ -181,32 +190,32 @@ def _str_replace(
181
190
replacement = repl ,
182
191
max_replacements = pa_max_replacements ,
183
192
)
184
- return type ( self ) (result )
193
+ return self . _from_pyarrow_array (result )
185
194
186
195
def _str_capitalize(self) -> Self:
    """Apply ``pc.utf8_capitalize`` to the backing pyarrow array and wrap the result."""
    capitalized = pc.utf8_capitalize(self._pa_array)
    return self._from_pyarrow_array(capitalized)
188
197
189
198
def _str_title(self) -> Self:
    """Apply ``pc.utf8_title`` to the backing pyarrow array and wrap the result."""
    titled = pc.utf8_title(self._pa_array)
    return self._from_pyarrow_array(titled)
191
200
192
201
def _str_swapcase(self) -> Self:
    """Apply ``pc.utf8_swapcase`` to the backing pyarrow array and wrap the result."""
    swapped = pc.utf8_swapcase(self._pa_array)
    return self._from_pyarrow_array(swapped)
194
203
195
204
def _str_removeprefix(self, prefix: str) -> Self:
    """Remove ``prefix`` from the start of each string that begins with it.

    Values that do not start with ``prefix`` are kept unchanged.

    Parameters
    ----------
    prefix : str
        The leading text to remove.

    Returns
    -------
    Self
        The result wrapped via ``self._from_pyarrow_array``.
    """
    if not pa_version_under13p0:
        # Vectorised path: slice off len(prefix) code units everywhere, then
        # keep the sliced value only where the prefix actually matched.
        starts_with = pc.starts_with(self._pa_array, pattern=prefix)
        removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
        result = pc.if_else(starts_with, removed, self._pa_array)
        return self._from_pyarrow_array(result)

    # Fallback for older pyarrow: apply Python's str.removeprefix per element.
    result = self._apply_elementwise(lambda val: val.removeprefix(prefix))
    # Pass the original type explicitly, as _str_slice does, so the rebuilt
    # array does not depend on type inference from the Python values.
    return self._from_pyarrow_array(
        pa.chunked_array(result, type=self._pa_array.type)
    )
204
213
205
214
def _str_removesuffix(self, suffix: str) -> Self:
    """Remove ``suffix`` from the end of each string that ends with it.

    Values that do not end with ``suffix`` are kept unchanged. An empty
    ``suffix`` leaves every value unchanged, matching ``str.removesuffix``.

    Parameters
    ----------
    suffix : str
        The trailing text to remove.

    Returns
    -------
    Self
        The result wrapped via ``self._from_pyarrow_array``.
    """
    if not suffix:
        # -len("") is 0, so the slice below would use stop=0 and reduce any
        # value the empty pattern matches to "". Removing an empty suffix
        # must be a no-op, so return the data unchanged.
        return self._from_pyarrow_array(self._pa_array)

    ends_with = pc.ends_with(self._pa_array, pattern=suffix)
    # A negative stop drops the last len(suffix) code units.
    removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
    result = pc.if_else(ends_with, removed, self._pa_array)
    return self._from_pyarrow_array(result)
210
219
211
220
def _str_startswith (
212
221
self , pat : str | tuple [str , ...], na : Scalar | lib .NoDefault = lib .no_default
0 commit comments