Skip to content

Commit 36b8f20

Browse files
authored
BUG/DEPR: logical operation with bool and string (#61995)
1 parent b004478 commit 36b8f20

File tree

4 files changed

+67
-1
lines changed

4 files changed

+67
-1
lines changed

doc/source/whatsnew/v2.3.2.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Bug fixes
2525
- Fix :meth:`~DataFrame.to_json` with ``orient="table"`` to correctly use the
2626
"string" type in the JSON Table Schema for :class:`StringDtype` columns
2727
(:issue:`61889`)
28-
28+
- Boolean operations (``|``, ``&``, ``^``) with bool-dtype objects on the left and :class:`StringDtype` objects on the right now cast the strings to bool and emit a ``FutureWarning``; this casting is deprecated and will raise in a future version (:issue:`60234`)
2929

3030
.. ---------------------------------------------------------------------------
3131
.. _whatsnew_232.contributors:

pandas/core/arrays/arrow/array.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
overload,
1313
)
1414
import unicodedata
15+
import warnings
1516

1617
import numpy as np
1718

@@ -27,6 +28,7 @@
2728
pa_version_under13p0,
2829
)
2930
from pandas.util._decorators import doc
31+
from pandas.util._exceptions import find_stack_level
3032

3133
from pandas.core.dtypes.cast import (
3234
can_hold_element,
@@ -852,6 +854,25 @@ def _logical_method(self, other, op) -> Self:
852854
# integer types. Otherwise these are boolean ops.
853855
if pa.types.is_integer(self._pa_array.type):
854856
return self._evaluate_op_method(other, op, ARROW_BIT_WISE_FUNCS)
857+
elif (
858+
(
859+
pa.types.is_string(self._pa_array.type)
860+
or pa.types.is_large_string(self._pa_array.type)
861+
)
862+
and op in (roperator.ror_, roperator.rand_, roperator.rxor)
863+
and isinstance(other, np.ndarray)
864+
and other.dtype == bool
865+
):
866+
# GH#60234 backward compatibility for the move to StringDtype in 3.0
867+
op_name = op.__name__[1:].strip("_")
868+
warnings.warn(
869+
f"'{op_name}' operations between boolean dtype and {self.dtype} are "
870+
"deprecated and will raise in a future version. Explicitly "
871+
"cast the strings to a boolean dtype before operating instead.",
872+
FutureWarning,
873+
stacklevel=find_stack_level(),
874+
)
875+
return op(other, self.astype(bool))
855876
else:
856877
return self._evaluate_op_method(other, op, ARROW_LOGICAL_FUNCS)
857878

pandas/core/arrays/string_.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
missing,
5353
nanops,
5454
ops,
55+
roperator,
5556
)
5657
from pandas.core.algorithms import isin
5758
from pandas.core.array_algos import masked_reductions
@@ -390,6 +391,26 @@ class BaseStringArray(ExtensionArray):
390391

391392
dtype: StringDtype
392393

394+
# TODO(4.0): Once the deprecation here is enforced, this method can be
395+
# removed and we use the parent class method instead.
396+
def _logical_method(self, other, op):
    """
    Handle reversed logical ops whose left operand is a boolean ndarray.

    When ``op`` is one of ``roperator.ror_``, ``roperator.rand_`` or
    ``roperator.rxor`` and ``other`` is a bool-dtype ``np.ndarray``, a
    ``FutureWarning`` is emitted and the operation is evaluated against
    ``self.astype(bool)``. Every other combination returns
    ``NotImplemented``.
    """
    reversed_bool_op = (
        op in (roperator.ror_, roperator.rand_, roperator.rxor)
        and isinstance(other, np.ndarray)
        and other.dtype == bool
    )
    if not reversed_bool_op:
        return NotImplemented

    # GH#60234 backward compatibility for the move to StringDtype in 3.0
    # Drop the first character and surrounding underscores from the operator
    # name, so that a name such as "rand_" is reported as "and".
    bare_name = op.__name__[1:].strip("_")
    message = (
        f"'{bare_name}' operations between boolean dtype and {self.dtype} are "
        "deprecated and will raise in a future version. Explicitly "
        "cast the strings to a boolean dtype before operating instead."
    )
    warnings.warn(message, FutureWarning, stacklevel=find_stack_level())
    return op(other, self.astype(bool))
413+
393414
@doc(ExtensionArray.tolist)
394415
def tolist(self) -> list:
395416
if self.ndim > 1:

pandas/tests/strings/test_strings.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,3 +787,27 @@ def test_decode_with_dtype_none():
787787
result = ser.str.decode("utf-8", dtype=None)
788788
expected = Series(["a", "b", "c"], dtype="str")
789789
tm.assert_series_equal(result, expected)
790+
791+
792+
def test_reversed_logical_ops(any_string_dtype):
    """
    Check ``bool_series <op> string_series`` for ``|``, ``&`` and ``^``.

    The result must equal the same operation with the string Series cast to
    bool first. A ``FutureWarning`` is expected unless the dtype under test
    is ``object``.
    """
    # GH#60234
    is_object = any_string_dtype == object
    expected_warning = None if is_object else FutureWarning
    bool_ser = Series([True, False, False, True])
    str_ser = Series(["", "", "b", "c"], dtype=any_string_dtype)

    pattern = "operations between boolean dtype and"

    def check(binop):
        # Only the operation on the un-cast strings is expected to warn.
        with tm.assert_produces_warning(expected_warning, match=pattern):
            result = binop(bool_ser, str_ser)
        expected = binop(bool_ser, str_ser.astype(bool))
        tm.assert_series_equal(result, expected)

    check(lambda lhs, rhs: lhs | rhs)
    check(lambda lhs, rhs: lhs & rhs)
    check(lambda lhs, rhs: lhs ^ rhs)

0 commit comments

Comments
 (0)