diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index d1cf1e7504ece..14dadd9b41772 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -184,7 +184,7 @@ class StringMethods(NoNewAttributesMixin): >>> s = pd.Series(["A_Str_Series"]) >>> s 0 A_Str_Series - dtype: object + dtype: str >>> s.str.split("_") 0 [A, Str, Series] @@ -192,7 +192,7 @@ class StringMethods(NoNewAttributesMixin): >>> s.str.replace("_", "") 0 AStrSeries - dtype: object + dtype: str """ # Note: see the docstring in pandas.core.strings.__init__ @@ -560,7 +560,7 @@ def cat( 1 b,B 2 NaN 3 d,D - dtype: object + dtype: str Missing values will remain missing in the result, but can again be represented using `na_rep` @@ -570,7 +570,7 @@ def cat( 1 b,B 2 -,C 3 d,D - dtype: object + dtype: str If `sep` is not specified, the values are concatenated without separation. @@ -580,7 +580,7 @@ def cat( 1 bB 2 -C 3 dD - dtype: object + dtype: str Series with different indexes can be aligned before concatenation. The `join`-keyword works as in other methods. @@ -591,7 +591,7 @@ def cat( 1 b- 2 -c 3 dd - dtype: object + dtype: str >>> >>> s.str.cat(t, join="outer", na_rep="-") 0 aa @@ -599,20 +599,20 @@ def cat( 2 -c 3 dd 4 -e - dtype: object + dtype: str >>> >>> s.str.cat(t, join="inner", na_rep="-") 0 aa 2 -c 3 dd - dtype: object + dtype: str >>> >>> s.str.cat(t, join="right", na_rep="-") 3 dd 0 aa 4 -e 2 -c - dtype: object + dtype: str For more examples, see :ref:`here <text.concatenate>`. """ @@ -770,7 +770,7 @@ def cat( 0 this is a regular sentence 1 https://docs.python.org/3/tutorial/index.html 2 NaN - dtype: object + dtype: str In the default setting, the string is split by whitespace. @@ -817,17 +817,17 @@ def cat( the columns during the split. 
>>> s.str.split(expand=True) - 0 1 2 3 4 - 0 this is a regular sentence - 1 https://docs.python.org/3/tutorial/index.html None None None None - 2 NaN NaN NaN NaN NaN + 0 1 2 3 4 + 0 this is a regular sentence + 1 https://docs.python.org/3/tutorial/index.html NaN NaN NaN NaN + 2 NaN NaN NaN NaN NaN For slightly more complex use cases like splitting the html document name from a url, a combination of parameter settings can be used. >>> s.str.rsplit("/", n=1, expand=True) 0 1 - 0 this is a regular sentence None + 0 this is a regular sentence NaN 1 https://docs.python.org/3/tutorial index.html 2 NaN NaN %(regex_examples)s""" @@ -982,7 +982,7 @@ def rsplit(self, pat=None, *, n=-1, expand: bool = False): >>> s 0 Linda van der Berg 1 George Pitt-Rivers - dtype: object + dtype: str >>> s.str.partition() 0 1 2 @@ -1014,7 +1014,7 @@ def rsplit(self, pat=None, *, n=-1, expand: bool = False): >>> idx = pd.Index(['X 123', 'Y 999']) >>> idx - Index(['X 123', 'Y 999'], dtype='object') + Index(['X 123', 'Y 999'], dtype='str') Which will create a MultiIndex: @@ -1274,14 +1274,14 @@ def contains( 1 True 2 False 3 False - 4 NaN - dtype: object + 4 False + dtype: bool Returning an Index of booleans using only a literal pattern. >>> ind = pd.Index(["Mouse", "dog", "house and parrot", "23.0", np.nan]) >>> ind.str.contains("23", regex=False) - Index([False, False, False, True, nan], dtype='object') + array([False, False, False, True, False]) Specifying case sensitivity using `case`. @@ -1290,8 +1290,8 @@ def contains( 1 False 2 False 3 False - 4 NaN - dtype: object + 4 False + dtype: bool Specifying `na` to be `False` instead of `NaN` replaces NaN values with `False`. If Series or Index does not contain NaN values @@ -1312,8 +1312,8 @@ def contains( 1 True 2 True 3 False - 4 NaN - dtype: object + 4 False + dtype: bool Ignoring case sensitivity using `flags` with regex. 
@@ -1323,8 +1323,8 @@ def contains( 1 False 2 True 3 False - 4 NaN - dtype: object + 4 False + dtype: bool Returning any digit using regular expression. @@ -1333,8 +1333,8 @@ def contains( 1 False 2 False 3 True - 4 NaN - dtype: object + 4 False + dtype: bool Ensure `pat` is not a literal pattern when `regex` is set to True. Note in the following example one might expect only `s2[1]` and `s2[3]` to @@ -1544,7 +1544,7 @@ def replace( 0 a 1 b 2 NaN - dtype: object + dtype: str When `pat` is a string and `regex` is True, the given `pat` is compiled as a regex. When `repl` is a string, it replaces matching @@ -1555,7 +1555,7 @@ def replace( 0 bao 1 baz 2 NaN - dtype: object + dtype: str When `pat` is a string and `regex` is False, every `pat` is replaced with `repl` as with :meth:`str.replace`: @@ -1564,7 +1564,7 @@ def replace( 0 bao 1 fuz 2 NaN - dtype: object + dtype: str When `repl` is a callable, it is called on every `pat` using :func:`re.sub`. The callable should expect one positional argument @@ -1576,7 +1576,7 @@ def replace( 0 oo 1 uz 2 NaN - dtype: object + dtype: str Reverse every lowercase alphabetic word: @@ -1586,7 +1586,7 @@ def replace( 0 oof 123 1 rab zab 2 NaN - dtype: object + dtype: str Using regex groups (extract second group and swap case): @@ -1596,7 +1596,7 @@ def replace( >>> ser.str.replace(pat, repl, regex=True) 0 tWO 1 bAR - dtype: object + dtype: str Using a compiled regex with flags @@ -1606,7 +1606,7 @@ def replace( 0 foo 1 bar 2 NaN - dtype: object + dtype: str """ if isinstance(pat, dict) and repl is not None: raise ValueError("repl cannot be used when pat is a dictionary") @@ -1685,7 +1685,7 @@ def repeat(self, repeats): 0 a 1 b 2 c - dtype: object + dtype: str Single int repeats string in Series @@ -1693,7 +1693,7 @@ def repeat(self, repeats): 0 aa 1 bb 2 cc - dtype: object + dtype: str Sequence of int repeats corresponding string in Series @@ -1701,7 +1701,7 @@ def repeat(self, repeats): 0 a 1 bb 2 ccc - dtype: object + dtype: str 
""" result = self._data.array._str_repeat(repeats) return self._wrap_result(result) @@ -1754,22 +1754,22 @@ def pad( >>> s 0 caribou 1 tiger - dtype: object + dtype: str >>> s.str.pad(width=10) 0 caribou 1 tiger - dtype: object + dtype: str >>> s.str.pad(width=10, side="right", fillchar="-") 0 caribou--- 1 tiger----- - dtype: object + dtype: str >>> s.str.pad(width=10, side="both", fillchar="-") 0 -caribou-- 1 --tiger--- - dtype: object + dtype: str """ if not isinstance(fillchar, str): msg = f"fillchar must be a character, not {type(fillchar).__name__}" @@ -1823,7 +1823,7 @@ def pad( 0 ..dog... 1 ..bird.. 2 .mouse.. - dtype: object + dtype: str For Series.str.ljust: @@ -1832,7 +1832,7 @@ def pad( 0 dog..... 1 bird.... 2 mouse... - dtype: object + dtype: str For Series.str.rjust: @@ -1841,7 +1841,7 @@ def pad( 0 .....dog 1 ....bird 2 ...mouse - dtype: object + dtype: str """ @Appender(_shared_docs["str_pad"] % {"side": "left and right", "method": "center"}) @@ -1965,37 +1965,37 @@ def slice(self, start=None, stop=None, step=None): 0 koala 1 dog 2 chameleon - dtype: object + dtype: str >>> s.str.slice(start=1) 0 oala 1 og 2 hameleon - dtype: object + dtype: str >>> s.str.slice(start=-1) 0 a 1 g 2 n - dtype: object + dtype: str >>> s.str.slice(stop=2) 0 ko 1 do 2 ch - dtype: object + dtype: str >>> s.str.slice(step=2) 0 kaa 1 dg 2 caeen - dtype: object + dtype: str >>> s.str.slice(start=0, stop=5, step=3) 0 kl 1 d 2 cm - dtype: object + dtype: str Equivalent behaviour to: @@ -2003,7 +2003,7 @@ def slice(self, start=None, stop=None, step=None): 0 kl 1 d 2 cm - dtype: object + dtype: str """ result = self._data.array._str_slice(start, stop, step) return self._wrap_result(result) @@ -2050,7 +2050,7 @@ def slice_replace(self, start=None, stop=None, repl=None): 2 abc 3 abdc 4 abcde - dtype: object + dtype: str Specify just `start`, meaning replace `start` until the end of the string with `repl`. 
@@ -2061,7 +2061,7 @@ def slice_replace(self, start=None, stop=None, repl=None): 2 aX 3 aX 4 aX - dtype: object + dtype: str Specify just `stop`, meaning the start of the string to `stop` is replaced with `repl`, and the rest of the string is included. @@ -2072,7 +2072,7 @@ def slice_replace(self, start=None, stop=None, repl=None): 2 Xc 3 Xdc 4 Xcde - dtype: object + dtype: str Specify `start` and `stop`, meaning the slice from `start` to `stop` is replaced with `repl`. Everything before or after `start` and `stop` is @@ -2084,7 +2084,7 @@ def slice_replace(self, start=None, stop=None, repl=None): 2 aX 3 aXc 4 aXde - dtype: object + dtype: str """ result = self._data.array._str_slice_replace(start, stop, repl) return self._wrap_result(result) @@ -2130,7 +2130,7 @@ def decode( 0 cow 1 123 2 () - dtype: object + dtype: str """ if dtype is not None and not is_string_dtype(dtype): raise ValueError(f"dtype must be string or object, got {dtype=}") @@ -2315,24 +2315,24 @@ def rstrip(self, to_strip=None): 0 str_foo 1 str_bar 2 no_prefix - dtype: object + dtype: str >>> s.str.removeprefix("str_") 0 foo 1 bar 2 no_prefix - dtype: object + dtype: str >>> s = pd.Series(["foo_str", "bar_str", "no_suffix"]) >>> s 0 foo_str 1 bar_str 2 no_suffix - dtype: object + dtype: str >>> s.str.removesuffix("_str") 0 foo 1 bar 2 no_suffix - dtype: object + dtype: str """ @Appender( @@ -2458,7 +2458,7 @@ def wrap( >>> s.str.wrap(12) 0 line to be\nwrapped 1 another line\nto be\nwrapped - dtype: object + dtype: str """ result = self._data.array._str_wrap( width=width, @@ -2583,7 +2583,7 @@ def translate(self, table): >>> ser.str.translate(mytable) 0 El nino 1 Francoise - dtype: object + dtype: str """ result = self._data.array._str_translate(table) dtype = object if self._data.dtype == "object" else None @@ -2695,21 +2695,21 @@ def startswith( 1 Bear 2 cat 3 NaN - dtype: object + dtype: str >>> s.str.startswith("b") 0 True 1 False 2 False - 3 NaN - dtype: object + 3 False + dtype: bool >>> 
s.str.startswith(("b", "B")) 0 True 1 True 2 False - 3 NaN - dtype: object + 3 False + dtype: bool Specifying `na` to be `False` instead of `NaN`. @@ -2766,21 +2766,21 @@ def endswith( 1 bear 2 caT 3 NaN - dtype: object + dtype: str >>> s.str.endswith("t") 0 True 1 False 2 False - 3 NaN - dtype: object + 3 False + dtype: bool >>> s.str.endswith(("t", "T")) 0 True 1 False 2 True - 3 NaN - dtype: object + 3 False + dtype: bool Specifying `na` to be `False` instead of `NaN`. @@ -2961,7 +2961,7 @@ def extract( if expand=True. >>> s.str.extract(r"[ab](\d)", expand=True) - 0 + 0 0 1 1 2 2 NaN @@ -2972,7 +2972,7 @@ def extract( 0 1 1 2 2 NaN - dtype: object + dtype: str """ from pandas import DataFrame @@ -3379,42 +3379,42 @@ def len(self): 1 CAPITALS 2 this is a sentence 3 SwApCaSe - dtype: object + dtype: str >>> s.str.lower() 0 lower 1 capitals 2 this is a sentence 3 swapcase - dtype: object + dtype: str >>> s.str.upper() 0 LOWER 1 CAPITALS 2 THIS IS A SENTENCE 3 SWAPCASE - dtype: object + dtype: str >>> s.str.title() 0 Lower 1 Capitals 2 This Is A Sentence 3 Swapcase - dtype: object + dtype: str >>> s.str.capitalize() 0 Lower 1 Capitals 2 This is a sentence 3 Swapcase - dtype: object + dtype: str >>> s.str.swapcase() 0 LOWER 1 capitals 2 THIS IS A SENTENCE 3 sWaPcAsE - dtype: object + dtype: str """ # Types: # cases: @@ -3640,7 +3640,7 @@ def casefold(self): >>> s3 = pd.Series(['23', '³', '⅕', '']) >>> s3.str.isdigit() 0 True - 1 True + 1 False 2 False 3 False dtype: bool