Skip to content

Commit 94d9d2e

Browse files
authored
DOC: Update documentation for using natural sort with sort_values (#61979)
1 parent e4a03b6 commit 94d9d2e

File tree

2 files changed

+56
-37
lines changed

2 files changed

+56
-37
lines changed

pandas/core/frame.py

Lines changed: 29 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -7173,35 +7173,43 @@ def sort_values(
71737173
`natural sorting <https://en.wikipedia.org/wiki/Natural_sort_order>`__.
71747174
This can be done using
71757175
``natsort`` `package <https://github.com/SethMMorton/natsort>`__,
7176-
which provides sorted indices according
7177-
to their natural order, as shown below:
7176+
which provides a function to generate a key
7177+
to sort data in their natural order:
71787178
71797179
>>> df = pd.DataFrame(
71807180
... {
7181-
... "time": ["0hr", "128hr", "72hr", "48hr", "96hr"],
7182-
... "value": [10, 20, 30, 40, 50],
7181+
... "hours": ["0hr", "128hr", "0hr", "64hr", "64hr", "128hr"],
7182+
... "mins": [
7183+
... "10mins",
7184+
... "40mins",
7185+
... "40mins",
7186+
... "40mins",
7187+
... "10mins",
7188+
... "10mins",
7189+
... ],
7190+
... "value": [10, 20, 30, 40, 50, 60],
71837191
... }
71847192
... )
71857193
>>> df
7186-
time value
7187-
0 0hr 10
7188-
1 128hr 20
7189-
2 72hr 30
7190-
3 48hr 40
7191-
4 96hr 50
7192-
>>> from natsort import index_natsorted
7193-
>>> index_natsorted(df["time"])
7194-
[0, 3, 2, 4, 1]
7194+
hours mins value
7195+
0 0hr 10mins 10
7196+
1 128hr 40mins 20
7197+
2 0hr 40mins 30
7198+
3 64hr 40mins 40
7199+
4 64hr 10mins 50
7200+
5 128hr 10mins 60
7201+
>>> from natsort import natsort_keygen
71957202
>>> df.sort_values(
7196-
... by="time",
7197-
... key=lambda x: np.argsort(index_natsorted(x)),
7203+
... by=["hours", "mins"],
7204+
... key=natsort_keygen(),
71987205
... )
7199-
time value
7200-
0 0hr 10
7201-
3 48hr 40
7202-
2 72hr 30
7203-
4 96hr 50
7204-
1 128hr 20
7206+
hours mins value
7207+
0 0hr 10mins 10
7208+
2 0hr 40mins 30
7209+
4 64hr 10mins 50
7210+
3 64hr 40mins 40
7211+
5 128hr 10mins 60
7212+
1 128hr 40mins 20
72057213
"""
72067214
inplace = validate_bool_kwarg(inplace, "inplace")
72077215
axis = self._get_axis_number(axis)

pandas/core/generic.py

Lines changed: 27 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -5004,27 +5004,38 @@ def sort_values(
50045004
50055005
>>> df = pd.DataFrame(
50065006
... {
5007-
... "time": ["0hr", "128hr", "72hr", "48hr", "96hr"],
5008-
... "value": [10, 20, 30, 40, 50],
5007+
... "hours": ["0hr", "128hr", "0hr", "64hr", "64hr", "128hr"],
5008+
... "mins": [
5009+
... "10mins",
5010+
... "40mins",
5011+
... "40mins",
5012+
... "40mins",
5013+
... "10mins",
5014+
... "10mins",
5015+
... ],
5016+
... "value": [10, 20, 30, 40, 50, 60],
50095017
... }
50105018
... )
50115019
>>> df
5012-
time value
5013-
0 0hr 10
5014-
1 128hr 20
5015-
2 72hr 30
5016-
3 48hr 40
5017-
4 96hr 50
5018-
>>> from natsort import index_natsorted
5020+
hours mins value
5021+
0 0hr 10mins 10
5022+
1 128hr 40mins 20
5023+
2 0hr 40mins 30
5024+
3 64hr 40mins 40
5025+
4 64hr 10mins 50
5026+
5 128hr 10mins 60
5027+
>>> from natsort import natsort_keygen
50195028
>>> df.sort_values(
5020-
... by="time", key=lambda x: np.argsort(index_natsorted(df["time"]))
5029+
... by=["hours", "mins"],
5030+
... key=natsort_keygen(),
50215031
... )
5022-
time value
5023-
0 0hr 10
5024-
3 48hr 40
5025-
2 72hr 30
5026-
4 96hr 50
5027-
1 128hr 20
5032+
hours mins value
5033+
0 0hr 10mins 10
5034+
2 0hr 40mins 30
5035+
4 64hr 10mins 50
5036+
3 64hr 40mins 40
5037+
5 128hr 10mins 60
5038+
1 128hr 40mins 20
50285039
"""
50295040
raise AbstractMethodError(self)
50305041

0 commit comments

Comments
 (0)