Skip to content

Commit 5f545f3

Browse files
committed
Update documentation for sort_values and natural sorting
1 parent e4a03b6 commit 5f545f3

File tree

2 files changed

+78
-37
lines changed

2 files changed

+78
-37
lines changed

pandas/core/frame.py

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7173,35 +7173,54 @@ def sort_values(
71737173
`natural sorting <https://en.wikipedia.org/wiki/Natural_sort_order>`__.
71747174
This can be done using
71757175
``natsort`` `package <https://github.com/SethMMorton/natsort>`__,
7176-
which provides sorted indices according
7177-
to their natural order, as shown below:
7176+
which provides a function to generate a key
7177+
to sort data in their natural order:
71787178
71797179
>>> df = pd.DataFrame(
71807180
... {
7181-
... "time": ["0hr", "128hr", "72hr", "48hr", "96hr"],
7182-
... "value": [10, 20, 30, 40, 50],
7181+
... "hours": ["0hr", "128hr", "0hr", "64hr", "64hr", "128hr"],
7182+
... "mins": [
7183+
... "10mins",
7184+
... "40mins",
7185+
... "40mins",
7186+
... "40mins",
7187+
... "10mins",
7188+
... "10mins",
7189+
... ],
7190+
... "value": [10, 20, 30, 40, 50, 60],
71837191
... }
71847192
... )
71857193
>>> df
7186-
time value
7187-
0 0hr 10
7188-
1 128hr 20
7189-
2 72hr 30
7190-
3 48hr 40
7191-
4 96hr 50
7192-
>>> from natsort import index_natsorted
7193-
>>> index_natsorted(df["time"])
7194-
[0, 3, 2, 4, 1]
7194+
hours mins value
7195+
0 0hr 10mins 10
7196+
1 128hr 40mins 20
7197+
2 0hr 40mins 30
7198+
3 64hr 40mins 40
7199+
4 64hr 10mins 50
7200+
5 128hr 10mins 60
7201+
>>> from natsort import natsort_keygen
7202+
>>> natsort_keygen()(df["hours"])
7203+
(('', 0, 'hr'), ('', 128, 'hr'), ('', 0, 'hr'), ('', 64, 'hr'), ('', 64, 'hr'), ('', 128, 'hr'))
7204+
>>> natsort_keygen()(df["mins"])
7205+
(
7206+
('', 10, 'mins'),
7207+
('', 40, 'mins'),
7208+
('', 40, 'mins'),
7209+
('', 40, 'mins'),
7210+
('', 10, 'mins'),
7211+
('', 10, 'mins'),
7212+
)
71957213
>>> df.sort_values(
7196-
... by="time",
7197-
... key=lambda x: np.argsort(index_natsorted(x)),
7214+
... by=["hours", "mins"],
7215+
... key=natsort_keygen(),
71987216
... )
7199-
time value
7200-
0 0hr 10
7201-
3 48hr 40
7202-
2 72hr 30
7203-
4 96hr 50
7204-
1 128hr 20
7217+
hours mins value
7218+
0 0hr 10mins 10
7219+
2 0hr 40mins 30
7220+
4 64hr 10mins 50
7221+
3 64hr 40mins 40
7222+
5 128hr 10mins 60
7223+
1 128hr 40mins 20
72057224
"""
72067225
inplace = validate_bool_kwarg(inplace, "inplace")
72077226
axis = self._get_axis_number(axis)

pandas/core/generic.py

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5004,27 +5004,49 @@ def sort_values(
50045004
50055005
>>> df = pd.DataFrame(
50065006
... {
5007-
... "time": ["0hr", "128hr", "72hr", "48hr", "96hr"],
5008-
... "value": [10, 20, 30, 40, 50],
5007+
... "hours": ["0hr", "128hr", "0hr", "64hr", "64hr", "128hr"],
5008+
... "mins": [
5009+
... "10mins",
5010+
... "40mins",
5011+
... "40mins",
5012+
... "40mins",
5013+
... "10mins",
5014+
... "10mins",
5015+
... ],
5016+
... "value": [10, 20, 30, 40, 50, 60],
50095017
... }
50105018
... )
50115019
>>> df
5012-
time value
5013-
0 0hr 10
5014-
1 128hr 20
5015-
2 72hr 30
5016-
3 48hr 40
5017-
4 96hr 50
5018-
>>> from natsort import index_natsorted
5020+
hours mins value
5021+
0 0hr 10mins 10
5022+
1 128hr 40mins 20
5023+
2 0hr 40mins 30
5024+
3 64hr 40mins 40
5025+
4 64hr 10mins 50
5026+
5 128hr 10mins 60
5027+
>>> from natsort import natsort_keygen
5028+
>>> natsort_keygen()(df["hours"])
5029+
(('', 0, 'hr'), ('', 128, 'hr'), ('', 0, 'hr'), ('', 64, 'hr'), ('', 64, 'hr'), ('', 128, 'hr'))
5030+
>>> natsort_keygen()(df["mins"])
5031+
(
5032+
('', 10, 'mins'),
5033+
('', 40, 'mins'),
5034+
('', 40, 'mins'),
5035+
('', 40, 'mins'),
5036+
('', 10, 'mins'),
5037+
('', 10, 'mins'),
5038+
)
50195039
>>> df.sort_values(
5020-
... by="time", key=lambda x: np.argsort(index_natsorted(df["time"]))
5040+
... by=["hours", "mins"],
5041+
... key=natsort_keygen(),
50215042
... )
5022-
time value
5023-
0 0hr 10
5024-
3 48hr 40
5025-
2 72hr 30
5026-
4 96hr 50
5027-
1 128hr 20
5043+
hours mins value
5044+
0 0hr 10mins 10
5045+
2 0hr 40mins 30
5046+
4 64hr 10mins 50
5047+
3 64hr 40mins 40
5048+
5 128hr 10mins 60
5049+
1 128hr 40mins 20
50285050
"""
50295051
raise AbstractMethodError(self)
50305052

0 commit comments

Comments
 (0)