Skip to content

Commit c849d39

Browse files
Output formatting: preserve quoting for string categories (#61891)
1 parent e72c8a1 commit c849d39

File tree

6 files changed: +48 -144 lines changed

6 files changed: +48 -144 lines changed

pandas/core/arrays/categorical.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2233,8 +2233,16 @@ def _repr_categories(self) -> list[str]:
22332233
)
22342234
from pandas.io.formats import format as fmt
22352235

2236+
formatter = None
2237+
if self.categories.dtype == "str":
2238+
# the extension array formatter defaults to boxed=True in format_array
2239+
# override here to boxed=False to be consistent with QUOTE_NONNUMERIC
2240+
formatter = cast(ExtensionArray, self.categories._values)._formatter(
2241+
boxed=False
2242+
)
2243+
22362244
format_array = partial(
2237-
fmt.format_array, formatter=None, quoting=QUOTE_NONNUMERIC
2245+
fmt.format_array, formatter=formatter, quoting=QUOTE_NONNUMERIC
22382246
)
22392247
if len(self.categories) > max_categories:
22402248
num = max_categories // 2

pandas/tests/arrays/categorical/test_repr.py

Lines changed: 9 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,11 @@
1616
class TestCategoricalReprWithFactor:
1717
def test_print(self, using_infer_string):
1818
factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
19-
if using_infer_string:
20-
expected = [
21-
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
22-
"Categories (3, str): [a < b < c]",
23-
]
24-
else:
25-
expected = [
26-
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
27-
"Categories (3, object): ['a' < 'b' < 'c']",
28-
]
19+
dtype = "str" if using_infer_string else "object"
20+
expected = [
21+
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
22+
f"Categories (3, {dtype}): ['a' < 'b' < 'c']",
23+
]
2924
expected = "\n".join(expected)
3025
actual = repr(factor)
3126
assert actual == expected
@@ -82,10 +77,7 @@ def test_unicode_print(self, using_infer_string):
8277
Categories (3, object): ['aaaaa', 'bb', 'cccc']"""
8378

8479
if using_infer_string:
85-
expected = expected.replace(
86-
"(3, object): ['aaaaa', 'bb', 'cccc']",
87-
"(3, str): [aaaaa, bb, cccc]",
88-
)
80+
expected = expected.replace("object", "str")
8981

9082
assert repr(c) == expected
9183

@@ -96,10 +88,7 @@ def test_unicode_print(self, using_infer_string):
9688
Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa: E501
9789

9890
if using_infer_string:
99-
expected = expected.replace(
100-
"(3, object): ['ああああ', 'いいいいい', 'ううううううう']",
101-
"(3, str): [ああああ, いいいいい, ううううううう]",
102-
)
91+
expected = expected.replace("object", "str")
10392

10493
assert repr(c) == expected
10594

@@ -112,12 +101,9 @@ def test_unicode_print(self, using_infer_string):
112101
Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa: E501
113102

114103
if using_infer_string:
115-
expected = expected.replace(
116-
"(3, object): ['ああああ', 'いいいいい', 'ううううううう']",
117-
"(3, str): [ああああ, いいいいい, ううううううう]",
118-
)
104+
expected = expected.replace("object", "str")
119105

120-
assert repr(c) == expected
106+
assert repr(c) == expected
121107

122108
def test_categorical_repr(self):
123109
c = Categorical([1, 2, 3])

pandas/tests/indexes/categorical/test_category.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas._libs import index as libindex
75
from pandas._libs.arrays import NDArrayBacked
86

@@ -199,8 +197,6 @@ def test_unique(self, data, categories, expected_data, ordered):
199197
expected = CategoricalIndex(expected_data, dtype=dtype)
200198
tm.assert_index_equal(idx.unique(), expected)
201199

202-
# TODO(3.0): remove this test once using_string_dtype() is always True
203-
@pytest.mark.xfail(using_string_dtype(), reason="repr doesn't roundtrip")
204200
def test_repr_roundtrip(self):
205201
ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
206202
str(ci)

pandas/tests/indexes/categorical/test_formats.py

Lines changed: 12 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -8,125 +8,78 @@
88

99

1010
class TestCategoricalIndexReprStringCategories:
11-
def test_string_categorical_index_repr(self, using_infer_string):
11+
def test_string_categorical_index_repr(self):
1212
# short
1313
idx = CategoricalIndex(["a", "bb", "ccc"])
1414
expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa: E501
15-
if using_infer_string:
16-
expected = expected.replace(
17-
"categories=['a', 'bb', 'ccc']",
18-
"categories=[a, bb, ccc]",
19-
)
2015
assert repr(idx) == expected
2116

22-
def test_categorical_index_repr_multiline(self, using_infer_string):
17+
def test_categorical_index_repr_multiline(self):
2318
# multiple lines
2419
idx = CategoricalIndex(["a", "bb", "ccc"] * 10)
2520
expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
2621
'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
2722
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
2823
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa: E501
29-
if using_infer_string:
30-
expected = expected.replace(
31-
"categories=['a', 'bb', 'ccc']",
32-
"categories=[a, bb, ccc]",
33-
)
3424
assert repr(idx) == expected
3525

36-
def test_categorical_index_repr_truncated(self, using_infer_string):
26+
def test_categorical_index_repr_truncated(self):
3727
# truncated
3828
idx = CategoricalIndex(["a", "bb", "ccc"] * 100)
3929
expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
4030
...
4131
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
4232
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa: E501
43-
if using_infer_string:
44-
expected = expected.replace(
45-
"categories=['a', 'bb', 'ccc']",
46-
"categories=[a, bb, ccc]",
47-
)
4833
assert repr(idx) == expected
4934

50-
def test_categorical_index_repr_many_categories(self, using_infer_string):
35+
def test_categorical_index_repr_many_categories(self):
5136
# larger categories
5237
idx = CategoricalIndex(list("abcdefghijklmmo"))
5338
expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
5439
'm', 'm', 'o'],
5540
categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o'], ordered=False, dtype='category')""" # noqa: E501
56-
if using_infer_string:
57-
expected = expected.replace(
58-
"categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o']",
59-
"categories=[a, b, c, d, ..., k, l, m, o]",
60-
)
6141
assert repr(idx) == expected
6242

63-
def test_categorical_index_repr_unicode(self, using_infer_string):
43+
def test_categorical_index_repr_unicode(self):
6444
# short
6545
idx = CategoricalIndex(["あ", "いい", "ううう"])
6646
expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
67-
if using_infer_string:
68-
expected = expected.replace(
69-
"categories=['あ', 'いい', 'ううう']",
70-
"categories=[あ, いい, ううう]",
71-
)
7247
assert repr(idx) == expected
7348

74-
def test_categorical_index_repr_unicode_multiline(self, using_infer_string):
49+
def test_categorical_index_repr_unicode_multiline(self):
7550
# multiple lines
7651
idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
7752
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
7853
'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
7954
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
8055
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
81-
if using_infer_string:
82-
expected = expected.replace(
83-
"categories=['あ', 'いい', 'ううう']",
84-
"categories=[あ, いい, ううう]",
85-
)
8656
assert repr(idx) == expected
8757

88-
def test_categorical_index_repr_unicode_truncated(self, using_infer_string):
58+
def test_categorical_index_repr_unicode_truncated(self):
8959
# truncated
9060
idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
9161
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
9262
...
9363
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
9464
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa: E501
95-
if using_infer_string:
96-
expected = expected.replace(
97-
"categories=['あ', 'いい', 'ううう']",
98-
"categories=[あ, いい, ううう]",
99-
)
10065
assert repr(idx) == expected
10166

102-
def test_categorical_index_repr_unicode_many_categories(self, using_infer_string):
67+
def test_categorical_index_repr_unicode_many_categories(self):
10368
# larger categories
10469
idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
10570
expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し',
10671
'す', 'せ', 'そ'],
10772
categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')""" # noqa: E501
108-
if using_infer_string:
109-
expected = expected.replace(
110-
"categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ']",
111-
"categories=[あ, い, う, え, ..., し, す, せ, そ]",
112-
)
11373
assert repr(idx) == expected
11474

115-
def test_categorical_index_repr_east_asian_width(self, using_infer_string):
75+
def test_categorical_index_repr_east_asian_width(self):
11676
with cf.option_context("display.unicode.east_asian_width", True):
11777
# short
11878
idx = CategoricalIndex(["あ", "いい", "ううう"])
11979
expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
120-
if using_infer_string:
121-
expected = expected.replace(
122-
"categories=['あ', 'いい', 'ううう']",
123-
"categories=[あ, いい, ううう]",
124-
)
12580
assert repr(idx) == expected
12681

127-
def test_categorical_index_repr_east_asian_width_multiline(
128-
self, using_infer_string
129-
):
82+
def test_categorical_index_repr_east_asian_width_multiline(self):
13083
with cf.option_context("display.unicode.east_asian_width", True):
13184
# multiple lines
13285
idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
@@ -136,16 +89,9 @@ def test_categorical_index_repr_east_asian_width_multiline(
13689
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
13790
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa: E501
13891

139-
if using_infer_string:
140-
expected = expected.replace(
141-
"categories=['あ', 'いい', 'ううう']",
142-
"categories=[あ, いい, ううう]",
143-
)
14492
assert repr(idx) == expected
14593

146-
def test_categorical_index_repr_east_asian_width_truncated(
147-
self, using_infer_string
148-
):
94+
def test_categorical_index_repr_east_asian_width_truncated(self):
14995
with cf.option_context("display.unicode.east_asian_width", True):
15096
# truncated
15197
idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
@@ -156,25 +102,13 @@ def test_categorical_index_repr_east_asian_width_truncated(
156102
'あ', 'いい', 'ううう'],
157103
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa: E501
158104

159-
if using_infer_string:
160-
expected = expected.replace(
161-
"categories=['あ', 'いい', 'ううう']",
162-
"categories=[あ, いい, ううう]",
163-
)
164105
assert repr(idx) == expected
165106

166-
def test_categorical_index_repr_east_asian_width_many_categories(
167-
self, using_infer_string
168-
):
107+
def test_categorical_index_repr_east_asian_width_many_categories(self):
169108
with cf.option_context("display.unicode.east_asian_width", True):
170109
idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
171110
expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ',
172111
'さ', 'し', 'す', 'せ', 'そ'],
173112
categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')""" # noqa: E501
174113

175-
if using_infer_string:
176-
expected = expected.replace(
177-
"categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ']",
178-
"categories=[あ, い, う, え, ..., し, す, せ, そ]",
179-
)
180114
assert repr(idx) == expected

pandas/tests/series/test_formats.py

Lines changed: 14 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -309,38 +309,27 @@ def test_categorical_repr(self, using_infer_string):
309309
assert exp == a.__str__()
310310

311311
a = Series(Categorical(["a", "b"] * 25))
312+
exp = (
313+
"0 a\n1 b\n"
314+
" ..\n"
315+
"48 a\n49 b\n"
316+
"Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
317+
)
312318
if using_infer_string:
313-
exp = (
314-
"0 a\n1 b\n"
315-
" ..\n"
316-
"48 a\n49 b\n"
317-
"Length: 50, dtype: category\nCategories (2, str): [a, b]"
318-
)
319-
else:
320-
exp = (
321-
"0 a\n1 b\n"
322-
" ..\n"
323-
"48 a\n49 b\n"
324-
"Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
325-
)
319+
exp = exp.replace("object", "str")
326320
with option_context("display.max_rows", 5):
327321
assert exp == repr(a)
328322

329323
levs = list("abcdefghijklmnopqrstuvwxyz")
330324
a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
325+
exp = (
326+
"0 a\n1 b\n"
327+
"dtype: category\n"
328+
"Categories (26, object): ['a' < 'b' < 'c' < 'd' ... "
329+
"'w' < 'x' < 'y' < 'z']"
330+
)
331331
if using_infer_string:
332-
exp = (
333-
"0 a\n1 b\n"
334-
"dtype: category\n"
335-
"Categories (26, str): [a < b < c < d ... w < x < y < z]"
336-
)
337-
else:
338-
exp = (
339-
"0 a\n1 b\n"
340-
"dtype: category\n"
341-
"Categories (26, object): ['a' < 'b' < 'c' < 'd' ... "
342-
"'w' < 'x' < 'y' < 'z']"
343-
)
332+
exp = exp.replace("object", "str")
344333
assert exp == a.__str__()
345334

346335
def test_categorical_series_repr(self):

pandas/tests/util/test_assert_series_equal.py

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -214,24 +214,15 @@ def test_series_equal_numeric_values_mismatch(rtol):
214214

215215

216216
def test_series_equal_categorical_values_mismatch(rtol, using_infer_string):
217-
if using_infer_string:
218-
msg = """Series are different
219-
220-
Series values are different \\(66\\.66667 %\\)
221-
\\[index\\]: \\[0, 1, 2\\]
222-
\\[left\\]: \\['a', 'b', 'c'\\]
223-
Categories \\(3, str\\): \\[a, b, c\\]
224-
\\[right\\]: \\['a', 'c', 'b'\\]
225-
Categories \\(3, str\\): \\[a, b, c\\]"""
226-
else:
227-
msg = """Series are different
217+
dtype = "str" if using_infer_string else "object"
218+
msg = f"""Series are different
228219
229220
Series values are different \\(66\\.66667 %\\)
230221
\\[index\\]: \\[0, 1, 2\\]
231222
\\[left\\]: \\['a', 'b', 'c'\\]
232-
Categories \\(3, object\\): \\['a', 'b', 'c'\\]
223+
Categories \\(3, {dtype}\\): \\['a', 'b', 'c'\\]
233224
\\[right\\]: \\['a', 'c', 'b'\\]
234-
Categories \\(3, object\\): \\['a', 'b', 'c'\\]"""
225+
Categories \\(3, {dtype}\\): \\['a', 'b', 'c'\\]"""
235226

236227
s1 = Series(Categorical(["a", "b", "c"]))
237228
s2 = Series(Categorical(["a", "c", "b"]))

0 commit comments

Comments (0)