Skip to content

Commit bf1d3fa

Browse files
committed
Add doctests
1 parent f3049f9 commit bf1d3fa

File tree

4 files changed

+110
-22
lines changed

4 files changed

+110
-22
lines changed

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,12 @@ pypi-setup:
3737
develop: pypi-setup
3838
$(PIP) install -e .
3939

40+
doctest:
41+
MATHICS_CHARACTER_ENCODING="ASCII" $(PYTHON) -m mathics.docpipeline -l pymathics.icu -c 'ICU — International Components for Unicode' $o
42+
43+
4044
# Run tests
41-
check: pytest
45+
check: pytest doctest
4246

4347
#: Remove derived files
4448
clean: clean-pyc

pymathics/icu/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
88
Load in Mathics3 Module:
99
>> LoadModule["pymathics.icu"]
10-
= pymathics.natlang
10+
= pymathics.icu
1111
1212
Show the language in effect:
1313
>> $Language
14-
= "English"
14+
= English
1515
1616
Get the alphabet for that language:
1717
>> Alphabet[]
@@ -26,7 +26,7 @@
2626
= {ʼ, а, б, в, г, д, е, ж, з, и, й, к, л, м, н, о, п, р, с, т, у, ф, х, ц, ч, ш, щ, ь, ю, я, є, і, ї, ґ}
2727
"""
2828

29-
from pymathics.icu.__main__ import Alphabet, Language
29+
from pymathics.icu.__main__ import Alphabet, AlphabeticOrder, Language
3030
from pymathics.icu.version import __version__
3131

3232
pymathics_version_data = {
@@ -36,4 +36,4 @@
3636
"requires": ["PyICU"],
3737
}
3838

39-
__all__ = ["Alphabet", "Language", "pymathics_version_data", "__version__"]
39+
__all__ = ["Alphabet", "AlphabeticOrder", "Language", "pymathics_version_data", "__version__"]

pymathics/icu/__main__.py

Lines changed: 98 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,38 +4,81 @@
44
Languages - Human-Language Alphabets and Locales via PyICU.
55
"""
66

7-
# PYTHON MODULES USED IN HERE
8-
9-
# PyICU: human-language alphabets and locales
10-
11-
127
from typing import List, Optional
138

14-
from icu import Locale, LocaleData
15-
from mathics.core.atoms import String
9+
from icu import Collator, Locale, LocaleData
10+
from mathics.core.atoms import Integer, String
1611
from mathics.core.builtin import Builtin, Predefined
1712
from mathics.core.convert.expression import to_mathics_list
1813
from mathics.core.evaluation import Evaluation
1914

20-
availableLocales = Locale.getAvailableLocales()
15+
available_locales = Locale.getAvailableLocales()
2116
language2locale = {
2217
availableLocale.getDisplayLanguage(): locale_name
23-
for locale_name, availableLocale in availableLocales.items()
18+
for locale_name, availableLocale in available_locales.items()
2419
}
2520

2621
# The current value of $Language
2722
LANGUAGE = "English"
2823

24+
2925
def eval_alphabet(language_name: String) -> Optional[List[String]]:
3026

3127
py_language_name = language_name.value
3228
locale = language2locale.get(py_language_name, py_language_name)
33-
if locale not in availableLocales:
29+
if locale not in available_locales:
3430
return
3531
alphabet_set = LocaleData(locale).getExemplarSet(0, 0)
3632
return to_mathics_list(*alphabet_set, elements_conversion_fn=String)
3733

3834

35+
def eval_alphabetic_order(
    string1: str, string2: str, language_name: Optional[str] = None
) -> int:
    """
    Compare two strings using locale-sensitive alphabetic order.

    Args:
        string1: the first string to compare.
        string2: the second string to compare.
        language_name: name of the language whose collation rules are used,
            e.g. "Spanish". When omitted (None), the module-level LANGUAGE
            value in effect at call time is used.

    Returns:
        1 if string1 appears before string2 in alphabetic order,
        -1 if string1 appears after string2,
        0 if the collator considers them equal.
    """
    if language_name is None:
        # Read LANGUAGE here rather than in the signature: a default of
        # ``language_name=LANGUAGE`` is evaluated only once, when the function
        # is defined, and would not see a later rebinding of LANGUAGE.
        language_name = LANGUAGE

    locale_str = language_to_locale(language_name)
    collator = Collator.createInstance(Locale(locale_str))
    comparison = collator.compare(string1, string2)

    # The result uses the opposite sign from Collator.compare(): a negative
    # comparison (string1 sorts first) is reported as 1.
    if comparison < 0:
        return 1
    if comparison > 0:
        return -1
    return 0
53+
54+
55+
def language_to_locale(language_name: str, fallback="en_US") -> str:
    """
    Convert a language name (e.g., "English") to an ICU locale string.

    The lookup is case-insensitive and ignores surrounding whitespace. The
    first entry of ``available_locales`` whose English display language
    matches is returned, so which of several locales for one language is
    chosen depends on the iteration order of ``available_locales``.

    Args:
        language_name (str): Language name in English (e.g., "English", "French").
        fallback (str): Locale string to return if not found.

    Returns:
        str: The name of the first matching locale, or ``fallback`` when no
        available locale has that display language.
    """
    # Normalize input
    wanted = language_name.strip().lower()

    # Display names are always requested in English.
    # FIXME? Generalize or do better later?
    # Built once, outside the loop: it does not change between iterations.
    english = Locale("en")

    # available_locales maps locale names to Locale objects, so the objects
    # are reused instead of constructing a new Locale from every name.
    for loc_str, loc in available_locales.items():
        if loc.getDisplayLanguage(english).lower() == wanted:
            return loc_str

    # Could not find exact match, return fallback
    return fallback
80+
81+
3982
class Alphabet(Builtin):
4083
"""
4184
Basic lowercase alphabet via <url>:Unicode: https://home.unicode.org/</url> and <url>:PyICU: https://pypi.org/project/PyICU/</url>
@@ -48,7 +91,7 @@ class Alphabet(Builtin):
4891
</dl>
4992
5093
>> Alphabet["Ukrainian"]
51-
= {a, ä, b, c, d, e, f, g, h, i, j, k, l, m, n, o, ö, p, q, r, s, ß, t, u, ü, v, w, x, y, z}
94+
= {ʼ, а, б, в, г, д, е, ж, з, и, й, к, л, м, н, о, п, р, с, т, у, ф, х, ц, ч, ш, щ, ь, ю, я, є, і, ї, ґ}
5295
5396
When nothing is specified, "English" is used:
5497
>> Alphabet[]
@@ -81,6 +124,48 @@ def eval(self, alpha: String, evaluation):
81124
return
82125
return alphabet_list
83126

127+
128+
class AlphabeticOrder(Builtin):
    """
    <url>:WMA:https://reference.wolfram.com/language/ref/AlphabeticOrder.html</url>
    <dl>
      <dt>'AlphabeticOrder'[$string_1$, $string_2$]
      <dd>gives 1 if $string_1$ appears before $string_2$ in alphabetical order, -1 if it is after, and 0 if it is identical.

      <dt>'AlphabeticOrder'[$string_1$, $string_2$, $lang$]
      <dd>compares the strings using the alphabetical order of the language $lang$.
    </dl>

    >> AlphabeticOrder["apple", "banana"]
     = 1

    >> AlphabeticOrder["parrot", "parrot"]
     = 0

    When words are the same but only differ in case, usually lowercase letters come first:
    >> AlphabeticOrder["A", "a"]
     = -1

    Longer words follow their prefixes:
    >> AlphabeticOrder["Papagayo", "Papa", "Spanish"]
     = -1

    A word with an accented letter follows the same word without the accent:
    >> AlphabeticOrder["Papá", "Papa", "Spanish"]
     = -1

    But it still comes before a longer word that starts with the unaccented form:
    >> AlphabeticOrder["Papá", "Papagayo", "Spanish"]
     = 1
    """

    summary_text = "compare strings according to an alphabet"

    # NOTE: the docstrings of the eval methods below are the patterns the
    # methods are matched against, so they must not be reworded.

    def eval(self, string1: String, string2: String, evaluation: Evaluation):
        """AlphabeticOrder[string1_String, string2_String]"""
        # No language given: eval_alphabetic_order() falls back to its default.
        return Integer(eval_alphabetic_order(string1.value, string2.value))

    def eval_with_lang(
        self, string1: String, string2: String, lang: String, evaluation: Evaluation
    ):
        """AlphabeticOrder[string1_String, string2_String, lang_String]"""
        return Integer(
            eval_alphabetic_order(string1.value, string2.value, lang.value)
        )
167+
168+
84169
## FIXME: move to mathics-core. Will have to change references to Pymathics`$Language to $Language
85170
class Language(Predefined):
86171
"""
@@ -95,15 +180,13 @@ class Language(Predefined):
95180
96181
See the language in effect used for functions like 'Alphabet[]':
97182
98-
>> old_language = $Language
99-
= ...
100-
101183
By setting its value, the letters of 'Alphabet[]' are changed:
102184
103185
>> $Language = "German"; Alphabet[]
104186
= ...
105187
106-
#> $Language = old_language;
188+
#> $Language = "English"
189+
= English
107190
108191
See also <url>
109192
:Alphabet:

test/test_basic.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
# -*- coding: utf-8 -*-
22

3+
from mathics.core.atoms import String
34
from mathics.core.load_builtin import import_and_load_builtins
45
from mathics.session import MathicsSession
56

67
import_and_load_builtins()
78

89
session = MathicsSession(character_encoding="UTF-8")
10+
assert session.evaluate('LoadModule["pymathics.icu"]') == String("pymathics.icu")
911

1012

1113
def check_evaluation(str_expr: str, expected: str, assert_message=""):
@@ -19,8 +21,7 @@ def check_evaluation(str_expr: str, expected: str, assert_message=""):
1921
assert result == expected
2022

2123

22-
def test_language():
23-
session.evaluate('LoadModule["pymathics.icu"]') == "pymathics.icu"
24+
def test_alphabet():
2425
check_evaluation(
2526
'Alphabet["es"]',
2627
(

0 commit comments

Comments
 (0)