4
4
Languages - Human-Language Alphabets and Locales via PyICU.
5
5
"""
6
6
7
- # PYTHON MODULES USED IN HERE
8
-
9
- # PyICU: human-language alphabets and locales
10
-
11
-
12
7
from typing import List , Optional
13
8
14
- from icu import Locale , LocaleData
15
- from mathics .core .atoms import String
9
+ from icu import Collator , Locale , LocaleData
10
+ from mathics .core .atoms import Integer , String
16
11
from mathics .core .builtin import Builtin , Predefined
17
12
from mathics .core .convert .expression import to_mathics_list
18
13
from mathics .core .evaluation import Evaluation
19
14
20
# Locales known to ICU. The code in this module uses this as a mapping from
# locale name to locale object (it calls .items() on it and
# .getDisplayLanguage() on the values).
available_locales = Locale.getAvailableLocales()

# Lookup table from a locale's display language to a locale name.
# Several locales can share one display language; in that case the locale
# that is iterated last is the one that ends up in the table.
language2locale = {
    locale_obj.getDisplayLanguage(): name
    for name, locale_obj in available_locales.items()
}

# The current value of $Language
LANGUAGE = "English"
28
23
24
+
29
25
def eval_alphabet(language_name: String) -> Optional[List[String]]:
    """
    Return the letters of the alphabet for ``language_name`` as a Mathics3 list
    of String, or None when no ICU locale can be found for it.

    ``language_name`` is first looked up in ``language2locale``; when it is not
    a key there, its value is tried as a locale name as-is.
    """
    name = language_name.value
    locale_id = language2locale.get(name, name)
    if locale_id in available_locales:
        # NOTE(review): (0, 0) presumably selects ICU's standard exemplar set
        # with no options -- confirm against the ICU LocaleData documentation.
        exemplars = LocaleData(locale_id).getExemplarSet(0, 0)
        return to_mathics_list(*exemplars, elements_conversion_fn=String)
    return None
37
33
38
34
35
def eval_alphabetic_order(
    string1: str, string2: str, language_name: Optional[str] = None
) -> int:
    """
    Compare two strings using locale-sensitive alphabetic order.

    Args:
        string1: the first string to compare.
        string2: the second string to compare.
        language_name: a language name such as "English"; it is converted to
            an ICU locale string with language_to_locale(). When omitted
            (None), the module-level LANGUAGE is read at call time.

    Returns:
        1 if string1 appears before string2 in alphabetic order,
        -1 if string1 appears after string2,
        0 if the collator considers them equal.
    """
    # Resolve the default at call time. A "language_name=LANGUAGE" default is
    # evaluated only once, when the function is defined, so it would never see
    # a later rebinding of LANGUAGE.
    # NOTE(review): assumes assigning $Language rebinds LANGUAGE -- confirm.
    if language_name is None:
        language_name = LANGUAGE

    locale_str = language_to_locale(language_name)
    collator = Collator.createInstance(Locale(locale_str))
    comparison = collator.compare(string1, string2)

    # The sign is deliberately flipped: a negative collator result
    # (string1 sorts first) is reported as 1.
    if comparison < 0:
        return 1
    if comparison > 0:
        return -1
    return 0
53
+
54
+
55
def language_to_locale(language_name: str, fallback: str = "en_US") -> str:
    """
    Convert a language name (e.g., "English") to an ICU locale string (e.g., "en_US").
    Returns the first matching locale string or a fallback if not found.

    The comparison ignores letter case and surrounding whitespace.

    Args:
        language_name (str): Language name in English (e.g., "English", "French").
        fallback (str): Locale string to return if not found.

    Returns:
        str: Locale string (e.g., "en_US", "fr_FR").
    """
    # Normalize input
    wanted = language_name.strip().lower()

    # Display names are requested in English. Build this locale once rather
    # than on every loop iteration.
    # FIXME? Generalize or do better later?
    english = Locale("en")

    # The values of available_locales are used directly as the locale objects
    # for their keys, so no new Locale is built per entry.
    for loc_str, loc in available_locales.items():
        if loc.getDisplayLanguage(english).lower() == wanted:
            return loc_str

    # Could not find exact match, return fallback
    return fallback
80
+
81
+
39
82
class Alphabet (Builtin ):
40
83
"""
41
84
Basic lowercase alphabet via <url>:Unicode: https://home.unicode.org/</url> and <url>:PyICU: https://pypi.org/project/PyICU/</url>
@@ -48,7 +91,7 @@ class Alphabet(Builtin):
48
91
</dl>
49
92
50
93
>> Alphabet["Ukrainian"]
51
- = {a, ä, b, c, d, e, f, g, h, i, j, k, l, m, n, o, ö, p, q, r, s, ß, t, u, ü, v, w, x, y, z }
94
+ = {ʼ, а, б, в, г, д, е, ж, з, и, й, к, л, м, н, о, п, р, с, т, у, ф, х, ц, ч, ш, щ, ь, ю, я, є, і, ї, ґ }
52
95
53
96
When no language is specified, "English" is used:
54
97
>> Alphabet[]
@@ -81,6 +124,48 @@ def eval(self, alpha: String, evaluation):
81
124
return
82
125
return alphabet_list
83
126
127
+
128
class AlphabeticOrder(Builtin):
    """
    <url>:WMA:https://reference.wolfram.com/language/ref/AlphabeticOrder.html</url>
    <dl>
      <dt>'AlphabeticOrder'[$string_1$, $string_2$]
      <dd>gives 1 if $string_1$ appears before $string_2$ in alphabetical order, -1 if it is after, and 0 if it is identical.

      <dt>'AlphabeticOrder'[$string_1$, $string_2$, $lang$]
      <dd>compares the strings using the alphabetical order of the language $lang$.
    </dl>

    >> AlphabeticOrder["apple", "banana"]
     = 1

    >> AlphabeticOrder["parrot", "parrot"]
     = 0

    When words are the same but only differ in case, usually lowercase letters come first:
    >> AlphabeticOrder["A", "a"]
     = -1

    Longer words follow their prefixes:
    >> AlphabeticOrder["Papagayo", "Papa", "Spanish"]
     = -1

    But accented letters usually appear at the end of the alphabet:
    >> AlphabeticOrder["Papá", "Papa", "Spanish"]
     = -1

    >> AlphabeticOrder["Papá", "Papagayo", "Spanish"]
     = 1
    """

    summary_text = "compare strings according to an alphabet"

    # In both methods the docstring is the pattern the method is written for;
    # its text must stay exactly as it is.

    def eval(self, string1: String, string2: String, evaluation: Evaluation):
        """AlphabeticOrder[string1_String, string2_String]"""
        # No language given: eval_alphabetic_order() applies its own default.
        return Integer(eval_alphabetic_order(string1.value, string2.value))

    def eval_with_lang(
        self, string1: String, string2: String, lang: String, evaluation: Evaluation
    ):
        """AlphabeticOrder[string1_String, string2_String, lang_String]"""
        return Integer(
            eval_alphabetic_order(string1.value, string2.value, lang.value)
        )
167
+
168
+
84
169
## FIXME: move to mathics-core. Will have to change references to Pymathics`$Language to $Language
85
170
class Language (Predefined ):
86
171
"""
@@ -95,15 +180,13 @@ class Language(Predefined):
95
180
96
181
See the language in effect used for functions like 'Alphabet[]':
97
182
98
- >> old_language = $Language
99
- = ...
100
-
101
183
By setting its value, the letters of 'Alphabet[]' are changed:
102
184
103
185
>> $Language = "German"; Alphabet[]
104
186
= ...
105
187
106
- #> $Language = old_language;
188
+ #> $Language = "English"
189
+ = English
107
190
108
191
See also <url>
109
192
:Alphabet:
0 commit comments