Skip to content

Commit ccc1392

Browse files
committed
Add collator strength and locale parameters to the grapheme functions
1 parent b16df27 commit ccc1392

File tree

5 files changed

+68
-49
lines changed

5 files changed

+68
-49
lines changed

ext/intl/grapheme/grapheme_string.c

Lines changed: 43 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -81,19 +81,22 @@ PHP_FUNCTION(grapheme_strlen)
8181
/* {{{ Find position of first occurrence of a string within another */
8282
PHP_FUNCTION(grapheme_strpos)
8383
{
84-
char *haystack, *needle;
85-
size_t haystack_len, needle_len;
84+
char *haystack, *needle, *locale = "";
85+
size_t haystack_len, needle_len, locale_len;
8686
const char *found;
8787
zend_long loffset = 0;
8888
int32_t offset = 0;
89+
zend_long strength = UCOL_DEFAULT_STRENGTH;
8990
size_t noffset = 0;
9091
zend_long ret_pos;
9192

92-
ZEND_PARSE_PARAMETERS_START(2, 3)
93+
ZEND_PARSE_PARAMETERS_START(2, 5)
9394
Z_PARAM_STRING(haystack, haystack_len)
9495
Z_PARAM_STRING(needle, needle_len)
9596
Z_PARAM_OPTIONAL
9697
Z_PARAM_LONG(loffset)
98+
Z_PARAM_STRING(locale, locale_len)
99+
Z_PARAM_LONG(strength)
97100
ZEND_PARSE_PARAMETERS_END();
98101

99102
if ( OUTSIDE_STRING(loffset, haystack_len) ) {
@@ -121,7 +124,7 @@ PHP_FUNCTION(grapheme_strpos)
121124
}
122125

123126
/* do utf16 part of the strpos */
124-
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0, "" /* last */ );
127+
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0, locale, strength /* last */ );
125128

126129
if ( ret_pos >= 0 ) {
127130
RETURN_LONG(ret_pos);
@@ -139,15 +142,17 @@ PHP_FUNCTION(grapheme_stripos)
139142
const char *found;
140143
zend_long loffset = 0;
141144
int32_t offset = 0;
145+
zend_long strength = UCOL_DEFAULT_STRENGTH;
142146
zend_long ret_pos;
143147
int is_ascii;
144148

145-
ZEND_PARSE_PARAMETERS_START(2, 4)
149+
ZEND_PARSE_PARAMETERS_START(2, 5)
146150
Z_PARAM_STRING(haystack, haystack_len)
147151
Z_PARAM_STRING(needle, needle_len)
148152
Z_PARAM_OPTIONAL
149153
Z_PARAM_LONG(loffset)
150154
Z_PARAM_STRING(locale, locale_len)
155+
Z_PARAM_LONG(strength)
151156
ZEND_PARSE_PARAMETERS_END();
152157

153158
if ( OUTSIDE_STRING(loffset, haystack_len) ) {
@@ -186,7 +191,7 @@ PHP_FUNCTION(grapheme_stripos)
186191
}
187192

188193
/* do utf16 part of the strpos */
189-
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0, locale /*last */ );
194+
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0, locale, strength /*last */ );
190195

191196
if ( ret_pos >= 0 ) {
192197
RETURN_LONG(ret_pos);
@@ -205,14 +210,16 @@ PHP_FUNCTION(grapheme_strrpos)
205210
size_t haystack_len, needle_len;
206211
zend_long loffset = 0;
207212
int32_t offset = 0;
213+
zend_long strength = UCOL_DEFAULT_STRENGTH;
208214
zend_long ret_pos;
209215
int is_ascii;
210216

211-
ZEND_PARSE_PARAMETERS_START(2, 3)
217+
ZEND_PARSE_PARAMETERS_START(2, 4)
212218
Z_PARAM_STRING(haystack, haystack_len)
213219
Z_PARAM_STRING(needle, needle_len)
214220
Z_PARAM_OPTIONAL
215221
Z_PARAM_LONG(loffset)
222+
Z_PARAM_LONG(strength)
216223
ZEND_PARSE_PARAMETERS_END();
217224

218225
if ( OUTSIDE_STRING(loffset, haystack_len) ) {
@@ -244,7 +251,7 @@ PHP_FUNCTION(grapheme_strrpos)
244251
/* else we need to continue via utf16 */
245252
}
246253

247-
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1, locale /* last */);
254+
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1, locale, strength /* last */);
248255

249256
if ( ret_pos >= 0 ) {
250257
RETURN_LONG(ret_pos);
@@ -263,15 +270,17 @@ PHP_FUNCTION(grapheme_strripos)
263270
size_t haystack_len, needle_len, locale_len = 0;
264271
zend_long loffset = 0;
265272
int32_t offset = 0;
273+
zend_long strength = UCOL_DEFAULT_STRENGTH;
266274
zend_long ret_pos;
267275
int is_ascii;
268276

269-
ZEND_PARSE_PARAMETERS_START(2, 4)
277+
ZEND_PARSE_PARAMETERS_START(2, 5)
270278
Z_PARAM_STRING(haystack, haystack_len)
271279
Z_PARAM_STRING(needle, needle_len)
272280
Z_PARAM_OPTIONAL
273281
Z_PARAM_LONG(loffset)
274282
Z_PARAM_STRING(locale, locale_len)
283+
Z_PARAM_LONG(strength)
275284
ZEND_PARSE_PARAMETERS_END();
276285

277286
if ( OUTSIDE_STRING(loffset, haystack_len) ) {
@@ -312,7 +321,7 @@ PHP_FUNCTION(grapheme_strripos)
312321
/* else we need to continue via utf16 */
313322
}
314323

315-
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* f_ignore_case */, 1, locale /*last */);
324+
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* f_ignore_case */, 1, locale, strength /*last */);
316325

317326
if ( ret_pos >= 0 ) {
318327
RETURN_LONG(ret_pos);
@@ -327,13 +336,14 @@ PHP_FUNCTION(grapheme_strripos)
327336
/* {{{ Returns part of a string */
328337
PHP_FUNCTION(grapheme_substr)
329338
{
330-
char *str;
339+
char *str, *locale = "";
331340
zend_string *u8_sub_str;
332341
UChar *ustr;
333-
size_t str_len;
342+
size_t str_len, locale_len;
334343
int32_t ustr_len;
335344
zend_long lstart = 0, length = 0;
336345
int32_t start = 0;
346+
zend_long strength = UCOL_DEFAULT;
337347
int iter_val;
338348
UErrorCode status;
339349
unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
@@ -342,11 +352,13 @@ PHP_FUNCTION(grapheme_substr)
342352
int32_t (*iter_func)(UBreakIterator *);
343353
bool no_length = true;
344354

345-
ZEND_PARSE_PARAMETERS_START(2, 3)
355+
ZEND_PARSE_PARAMETERS_START(2, 5)
346356
Z_PARAM_STRING(str, str_len)
347357
Z_PARAM_LONG(lstart)
348358
Z_PARAM_OPTIONAL
349359
Z_PARAM_LONG_OR_NULL(length, no_length)
360+
Z_PARAM_STRING(locale, locale_len)
361+
Z_PARAM_LONG(strength)
350362
ZEND_PARSE_PARAMETERS_END();
351363

352364
if (lstart < INT32_MIN || lstart > INT32_MAX) {
@@ -544,24 +556,17 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas
544556
const char *found;
545557
size_t haystack_len, needle_len, locale_len = 0;
546558
int32_t ret_pos, uchar_pos;
559+
zend_long strength = UCOL_DEFAULT_STRENGTH;
547560
bool part = false;
548561

549-
if (f_ignore_case == 1) {
550-
ZEND_PARSE_PARAMETERS_START(2, 4)
551-
Z_PARAM_STRING(haystack, haystack_len)
552-
Z_PARAM_STRING(needle, needle_len)
553-
Z_PARAM_OPTIONAL
554-
Z_PARAM_BOOL(part)
555-
Z_PARAM_STRING(locale, locale_len)
556-
ZEND_PARSE_PARAMETERS_END();
557-
} else {
558-
ZEND_PARSE_PARAMETERS_START(2, 3)
559-
Z_PARAM_STRING(haystack, haystack_len)
560-
Z_PARAM_STRING(needle, needle_len)
561-
Z_PARAM_OPTIONAL
562-
Z_PARAM_BOOL(part)
563-
ZEND_PARSE_PARAMETERS_END();
564-
}
562+
ZEND_PARSE_PARAMETERS_START(2, 5)
563+
Z_PARAM_STRING(haystack, haystack_len)
564+
Z_PARAM_STRING(needle, needle_len)
565+
Z_PARAM_OPTIONAL
566+
Z_PARAM_BOOL(part)
567+
Z_PARAM_STRING(locale, locale_len)
568+
Z_PARAM_LONG(strength)
569+
ZEND_PARSE_PARAMETERS_END();
565570

566571
if ( !f_ignore_case ) {
567572

@@ -587,7 +592,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas
587592
}
588593

589594
/* need to work in utf16 */
590-
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0, locale /*last */ );
595+
ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0, locale, strength /*last */ );
591596

592597
if ( ret_pos < 0 ) {
593598
RETURN_FALSE;
@@ -932,14 +937,19 @@ PHP_FUNCTION(grapheme_levenshtein)
932937
zend_long cost_ins = 1;
933938
zend_long cost_rep = 1;
934939
zend_long cost_del = 1;
940+
char *locale = "";
941+
size_t locale_len = 0;
942+
zend_long strength = UCOL_DEFAULT_STRENGTH;
935943

936-
ZEND_PARSE_PARAMETERS_START(2, 5)
944+
ZEND_PARSE_PARAMETERS_START(2, 7)
937945
Z_PARAM_STR(string1)
938946
Z_PARAM_STR(string2)
939947
Z_PARAM_OPTIONAL
940948
Z_PARAM_LONG(cost_ins)
941949
Z_PARAM_LONG(cost_rep)
942950
Z_PARAM_LONG(cost_del)
951+
Z_PARAM_STRING(locale, locale_len)
952+
Z_PARAM_LONG(strength)
943953
ZEND_PARSE_PARAMETERS_END();
944954

945955
if (cost_ins <= 0 || cost_ins > UINT_MAX / 4) {
@@ -1056,14 +1066,15 @@ PHP_FUNCTION(grapheme_levenshtein)
10561066
RETVAL_FALSE;
10571067
goto out_bi2;
10581068
}
1059-
UCollator *collator = ucol_open("", &ustatus);
1069+
UCollator *collator = ucol_open(locale, &ustatus);
10601070
if (U_FAILURE(ustatus)) {
10611071
intl_error_set_code(NULL, ustatus);
10621072

10631073
intl_error_set_custom_msg(NULL, "Error on ucol_open", 0);
10641074
RETVAL_FALSE;
10651075
goto out_collator;
10661076
}
1077+
ucol_setStrength(collator, strength);
10671078

10681079
zend_long *p1, *p2, *tmp;
10691080
p1 = safe_emalloc((size_t) strlen_2 + 1, sizeof(zend_long), 0);

ext/intl/grapheme/grapheme_util.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
9494

9595

9696
/* {{{ grapheme_strpos_utf16 - strpos/strrpos using utf16 (direction selected by `last`) */
97-
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last, char* locale)
97+
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last, char* locale, int32_t strength)
9898
{
9999
UChar *uhaystack = NULL, *uneedle = NULL;
100100
int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0;
@@ -141,6 +141,7 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
141141

142142
if(f_ignore_case) {
143143
UCollator *coll = usearch_getCollator(src);
144+
ucol_setStrength(coll, strength);
144145
status = U_ZERO_ERROR;
145146
ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_SECONDARY, &status);
146147
STRPOS_CHECK_STATUS(status, "Error setting collation strength");

ext/intl/grapheme/grapheme_util.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ zend_long grapheme_ascii_check(const unsigned char *day, size_t len);
2525
void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char **sub_str, int32_t *sub_str_len);
2626
zend_long grapheme_strrpos_ascii(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset);
2727

28-
int32_t grapheme_strrpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int f_ignore_case);
29-
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case, int last, char* locale);
28+
int32_t grapheme_strrpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int f_ignore_case, char* locale, int32_t strength);
29+
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case, int last, char* locale, int32_t strength);
3030

3131
int32_t grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len );
3232

ext/intl/php_intl.stub.php

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -431,23 +431,23 @@ function numfmt_get_error_message(NumberFormatter $formatter): string {}
431431

432432
function grapheme_strlen(string $string): int|false|null {}
433433

434-
function grapheme_strpos(string $haystack, string $needle, int $offset = 0): int|false {}
434+
function grapheme_strpos(string $haystack, string $needle, int $offset = 0, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {}
435435

436-
function grapheme_stripos(string $haystack, string $needle, int $offset = 0, string $locale = ""): int|false {}
436+
function grapheme_stripos(string $haystack, string $needle, int $offset = 0, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {}
437437

438-
function grapheme_strrpos(string $haystack, string $needle, int $offset = 0): int|false {}
438+
function grapheme_strrpos(string $haystack, string $needle, int $offset = 0, int $strength = UCOL_DEFAULT_STRENGTH): int|false {}
439439

440-
function grapheme_strripos(string $haystack, string $needle, int $offset = 0, string $locale = ""): int|false {}
440+
function grapheme_strripos(string $haystack, string $needle, int $offset = 0, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {}
441441

442-
function grapheme_substr(string $string, int $offset, ?int $length = null): string|false {}
442+
function grapheme_substr(string $string, int $offset, ?int $length = null, int $strength = UCOL_DEFAULT_STRENGTH): string|false {}
443443

444-
function grapheme_strstr(string $haystack, string $needle, bool $beforeNeedle = false): string|false {}
444+
function grapheme_strstr(string $haystack, string $needle, bool $beforeNeedle = false, int $strength = UCOL_DEFAULT_STRENGTH): string|false {}
445445

446-
function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false, string $locale = ""): string|false {}
446+
function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): string|false {}
447447

448448
function grapheme_str_split(string $string, int $length = 1): array|false {}
449449

450-
function grapheme_levenshtein(string $string1, string $string2, int $insertion_cost = 1, int $replacement_cost = 1, int $deletion_cost = 1): int|false {}
450+
function grapheme_levenshtein(string $string1, string $string2, int $insertion_cost = 1, int $replacement_cost = 1, int $deletion_cost = 1, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {}
451451

452452
/** @param int $next */
453453
function grapheme_extract(string $haystack, int $size, int $type = GRAPHEME_EXTR_COUNT, int $offset = 0, &$next = null): string|false {}

ext/intl/php_intl_arginfo.h

Lines changed: 13 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)