1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

Fix GH-18566: [intl] Weird numeric sort in Collator

This aligns the behaviour with normal (non-intl) asort() by making the following changes:
  - Use the same trailing whitespace logic as Zend's is_numeric_ex()
  - Don't allow errors on trailing data

Targeting master because of the BC break.

Closes GH-18632.
This commit is contained in:
Niels Dossche
2025-05-24 12:33:45 +02:00
parent 5e21ffe09a
commit 5187ff2d17
5 changed files with 68 additions and 1 deletions

1
NEWS
View File

@@ -98,6 +98,7 @@ PHP NEWS
adding/removing likely subtags to a locale. (David Carlier)
. Added IntlListFormatter class to format a list of items with a locale,
operand types and units. (BogdanUngureanu)
. Fixed bug GH-18566 ([intl] Weird numeric sort in Collator). (nielsdos)
- LDAP:
. Allow ldap_get_option to retrieve global option by allowing NULL for

View File

@@ -53,6 +53,9 @@ PHP 8.5 UPGRADE NOTES
- Intl:
. The extension now requires at least ICU 57.1.
. The behaviour of Collator::SORT_REGULAR with respect to handling numeric
strings is now aligned with the behaviour of SORT_REGULAR in ext/standard.
This is a consequence of fixing bug GH-18566.
- LDAP:
. ldap_get_option() and ldap_set_option() now throw a ValueError when

View File

@@ -317,7 +317,7 @@ zval* collator_convert_string_to_number_if_possible( zval* str, zval *rv )
COLLATOR_CONVERT_RETURN_FAILED( str );
}
if ( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, /* allow_errors */ 1 ) ) )
if ( ( is_numeric = collator_is_numeric( (UChar*) Z_STRVAL_P(str), UCHARS( Z_STRLEN_P(str) ), &lval, &dval, /* allow_errors */ false ) ) )
{
if( is_numeric == IS_LONG ) {
ZVAL_LONG(rv, lval);

View File

@@ -200,6 +200,14 @@ static zend_long collator_u_strtol(const UChar *nptr, UChar **endptr, int base)
}
/* }}} */
/* Step over a run of whitespace (as classified by u_isspace()) beginning at
 * end_ptr and return the address of the first non-whitespace code unit.
 * Used for trailing data; it matches the way collator_u_strtol() skips
 * leading whitespace. */
static zend_always_inline UChar *collator_skip_ws(UChar *end_ptr)
{
	UChar *cursor;

	for (cursor = end_ptr; u_isspace(*cursor); cursor++) {
		/* empty body: the loop header performs the scan */
	}

	return cursor;
}
/* {{{ collator_is_numeric
* Taken from PHP6:is_numeric_unicode()
@@ -217,6 +225,7 @@ uint8_t collator_is_numeric( UChar *str, int32_t length, zend_long *lval, double
errno=0;
local_lval = collator_u_strtol(str, &end_ptr_long, 10);
if (errno != ERANGE) {
end_ptr_long = collator_skip_ws(end_ptr_long);
if (end_ptr_long == str+length) { /* integer string */
if (lval) {
*lval = local_lval;
@@ -233,6 +242,7 @@ uint8_t collator_is_numeric( UChar *str, int32_t length, zend_long *lval, double
if (local_dval == 0 && end_ptr_double == str) {
end_ptr_double = NULL;
} else {
end_ptr_double = collator_skip_ws(end_ptr_double);
if (end_ptr_double == str+length) { /* floating point string */
if (!zend_finite(local_dval)) {
/* "inf","nan" and maybe other weird ones */

View File

@@ -0,0 +1,53 @@
--TEST--
GH-18566 ([intl] Weird numeric sort in Collator)
--EXTENSIONS--
intl
--FILE--
<?php
// Case 1: date-time strings that begin with digits but continue with
// non-numeric data, plus one whitespace-only entry.
$arr = [
'2023-02-04 14:00:00',
'2023-01-08 12:00:00',
'2023-01-03 12:00:00',
'2023-01-03 12:00:00',
'2021-01-03 12:00:00',
'2023-01-05 14:00:00',
'2024-01-03 12:00:00',
'2023-01-03 12:00:00',
' ',
];
$coll = Collator::create('en');
// Sort in place with SORT_REGULAR; the original keys are kept, as the
// expected output below shows.
$coll->asort($arr, Collator::SORT_REGULAR);
print_r($arr);
// Case 2: integer strings padded with leading and/or trailing spaces.
// The expected output lists them by numeric value (-100, 10, 100000).
$arr = [
' 100000',
' 10',
' -100 ',
];
$coll = Collator::create('en');
$coll->asort($arr, Collator::SORT_REGULAR);
print_r($arr);
?>
--EXPECT--
Array
(
[8] =>
[4] => 2021-01-03 12:00:00
[2] => 2023-01-03 12:00:00
[3] => 2023-01-03 12:00:00
[7] => 2023-01-03 12:00:00
[5] => 2023-01-05 14:00:00
[1] => 2023-01-08 12:00:00
[0] => 2023-02-04 14:00:00
[6] => 2024-01-03 12:00:00
)
Array
(
[2] => -100
[1] => 10
[0] => 100000
)