1
0
mirror of https://github.com/php/php-src.git synced 2026-04-23 07:58:20 +02:00
Files
archived-php-src/ext/mbstring/tests/gh9535b.phpt
T
Alex Dowad c211e67b4e Remove XFAIL from test cases for mb_strcut when used with JIS or ISO-2022-JP encoding
The documentation for mb_strcut states:

    mb_strcut(
        string $string,
        int $start,
        ?int $length = null,
        ?string $encoding = null
    ): string

    mb_strcut() extracts a substring from a string similarly to mb_substr(),
    but operates on bytes instead of characters. If the cut position happens
    to be between two bytes of a multi-byte character, the cut is performed
    starting from the first byte of that character.

My understanding of the $length parameter for mb_strcut is that it
specifies the range of bytes to extract from $string, and that all
characters encoded by those bytes should be included in the returned
string, even if that means the returned string would be longer than
$length bytes. This can happen either if 1) there is more than one way
to encode the same character in $encoding, and one way requires more
bytes than the other, or 2) $encoding uses escape sequences.

However, discussion with users of mb_strcut indicates that many of them
interpret $length as the maximum length of the *returned* string.
This is also the historical behavior of the function.

Hence, there is no need to modify the behavior of mb_strcut and then
remove XFAIL from these test cases afterwards. We can keep the current
behavior.
2023-04-02 13:52:14 +02:00

98 lines
2.7 KiB
PHP

--TEST--
Test output of mb_strcut for text encodings which use escape sequences
--EXTENSIONS--
mbstring
--FILE--
<?php
// Text encodings exercised by this test; per the test title, these are
// encodings which use escape sequences.
$encodings = ['JIS', 'ISO-2022-JP', 'ISO-2022-JP-2004'];

// Each case is [source text, byte limit passed to mb_strcut()].
$cases = [
    ['宛如繁星般宛如皎月般', 15],
    ['星のように月のように', 20],
    ['あaいb', 10],
    ['AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 10],
    ['???', 2],
];

foreach ($cases as [$text, $limit]) {
    foreach ($encodings as $encoding) {
        // Convert into the target encoding, cut there, then convert back to
        // the internal encoding so the result is printable.
        $encoded = mb_convert_encoding($text, $encoding, mb_internal_encoding());
        $truncated = mb_strcut($encoded, 0, $limit, $encoding);
        $decoded = mb_convert_encoding($truncated, mb_internal_encoding(), $encoding);
        echo $encoding, ': ', $decoded, PHP_EOL;
    }
    // Empty line separating one case's results from the next.
    echo PHP_EOL;
}

// Final check: cut the last case's text directly, without converting it to
// the target encoding first, and dump the raw result.
[$text, $limit] = $cases[count($cases) - 1];
foreach ($encodings as $encoding) {
    var_dump(mb_strcut($text, 0, $limit, $encoding));
}
?>
--EXPECTF--
JIS: 宛如繁星
ISO-2022-JP: 宛如繁星
ISO-2022-JP-2004: 宛如繁星

JIS: 星のように月の
ISO-2022-JP: 星のように月の
ISO-2022-JP-2004: 星のように月

JIS: あa
ISO-2022-JP: あa
ISO-2022-JP-2004: あa

JIS: AAAAAAAAAA
ISO-2022-JP: AAAAAAAAAA
ISO-2022-JP-2004: AAAAAAAAAA

JIS: ??
ISO-2022-JP: ??
ISO-2022-JP-2004: ??

string(2) "??"
string(2) "??"
string(2) "??"