1
0
mirror of https://github.com/php/php-src.git synced 2026-04-25 08:58:28 +02:00
Files
pakutoma 6fc8d014df Fix phpGH-10648: add check function pointer into mbfl_encoding
Previously, mbstring used the same logic for encoding validation as for
encoding conversion.

However, there are cases where we want to use different logic for validation
and conversion. For example, if a string ends before the input required by
the encoding is complete, or if it contains a sequence that is invalid in
the encoding but can still be converted, the conversion should succeed and
the validation should fail.

To achieve this, a function pointer mb_check_fn has been added to
struct mbfl_encoding to implement the logic used for validation.
Also, added implementation of validation logic for UTF-7, UTF7-IMAP,
ISO-2022-JP and JIS.
2023-03-24 20:34:22 +02:00

156 lines
2.7 KiB
PHP

--TEST--
GH-10648 (mb_check_encoding() returns true for incorrect but interpretable ISO-2022-JP byte sequences)
--EXTENSIONS--
mbstring
--FILE--
<?php
/*
 * Regression script for GH-10648.
 *
 * Each entry maps a human-readable label to a hex-encoded byte sequence.
 * Every sequence is validated with mb_check_encoding() and converted to UTF-8
 * with mb_convert_encoding(), once as JIS and once as ISO-2022-JP, so the
 * validation verdict and the conversion result can be compared per encoding.
 */
$testcases = [
    'ISO-2022-JP bytes' => '1b244224221b2842', // 'あ' in ISO-2022-JP
    'ISO-2022-JP bytes without escape sequence' => '1b24422422', // 'あ' without the trailing ESC ( B
    'JIS X 0201 7bit kana with escape sequence' => '1b2849311b2842', // 'ア' in JIS
    'JIS X 0201 7bit kana with SO/SI' => '0e310f', // 'ア' in JIS
    'JIS X 0201 8bit kana' => 'b1', // 'ア' in JIS
    'JIS X 0201 7bit kana with SO and ESC' => '0e311b2842', // 'ア' in JIS, opened by SO but closed by ESC ( B
    'JIS X 0201 7bit kana with ESC and SI' => '1b2849310f', // 'ア' in JIS, opened by ESC ( I but closed by SI
    'JIS X 0208 character' => '1b244242641b2842', // '鯛' in JIS and ISO-2022-JP, included in JIS X 0208
    'JIS X 0212 character' => '1b2428446a591b2842', // '鮋' in JIS, included in JIS X 0212
    'JIS X 0213 character' => '1b2428507d4c1b2842', // '𩸽' in ISO-2022-JP-2004, included in JIS X 0213
    'JIS C 6220-1969 ESC ( H' => '1b284a1b2848', // ESC ( J followed by ESC ( H, an escape sequence transitioning to ASCII
    'SO/SI when not in ASCII mode' => '1b284a0e0f1b2842', // ESC ( J, then SO and SI, then ESC ( B
];

// Encodings under test, in the order their results are printed.
$encodings = ['JIS', 'ISO-2022-JP'];

foreach ($testcases as $label => $hex) {
    // Decode once; the same raw bytes are fed to every check and conversion.
    $bytes = hex2bin($hex);

    echo $label, PHP_EOL;
    foreach ($encodings as $encoding) {
        echo $encoding, ':', PHP_EOL;
        // Validation verdict first, then the result of converting the same bytes.
        var_dump(mb_check_encoding($bytes, $encoding));
        echo mb_convert_encoding($bytes, 'UTF-8', $encoding), PHP_EOL;
        // NOTE(review): judging by the expected output, this counter looks like a
        // running total across calls rather than a per-call value; confirm
        // against the mbstring documentation.
        var_dump(mb_get_info('illegal_chars'));
    }
    echo PHP_EOL;
}
?>
--EXPECT--
ISO-2022-JP bytes
JIS:
bool(true)
int(0)
ISO-2022-JP:
bool(true)
int(0)
ISO-2022-JP bytes without escape sequence
JIS:
bool(false)
int(0)
ISO-2022-JP:
bool(false)
int(0)
JIS X 0201 7bit kana with escape sequence
JIS:
bool(true)
int(0)
ISO-2022-JP:
bool(false)
int(0)
JIS X 0201 7bit kana with SO/SI
JIS:
bool(true)
int(0)
ISO-2022-JP:
bool(false)
int(0)
JIS X 0201 8bit kana
JIS:
bool(true)
int(0)
ISO-2022-JP:
bool(false)
int(0)
JIS X 0201 7bit kana with SO and ESC
JIS:
bool(false)
int(0)
ISO-2022-JP:
bool(false)
int(0)
JIS X 0201 7bit kana with ESC and SI
JIS:
bool(false)
int(0)
ISO-2022-JP:
bool(false)
int(0)
JIS X 0208 character
JIS:
bool(true)
int(0)
ISO-2022-JP:
bool(true)
int(0)
JIS X 0212 character
JIS:
bool(true)
int(0)
ISO-2022-JP:
bool(false)
int(0)
JIS X 0213 character
JIS:
bool(false)
?$(P}L
int(1)
ISO-2022-JP:
bool(false)
?$(P}L
int(2)
JIS C 6220-1969 ESC ( H
JIS:
bool(true)
int(2)
ISO-2022-JP:
bool(false)
int(2)
SO/SI when not in ASCII mode
JIS:
bool(false)
int(2)
ISO-2022-JP:
bool(false)
int(2)