mirror of
https://github.com/php/php-src.git
synced 2026-04-25 08:58:28 +02:00
6fc8d014df
Previously, mbstring used the same logic for encoding validation as for encoding conversion. However, there are cases where validation and conversion need different logic. For example, if a string ends before the input required by the encoding is complete, or if it contains a byte sequence that is invalid in the encoding but can still be converted, the conversion should succeed while the validation should fail. To achieve this, a function pointer, mb_check_fn, has been added to struct mbfl_encoding to implement the logic used for validation. Validation logic has also been implemented for UTF-7, UTF7-IMAP, ISO-2022-JP and JIS.
156 lines
2.7 KiB
PHP
156 lines
2.7 KiB
PHP
--TEST--
|
|
GH-10648 (mb_check_encoding() returns true for incorrect but interpretable ISO-2022-JP byte sequences)
|
|
--EXTENSIONS--
|
|
mbstring
|
|
--FILE--
|
|
<?php

// Label => hex-encoded input bytes. Every input is validated and converted
// twice: once as JIS and once as ISO-2022-JP.
// Byte legend: 1b = ESC, 0e = SO, 0f = SI.
$testcases = [
    'ISO-2022-JP bytes' => '1b244224221b2842', // 'あ' in ISO-2022-JP
    'ISO-2022-JP bytes without escape sequence' => '1b24422422', // 'あ' again, but without the closing ESC ( B
    'JIS X 0201 7bit kana with escape sequence' => '1b2849311b2842', // 'ア' in JIS
    'JIS X 0201 7bit kana with SO/SI' => '0e310f', // 'ア' in JIS
    'JIS X 0201 8bit kana' => 'b1', // 'ア' in JIS
    'JIS X 0201 7bit kana with SO and ESC' => '0e311b2842', // 'ア' in JIS
    'JIS X 0201 7bit kana with ESC and SI' => '1b2849310f', // 'ア' in JIS
    'JIS X 0208 character' => '1b244242641b2842', // '鯛' in JIS and ISO-2022-JP, included in JIS X 0208
    'JIS X 0212 character' => '1b2428446a591b2842', // '鮋' in JIS, included in JIS X 0212
    'JIS X 0213 character' => '1b2428507d4c1b2842', // '𩸽' in ISO-2022-JP-2004, included in JIS X 0213
    'JIS C 6220-1969 ESC ( H' => '1b284a1b2848', // ESC ( J followed by ESC ( H; no printable characters
    'SO/SI when not in ASCII mode' => '1b284a0e0f1b2842', // ESC ( J, then SO and SI, then ESC ( B
];

foreach ($testcases as $title => $case) {
    // Decode once; the same raw bytes are fed to every call below.
    $bytes = hex2bin($case);

    echo $title, PHP_EOL;

    // Keep this order (check, convert, read counter; JIS before ISO-2022-JP):
    // judging by the expected output, the 'illegal_chars' counter carries
    // over from one conversion to the next instead of being reset.
    foreach (['JIS', 'ISO-2022-JP'] as $encoding) {
        echo $encoding, ':', PHP_EOL;
        var_dump(mb_check_encoding($bytes, $encoding));
        echo mb_convert_encoding($bytes, 'UTF-8', $encoding), PHP_EOL;
        var_dump(mb_get_info('illegal_chars'));
    }

    echo PHP_EOL;
}
?>
|
|
--EXPECT--
|
|
ISO-2022-JP bytes
|
|
JIS:
|
|
bool(true)
|
|
あ
|
|
int(0)
|
|
ISO-2022-JP:
|
|
bool(true)
|
|
あ
|
|
int(0)
|
|
|
|
ISO-2022-JP bytes without escape sequence
|
|
JIS:
|
|
bool(false)
|
|
あ
|
|
int(0)
|
|
ISO-2022-JP:
|
|
bool(false)
|
|
あ
|
|
int(0)
|
|
|
|
JIS X 0201 7bit kana with escape sequence
|
|
JIS:
|
|
bool(true)
|
|
ア
|
|
int(0)
|
|
ISO-2022-JP:
|
|
bool(false)
|
|
ア
|
|
int(0)
|
|
|
|
JIS X 0201 7bit kana with SO/SI
|
|
JIS:
|
|
bool(true)
|
|
ア
|
|
int(0)
|
|
ISO-2022-JP:
|
|
bool(false)
|
|
ア
|
|
int(0)
|
|
|
|
JIS X 0201 8bit kana
|
|
JIS:
|
|
bool(true)
|
|
ア
|
|
int(0)
|
|
ISO-2022-JP:
|
|
bool(false)
|
|
ア
|
|
int(0)
|
|
|
|
JIS X 0201 7bit kana with SO and ESC
|
|
JIS:
|
|
bool(false)
|
|
ア
|
|
int(0)
|
|
ISO-2022-JP:
|
|
bool(false)
|
|
ア
|
|
int(0)
|
|
|
|
JIS X 0201 7bit kana with ESC and SI
|
|
JIS:
|
|
bool(false)
|
|
ア
|
|
int(0)
|
|
ISO-2022-JP:
|
|
bool(false)
|
|
ア
|
|
int(0)
|
|
|
|
JIS X 0208 character
|
|
JIS:
|
|
bool(true)
|
|
鯛
|
|
int(0)
|
|
ISO-2022-JP:
|
|
bool(true)
|
|
鯛
|
|
int(0)
|
|
|
|
JIS X 0212 character
|
|
JIS:
|
|
bool(true)
|
|
鮋
|
|
int(0)
|
|
ISO-2022-JP:
|
|
bool(false)
|
|
鮋
|
|
int(0)
|
|
|
|
JIS X 0213 character
|
|
JIS:
|
|
bool(false)
|
|
?$(P}L
|
|
int(1)
|
|
ISO-2022-JP:
|
|
bool(false)
|
|
?$(P}L
|
|
int(2)
|
|
|
|
JIS C 6220-1969 ESC ( H
|
|
JIS:
|
|
bool(true)
|
|
|
|
int(2)
|
|
ISO-2022-JP:
|
|
bool(false)
|
|
|
|
int(2)
|
|
|
|
SO/SI when not in ASCII mode
|
|
JIS:
|
|
bool(false)
|
|
|
|
int(2)
|
|
ISO-2022-JP:
|
|
bool(false)
|
|
|
|
int(2)
|