1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

Fix GH-15824 mb_detect_encoding() invalid "UTF8" (#15829)

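Replace strcasecmp with strncasecmp when matching MIME names and aliases.
However, strncasecmp only compares the number of bytes given as its third parameter (name_len), so a longer MIME name or alias that merely starts with the requested name would also match.
Hence, also check that the MIME name or alias ends exactly at name_len.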

Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
This commit is contained in:
tekimen
2024-09-11 09:40:35 +09:00
committed by GitHub
parent db545767e5
commit dc5f3b9562
2 changed files with 39 additions and 2 deletions

View File

@@ -349,7 +349,7 @@ const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len)
/* search MIME charset name */
for (encoding = mbfl_encoding_ptr_list; *encoding; encoding++) {
if ((*encoding)->mime_name) {
if (strcasecmp((*encoding)->mime_name, name) == 0) {
if (strncasecmp((*encoding)->mime_name, name, name_len) == 0 && (*encoding)->mime_name[name_len] == '\0') {
return *encoding;
}
}
@@ -359,7 +359,7 @@ const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len)
for (encoding = mbfl_encoding_ptr_list; *encoding; encoding++) {
if ((*encoding)->aliases) {
for (const char **alias = (*encoding)->aliases; *alias; alias++) {
if (strcasecmp(*alias, name) == 0) {
if (strncasecmp(name, *alias, name_len) == 0 && (*alias)[name_len] == '\0') {
return *encoding;
}
}

View File

@@ -0,0 +1,37 @@
--TEST--
GH-15824 (ValueError: mb_detect_encoding(): Argument #2 ($encodings) contains invalid encoding "UTF8")
--EXTENSIONS--
mbstring
--FILE--
<?php
/*
 * Regression test for GH-15824: every encoding name in a comma-separated
 * list passed to mb_detect_encoding() must be resolved, whether it is
 * written as an alias or as a MIME name. The output is compared verbatim
 * against the --EXPECT-- section, so the order of the calls matters.
 */
echo "== alias name ==\n";
var_dump(mb_detect_encoding('abc', 'UTF8, ASCII'));
var_dump(mb_detect_encoding('こんにちは', 'UTF8, ASCII'));
var_dump(mb_detect_encoding('こんにちは', 'ASCII, UTF8'));
var_dump(mb_detect_encoding("\xC5", 'US-ASCII, LATIN4'));
var_dump(mb_detect_encoding("\xC5", 'US-ASCII, cyrillic'));
/* 0x9D is not assigned in CP1254, so that candidate must be rejected */
var_dump(mb_detect_encoding("\x9D", 'US-ASCII, CP1254, cyrillic'));
/* Same input and list, with strict detection explicitly disabled */
var_dump(mb_detect_encoding("\x9D", 'US-ASCII, CP1254, cyrillic', false));
echo "== mime name ==\n";
var_dump(mb_detect_encoding('abc', 'ANSI_X3.4-1968, ISO-8859-1'));
var_dump(mb_detect_encoding('abc', 'CP50220, ANSI_X3.4-1968'));
/*
 * The trailing comma in the encoding list is intentional, not a mistake.
 * hex2bin() decodes the hex digits into the raw bytes (ESC $ B ... ESC ( B);
 * bin2hex() would instead hex-encode the digits themselves and feed plain
 * ASCII digits to the detector.
 */
var_dump(mb_detect_encoding(hex2bin('1b24422422242424262428242a1b2842'), 'CP50220, ANSI_X3.4-1968,', false));
var_dump(mb_detect_encoding('😄', 'US-ASCII, UTF-8-Mobile#KDDI-B, UTF-8'));
?>
--EXPECT--
== alias name ==
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"
string(10) "ISO-8859-4"
string(10) "ISO-8859-5"
string(10) "ISO-8859-5"
string(10) "ISO-8859-5"
== mime name ==
string(5) "ASCII"
string(7) "CP50220"
string(7) "CP50220"
string(19) "UTF-8-Mobile#KDDI-B"