1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

Fix infinite loop in mb_encode_mimeheader

This commit is contained in:
Alex Dowad
2024-03-18 23:23:28 +02:00
committed by Ben Ramsey
parent 3d9941fd1e
commit 3394efc63e
2 changed files with 61 additions and 27 deletions

View File

@@ -5858,6 +5858,9 @@ static zend_string* mb_mime_header_encode(zend_string *input, const mbfl_encodin
unsigned char *in = (unsigned char*)ZSTR_VAL(input);
size_t in_len = ZSTR_LEN(input);
ZEND_ASSERT(outcode->mime_name != NULL);
ZEND_ASSERT(outcode->mime_name[0] != '\0');
if (!in_len) {
return zend_empty_string;
}
@@ -5880,7 +5883,8 @@ static zend_string* mb_mime_header_encode(zend_string *input, const mbfl_encodin
unsigned int state = 0;
/* wchar_buf should be big enough that when it is full, we definitely have enough
* wchars to fill an entire line of output */
uint32_t wchar_buf[80];
const size_t wchar_buf_len = 90;
uint32_t wchar_buf[wchar_buf_len];
uint32_t *p, *e;
/* What part of wchar_buf is filled with still-unprocessed data which should not
* be overwritten? */
@@ -5891,7 +5895,7 @@ static zend_string* mb_mime_header_encode(zend_string *input, const mbfl_encodin
* spaces), just pass it through unchanged */
bool checking_leading_spaces = true;
while (in_len) {
size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf, 80, &state);
size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf, wchar_buf_len, &state);
p = wchar_buf;
e = wchar_buf + out_len;
@@ -5925,9 +5929,9 @@ no_passthrough: ;
* do so all the way to the end of the string */
while (in_len) {
/* Decode part of the input string, refill wchar_buf */
ZEND_ASSERT(offset < 80);
size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, 80 - offset, &state);
ZEND_ASSERT(out_len <= 80 - offset);
ZEND_ASSERT(offset + MBSTRING_MIN_WCHAR_BUFSIZE <= wchar_buf_len);
size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, wchar_buf_len - offset, &state);
ZEND_ASSERT(out_len <= wchar_buf_len - offset);
p = wchar_buf;
e = wchar_buf + offset + out_len;
/* ASCII output is broken into space-delimited 'words'
@@ -5948,6 +5952,7 @@ no_passthrough: ;
* If we are already too far along on a line to include Base64/QPrint encoded data
* on the same line (without overrunning max line length), then add a line feed
* right now */
feed_and_mime_encode:
if (mb_convert_buf_len(&buf) - line_start + indent + strlen(outcode->mime_name) > 55) {
MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, (e - word_start) + linefeed_len + 1);
buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len);
@@ -5985,7 +5990,13 @@ no_passthrough: ;
if (in_len) {
/* Copy chars which are part of an incomplete 'word' to the beginning
* of wchar_buf and reprocess them on the next iteration */
* of wchar_buf and reprocess them on the next iteration.
* But first make sure that the incomplete 'word' isn't so big that
* there will be no space to add any more decoded wchars in the buffer
* (which could lead to an infinite loop) */
if ((word_start - wchar_buf) < MBSTRING_MIN_WCHAR_BUFSIZE) {
goto feed_and_mime_encode;
}
offset = e - word_start;
if (offset) {
memmove(wchar_buf, word_start, offset * sizeof(uint32_t));
@@ -6027,17 +6038,17 @@ mime_encoding_needed: ;
/* Do we need to refill wchar_buf to make sure we don't run out of wchars
* in the middle of a line? */
if (p == wchar_buf) {
offset = e - p;
if (wchar_buf_len - offset < MBSTRING_MIN_WCHAR_BUFSIZE) {
goto start_new_line;
}
offset = e - p;
memmove(wchar_buf, p, offset * sizeof(uint32_t));
while(true) {
refill_wchar_buf: ;
ZEND_ASSERT(offset < 80);
size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, 80 - offset, &state);
ZEND_ASSERT(out_len <= 80 - offset);
ZEND_ASSERT(offset + MBSTRING_MIN_WCHAR_BUFSIZE <= wchar_buf_len);
size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, wchar_buf_len - offset, &state);
ZEND_ASSERT(out_len <= wchar_buf_len - offset);
p = wchar_buf;
e = wchar_buf + offset + out_len;
@@ -6112,22 +6123,18 @@ start_new_line: ;
indent = 0; /* Indent argument must only affect the first line */
if (in_len) {
/* We still have more of input string remaining to decode */
if (in_len || p < e) {
/* We still have more input to process */
buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len);
buf.out = mb_convert_buf_add(buf.out, ' ');
line_start = mb_convert_buf_len(&buf);
/* Copy remaining wchars to beginning of buffer so they will be
* processed on the next iteration of outer 'do' loop */
offset = e - p;
memmove(wchar_buf, p, offset * sizeof(uint32_t));
goto refill_wchar_buf;
} else if (p < e) {
/* Input string is finished, but we still have trailing wchars
* remaining to be processed in wchar_buf */
buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len);
buf.out = mb_convert_buf_add(buf.out, ' ');
line_start = mb_convert_buf_len(&buf);
if (in_len && (wchar_buf_len - offset >= MBSTRING_MIN_WCHAR_BUFSIZE)) {
/* Copy any remaining wchars to beginning of buffer and refill
* the rest of the buffer */
memmove(wchar_buf, p, offset * sizeof(uint32_t));
goto refill_wchar_buf;
}
goto start_new_line;
} else {
/* We are done! */
@@ -6165,7 +6172,7 @@ PHP_FUNCTION(mb_encode_mimeheader)
charset = php_mb_get_encoding(charset_name, 2);
if (!charset) {
RETURN_THROWS();
} else if (charset->mime_name == NULL || charset->mime_name[0] == '\0') {
} else if (charset->mime_name == NULL || charset->mime_name[0] == '\0' || charset == &mbfl_encoding_qprint) {
zend_argument_value_error(2, "\"%s\" cannot be used for MIME header encoding", ZSTR_VAL(charset_name));
RETURN_THROWS();
}

View File

@@ -115,11 +115,29 @@ var_dump(mb_encode_mimeheader("\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
// In the general case, perfectly matching the old implementation's decision on whether or not to
// transfer-encode would require allocating potentially unbounded scratch memory (up to the size
// of the input string), but we aim to use only a constant amount of temporarily allocated memory
var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", ""));
var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", ""));
// Regression test for infinite loop which was unintentionally caused when refactoring
var_dump(mb_encode_mimeheader(",9868949,9868978,9869015,9689100,9869121,9869615,9870690,9867116,98558119861183. ", "utf-8", "B"));
var_dump(mb_encode_mimeheader('xx ' . str_repeat("A", 81) . " ", "utf-8", "B"));
// Regression test for problem where MIME encoding loop would not leave enough space in wchar
// buffer for the next iteration, causing an assertion failure
mb_internal_encoding('MacJapanese');
var_dump(mb_encode_mimeheader("ne\xf6\xff\xff\xffs\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff1\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff1", 'CP50220', 'B', "A", 44));
// Regression test for failing assertion caused by the fact that QPrint deliberately generates no
// wchars for CR (0x0D) bytes
try {
mb_internal_encoding('Quoted-Printable');
var_dump(mb_encode_mimeheader("=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=00=00=00=00=00=00=00=01=00=00=00=00=00=00=00850r=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=00=00=00=0050r=08=0DCP850r850r0r", "Quoted-Printable", "B", "", 184));
} catch (\ValueError $e) {
echo $e->getMessage() . \PHP_EOL;
}
echo "Done";
?>
--EXPECT--
--EXPECTF--
string(0) ""
string(21) "=?UTF-8?Q?abc=00abc?="
string(16) "=?UTF-8?B?Pw==?="
@@ -156,5 +174,14 @@ string(75) " 111111111111111111111111111111111111111111111111111111111111111111
string(33) "=?HZ-GB-2312?Q?=7E=7Bs=5B=7E=7D?="
string(77) "2 !3"
string(282) "=?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20!=33=20?="
string(296) "2 =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20!=33?="
string(344) "2 =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20!=33?="
string(135) "=?UTF-8?B?LDk4Njg5NDksOTg2ODk3OCw5ODY5MDE1LDk2ODkxMDAsOTg2OTEyMSw5ODY5?=
=?UTF-8?B?NjE1LDk4NzA2OTAsOTg2NzExNiw5ODU1ODExOTg2MTE4My4g?="
string(142) "xx =?UTF-8?B?QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFB?=
=?UTF-8?B?QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBIA==?="
string(690) "=?ISO-2022-JP?B?bmU/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/cxskQiFEGyhCPw==?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/MRskQiFEGyhCPxskQiFEGyhCPw==?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/MQ==?="
Deprecated: mb_encode_mimeheader(): Handling QPrint via mbstring is deprecated; use quoted_printable_encode/quoted_printable_decode instead in %s on line %d
mb_encode_mimeheader(): Argument #2 ($charset) "Quoted-Printable" cannot be used for MIME header encoding
Done