diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index cf4c1c32c59..618fff55362 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -5858,6 +5858,9 @@ static zend_string* mb_mime_header_encode(zend_string *input, const mbfl_encodin unsigned char *in = (unsigned char*)ZSTR_VAL(input); size_t in_len = ZSTR_LEN(input); + ZEND_ASSERT(outcode->mime_name != NULL); + ZEND_ASSERT(outcode->mime_name[0] != '\0'); + if (!in_len) { return zend_empty_string; } @@ -5880,7 +5883,8 @@ static zend_string* mb_mime_header_encode(zend_string *input, const mbfl_encodin unsigned int state = 0; /* wchar_buf should be big enough that when it is full, we definitely have enough * wchars to fill an entire line of output */ - uint32_t wchar_buf[80]; + const size_t wchar_buf_len = 90; + uint32_t wchar_buf[wchar_buf_len]; uint32_t *p, *e; /* What part of wchar_buf is filled with still-unprocessed data which should not * be overwritten? */ @@ -5891,7 +5895,7 @@ static zend_string* mb_mime_header_encode(zend_string *input, const mbfl_encodin * spaces), just pass it through unchanged */ bool checking_leading_spaces = true; while (in_len) { - size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf, 80, &state); + size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf, wchar_buf_len, &state); p = wchar_buf; e = wchar_buf + out_len; @@ -5925,9 +5929,9 @@ no_passthrough: ; * do so all the way to the end of the string */ while (in_len) { /* Decode part of the input string, refill wchar_buf */ - ZEND_ASSERT(offset < 80); - size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, 80 - offset, &state); - ZEND_ASSERT(out_len <= 80 - offset); + ZEND_ASSERT(offset + MBSTRING_MIN_WCHAR_BUFSIZE <= wchar_buf_len); + size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, wchar_buf_len - offset, &state); + ZEND_ASSERT(out_len <= wchar_buf_len - offset); p = wchar_buf; e = wchar_buf + offset + out_len; /* ASCII output is broken into space-delimited 'words' @@ -5948,6 +5952,7 @@ no_passthrough: ; * If we are already too far along on a line to include Base64/QPrint encoded data * on the same line (without overrunning max line length), then add a line feed * right now */ +feed_and_mime_encode: if (mb_convert_buf_len(&buf) - line_start + indent + strlen(outcode->mime_name) > 55) { MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, (e - word_start) + linefeed_len + 1); buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len); @@ -5985,7 +5990,13 @@ no_passthrough: ; if (in_len) { /* Copy chars which are part of an incomplete 'word' to the beginning - * of wchar_buf and reprocess them on the next iteration */ + * of wchar_buf and reprocess them on the next iteration. + * But first make sure that the incomplete 'word' isn't so big that + * there will be no space to add any more decoded wchars in the buffer + * (which could lead to an infinite loop) */ + if ((word_start - wchar_buf) < MBSTRING_MIN_WCHAR_BUFSIZE) { + goto feed_and_mime_encode; + } offset = e - word_start; if (offset) { memmove(wchar_buf, word_start, offset * sizeof(uint32_t)); @@ -6027,17 +6038,17 @@ mime_encoding_needed: ; /* Do we need to refill wchar_buf to make sure we don't run out of wchars * in the middle of a line? */ - if (p == wchar_buf) { + offset = e - p; + if (wchar_buf_len - offset < MBSTRING_MIN_WCHAR_BUFSIZE) { goto start_new_line; } - offset = e - p; memmove(wchar_buf, p, offset * sizeof(uint32_t)); while(true) { refill_wchar_buf: ; - ZEND_ASSERT(offset < 80); - size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, 80 - offset, &state); - ZEND_ASSERT(out_len <= 80 - offset); + ZEND_ASSERT(offset + MBSTRING_MIN_WCHAR_BUFSIZE <= wchar_buf_len); + size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, wchar_buf_len - offset, &state); + ZEND_ASSERT(out_len <= wchar_buf_len - offset); p = wchar_buf; e = wchar_buf + offset + out_len; @@ -6112,22 +6123,18 @@ start_new_line: ; indent = 0; /* Indent argument must only affect the first line */ - if (in_len) { - /* We still have more of input string remaining to decode */ + if (in_len || p < e) { + /* We still have more input to process */ buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len); buf.out = mb_convert_buf_add(buf.out, ' '); line_start = mb_convert_buf_len(&buf); - /* Copy remaining wchars to beginning of buffer so they will be - * processed on the next iteration of outer 'do' loop */ offset = e - p; - memmove(wchar_buf, p, offset * sizeof(uint32_t)); - goto refill_wchar_buf; - } else if (p < e) { - /* Input string is finished, but we still have trailing wchars - * remaining to be processed in wchar_buf */ - buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len); - buf.out = mb_convert_buf_add(buf.out, ' '); - line_start = mb_convert_buf_len(&buf); + if (in_len && (wchar_buf_len - offset >= MBSTRING_MIN_WCHAR_BUFSIZE)) { + /* Copy any remaining wchars to beginning of buffer and refill + * the rest of the buffer */ + memmove(wchar_buf, p, offset * sizeof(uint32_t)); + goto refill_wchar_buf; + } goto start_new_line; } else { /* We are done! */ @@ -6165,7 +6172,7 @@ PHP_FUNCTION(mb_encode_mimeheader) charset = php_mb_get_encoding(charset_name, 2); if (!charset) { RETURN_THROWS(); - } else if (charset->mime_name == NULL || charset->mime_name[0] == '\0') { + } else if (charset->mime_name == NULL || charset->mime_name[0] == '\0' || charset == &mbfl_encoding_qprint) { zend_argument_value_error(2, "\"%s\" cannot be used for MIME header encoding", ZSTR_VAL(charset_name)); RETURN_THROWS(); } diff --git a/ext/mbstring/tests/mb_encode_mimeheader_basic4.phpt b/ext/mbstring/tests/mb_encode_mimeheader_basic4.phpt index 7bf05b43ae3..d9a3407bf3f 100644 --- a/ext/mbstring/tests/mb_encode_mimeheader_basic4.phpt +++ b/ext/mbstring/tests/mb_encode_mimeheader_basic4.phpt @@ -115,11 +115,29 @@ var_dump(mb_encode_mimeheader("\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ // In the general case, matching the old implementation's decision to transfer-encode or not // perfectly would require allocating potentially unbounded scratch memory (up to the size of // the input string), but we aim to only use a constant amount of temporarily allocated memory -var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", "")); +var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", "")); + +// Regression test for infinite loop which was unintentionally caused when refactoring +var_dump(mb_encode_mimeheader(",9868949,9868978,9869015,9689100,9869121,9869615,9870690,9867116,98558119861183. ", "utf-8", "B")); +var_dump(mb_encode_mimeheader('xx ' . str_repeat("A", 81) . " ", "utf-8", "B")); + +// Regression test for problem where MIME encoding loop would not leave enough space in wchar +// buffer for the next iteration, causing an assertion failure +mb_internal_encoding('MacJapanese'); +var_dump(mb_encode_mimeheader("ne\xf6\xff\xff\xffs\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff1\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff1", 'CP50220', 'B', "A", 44)); + +// Regression test for failing assertion caused by the fact that QPrint deliberately generates no +// wchars for CR (0x0D) bytes +try { + mb_internal_encoding('Quoted-Printable'); + var_dump(mb_encode_mimeheader("=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=00=00=00=00=00=00=00=01=00=00=00=00=00=00=00850r=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=00=00=00=0050r=08=0DCP850r850r0r", "Quoted-Printable", "B", "", 184)); +} catch (\ValueError $e) { + echo $e->getMessage() . \PHP_EOL; +} echo "Done"; ?> ---EXPECT-- +--EXPECTF-- string(0) "" string(21) "=?UTF-8?Q?abc=00abc?=" string(16) "=?UTF-8?B?Pw==?=" @@ -156,5 +174,14 @@ string(75) " 111111111111111111111111111111111111111111111111111111111111111111 string(33) "=?HZ-GB-2312?Q?=7E=7Bs=5B=7E=7D?=" string(77) "2 !3" string(282) "=?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20!=33=20?=" -string(296) "2 =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20!=33?=" +string(344) "2 =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20!=33?=" +string(135) "=?UTF-8?B?LDk4Njg5NDksOTg2ODk3OCw5ODY5MDE1LDk2ODkxMDAsOTg2OTEyMSw5ODY5?= + =?UTF-8?B?NjE1LDk4NzA2OTAsOTg2NzExNiw5ODU1ODExOTg2MTE4My4g?=" +string(142) "xx =?UTF-8?B?QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFB?= + =?UTF-8?B?QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBIA==?=" +string(690) "=?ISO-2022-JP?B?bmU/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/cxskQiFEGyhCPw==?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/MRskQiFEGyhCPxskQiFEGyhCPw==?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/MQ==?=" + + +Deprecated: mb_encode_mimeheader(): Handling QPrint via mbstring is deprecated; use quoted_printable_encode/quoted_printable_decode instead in %s on line %d +mb_encode_mimeheader(): Argument #2 ($charset) "Quoted-Printable" cannot be used for MIME header encoding Done