diff --git a/ext/mbstring/libmbfl/filters/mbfilter_base64.c b/ext/mbstring/libmbfl/filters/mbfilter_base64.c index ede3eef18ce..f53ea4d9ae6 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_base64.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_base64.c @@ -99,15 +99,13 @@ int mbfl_filt_conv_base64enc(int c, mbfl_convert_filter *filter) filter->cache |= (c & 0xff) << 8; } else { filter->status &= ~0xff; - if ((filter->status & MBFL_BASE64_STS_MIME_HEADER) == 0) { - n = (filter->status & 0xff00) >> 8; - if (n > 72) { - CK((*filter->output_function)(0x0d, filter->data)); /* CR */ - CK((*filter->output_function)(0x0a, filter->data)); /* LF */ - filter->status &= ~0xff00; - } - filter->status += 0x400; + n = (filter->status & 0xff00) >> 8; + if (n > 72) { + CK((*filter->output_function)(0x0d, filter->data)); /* CR */ + CK((*filter->output_function)(0x0a, filter->data)); /* LF */ + filter->status &= ~0xff00; } + filter->status += 0x400; n = filter->cache | (c & 0xff); CK((*filter->output_function)(mbfl_base64_table[(n >> 18) & 0x3f], filter->data)); CK((*filter->output_function)(mbfl_base64_table[(n >> 12) & 0x3f], filter->data)); @@ -129,11 +127,9 @@ int mbfl_filt_conv_base64enc_flush(mbfl_convert_filter *filter) filter->cache = 0; /* flush fragments */ if (status >= 1) { - if ((filter->status & MBFL_BASE64_STS_MIME_HEADER) == 0) { - if (len > 72){ - CK((*filter->output_function)(0x0d, filter->data)); /* CR */ - CK((*filter->output_function)(0x0a, filter->data)); /* LF */ - } + if (len > 72){ + CK((*filter->output_function)(0x0d, filter->data)); /* CR */ + CK((*filter->output_function)(0x0a, filter->data)); /* LF */ } CK((*filter->output_function)(mbfl_base64_table[(cache >> 18) & 0x3f], filter->data)); CK((*filter->output_function)(mbfl_base64_table[(cache >> 12) & 0x3f], filter->data)); diff --git a/ext/mbstring/libmbfl/filters/mbfilter_qprint.c b/ext/mbstring/libmbfl/filters/mbfilter_qprint.c index 5fde30ee809..1ff2f3c2161 100644 --- 
a/ext/mbstring/libmbfl/filters/mbfilter_qprint.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_qprint.c @@ -29,7 +29,6 @@ #include "mbfilter.h" #include "mbfilter_qprint.h" -#include "unicode_prop.h" static size_t mb_qprint_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); static void mb_wchar_to_qprint(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); @@ -96,28 +95,25 @@ int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter) break; } - if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) { - if (s == 0x0a || (s == 0x0d && c != 0x0a)) { /* line feed */ - CK((*filter->output_function)(0x0d, filter->data)); /* CR */ - CK((*filter->output_function)(0x0a, filter->data)); /* LF */ - filter->status &= ~0xff00; - break; - } else if (s == 0x0d) { - break; - } + if (s == '\n' || (s == '\r' && c != '\n')) { /* line feed */ + CK((*filter->output_function)('\r', filter->data)); + CK((*filter->output_function)('\n', filter->data)); + filter->status &= ~0xff00; + break; + } else if (s == 0x0d) { + break; } - if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0 && n >= 72) { /* soft line feed */ - CK((*filter->output_function)(0x3d, filter->data)); /* '=' */ - CK((*filter->output_function)(0x0d, filter->data)); /* CR */ - CK((*filter->output_function)(0x0a, filter->data)); /* LF */ + if (n >= 72) { /* soft line feed */ + CK((*filter->output_function)('=', filter->data)); + CK((*filter->output_function)('\r', filter->data)); + CK((*filter->output_function)('\n', filter->data)); filter->status &= ~0xff00; } - if (s <= 0 || s >= 0x80 || s == 0x3d /* not ASCII or '=' */ - || ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) && mime_char_needs_qencode[s])) { + if (s <= 0 || s >= 0x80 || s == '=') { /* not ASCII or '=' */ /* hex-octet */ - CK((*filter->output_function)(0x3d, filter->data)); /* '=' */ + CK((*filter->output_function)('=', filter->data)); n = (s >> 4) & 0xf; if (n < 10) { n += 48; /* '0' */ @@ -132,14 
+128,10 @@ int mbfl_filt_conv_qprintenc(int c, mbfl_convert_filter *filter) n += 55; } CK((*filter->output_function)(n, filter->data)); - if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) { - filter->status += 0x300; - } + filter->status += 0x300; } else { CK((*filter->output_function)(s, filter->data)); - if ((filter->status & MBFL_QPRINT_STS_MIME_HEADER) == 0) { - filter->status += 0x100; - } + filter->status += 0x100; } break; } diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index cbf487b1a5b..02af1cde457 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -523,312 +523,3 @@ mbfl_strcut( return result; } - - -/* - * MIME header encode - */ -struct mime_header_encoder_data { - mbfl_convert_filter *conv1_filter; - mbfl_convert_filter *block_filter; - mbfl_convert_filter *conv2_filter; - mbfl_convert_filter *conv2_filter_backup; - mbfl_convert_filter *encod_filter; - mbfl_convert_filter *encod_filter_backup; - mbfl_memory_device outdev; - mbfl_memory_device tmpdev; - int status1; - int status2; - size_t prevpos; - size_t linehead; - size_t firstindent; - int encnamelen; - int lwsplen; - char encname[128]; - char lwsp[16]; -}; - -static int -mime_header_encoder_block_collector(int c, void *data) -{ - size_t n; - struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data; - - switch (pe->status2) { - case 1: /* encoded word */ - pe->prevpos = pe->outdev.pos; - mbfl_convert_filter_copy(pe->conv2_filter, pe->conv2_filter_backup); - mbfl_convert_filter_copy(pe->encod_filter, pe->encod_filter_backup); - (*pe->conv2_filter->filter_function)(c, pe->conv2_filter); - (*pe->conv2_filter->filter_flush)(pe->conv2_filter); - (*pe->encod_filter->filter_flush)(pe->encod_filter); - n = pe->outdev.pos - pe->linehead + pe->firstindent; - pe->outdev.pos = pe->prevpos; - mbfl_convert_filter_copy(pe->conv2_filter_backup, pe->conv2_filter); - 
mbfl_convert_filter_copy(pe->encod_filter_backup, pe->encod_filter); - if (n >= 74) { - (*pe->conv2_filter->filter_flush)(pe->conv2_filter); - (*pe->encod_filter->filter_flush)(pe->encod_filter); - mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */ - mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); - pe->linehead = pe->outdev.pos; - pe->firstindent = 0; - mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen); - c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter); - } else { - c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter); - } - break; - - default: - mbfl_memory_device_strncat(&pe->outdev, pe->encname, pe->encnamelen); - c = (*pe->conv2_filter->filter_function)(c, pe->conv2_filter); - pe->status2 = 1; - break; - } - - return 0; -} - -static int -mime_header_encoder_collector(int c, void *data) -{ - static int qp_table[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00 */ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */ - 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0, 0, 1, 0, 1, /* 0x10 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x50 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 0x70 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x80 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x90 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xA0 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xB0 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xE0 */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* 0xF0 */ - }; - - size_t n; - struct mime_header_encoder_data *pe = (struct mime_header_encoder_data *)data; - - switch 
(pe->status1) { - case 11: /* encoded word */ - (*pe->block_filter->filter_function)(c, pe->block_filter); - break; - - default: /* ASCII */ - if (c <= 0x00ff && !qp_table[(c & 0xff)]) { /* ordinary characters */ - mbfl_memory_device_output(c, &pe->tmpdev); - pe->status1 = 1; - } else if (pe->status1 == 0 && c == 0x20) { /* repeat SPACE */ - mbfl_memory_device_output(c, &pe->tmpdev); - } else { - if (pe->tmpdev.pos < 74 && c == 0x20) { - n = pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent; - if (n > 74) { - mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */ - pe->linehead = pe->outdev.pos; - pe->firstindent = 0; - } else if (pe->outdev.pos > 0) { - mbfl_memory_device_output(0x20, &pe->outdev); - } - mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev); - mbfl_memory_device_reset(&pe->tmpdev); - pe->status1 = 0; - } else { - n = pe->outdev.pos - pe->linehead + pe->encnamelen + pe->firstindent; - if (n > 60) { - mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); /* LWSP */ - pe->linehead = pe->outdev.pos; - pe->firstindent = 0; - } else if (pe->outdev.pos > 0) { - mbfl_memory_device_output(0x20, &pe->outdev); - } - mbfl_convert_filter_devcat(pe->block_filter, &pe->tmpdev); - mbfl_memory_device_reset(&pe->tmpdev); - (*pe->block_filter->filter_function)(c, pe->block_filter); - pe->status1 = 11; - } - } - break; - } - - return 0; -} - -mbfl_string * -mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result) -{ - if (pe->status1 >= 10) { - (*pe->conv2_filter->filter_flush)(pe->conv2_filter); - (*pe->encod_filter->filter_flush)(pe->encod_filter); - mbfl_memory_device_strncat(&pe->outdev, "\x3f\x3d", 2); /* ?= */ - } else if (pe->tmpdev.pos > 0) { - if (pe->outdev.pos > 0) { - if ((pe->outdev.pos - pe->linehead + pe->tmpdev.pos + pe->firstindent) > 74) { - mbfl_memory_device_strncat(&pe->outdev, pe->lwsp, pe->lwsplen); - } else { - mbfl_memory_device_output(0x20, &pe->outdev); - } - } - 
mbfl_memory_device_devcat(&pe->outdev, &pe->tmpdev); - } - mbfl_memory_device_reset(&pe->tmpdev); - pe->prevpos = 0; - pe->linehead = 0; - pe->status1 = 0; - pe->status2 = 0; - - return mbfl_memory_device_result(&pe->outdev, result); -} - -struct mime_header_encoder_data* -mime_header_encoder_new( - const mbfl_encoding *incode, - const mbfl_encoding *outcode, - const mbfl_encoding *transenc) -{ - size_t n; - const char *s; - struct mime_header_encoder_data *pe; - - /* get output encoding and check MIME charset name */ - if (outcode->mime_name == NULL || outcode->mime_name[0] == '\0') { - return NULL; - } - - pe = emalloc(sizeof(struct mime_header_encoder_data)); - mbfl_memory_device_init(&pe->outdev, 0, 0); - mbfl_memory_device_init(&pe->tmpdev, 0, 0); - pe->prevpos = 0; - pe->linehead = 0; - pe->firstindent = 0; - pe->status1 = 0; - pe->status2 = 0; - - /* make the encoding description string exp. "=?ISO-2022-JP?B?" */ - n = 0; - pe->encname[n++] = 0x3d; - pe->encname[n++] = 0x3f; - s = outcode->mime_name; - while (*s) { - pe->encname[n++] = *s++; - } - pe->encname[n++] = 0x3f; - if (transenc->no_encoding == mbfl_no_encoding_qprint) { - pe->encname[n++] = 0x51; - } else { - pe->encname[n++] = 0x42; - transenc = &mbfl_encoding_base64; - } - pe->encname[n++] = 0x3f; - pe->encname[n] = '\0'; - pe->encnamelen = n; - - n = 0; - pe->lwsp[n++] = 0x0d; - pe->lwsp[n++] = 0x0a; - pe->lwsp[n++] = 0x20; - pe->lwsp[n] = '\0'; - pe->lwsplen = n; - - /* transfer encode filter */ - pe->encod_filter = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev)); - pe->encod_filter_backup = mbfl_convert_filter_new(outcode, transenc, mbfl_memory_device_output, 0, &(pe->outdev)); - - /* Output code filter */ - pe->conv2_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter); - pe->conv2_filter_backup = mbfl_convert_filter_new(&mbfl_encoding_wchar, outcode, mbfl_filter_output_pipe, 0, pe->encod_filter); 
- - /* encoded block filter */ - pe->block_filter = mbfl_convert_filter_new(&mbfl_encoding_wchar, &mbfl_encoding_wchar, mime_header_encoder_block_collector, 0, pe); - - /* Input code filter */ - pe->conv1_filter = mbfl_convert_filter_new(incode, &mbfl_encoding_wchar, mime_header_encoder_collector, 0, pe); - - if (pe->encod_filter == NULL || - pe->encod_filter_backup == NULL || - pe->conv2_filter == NULL || - pe->conv2_filter_backup == NULL || - pe->conv1_filter == NULL) { - mime_header_encoder_delete(pe); - return NULL; - } - - if (transenc->no_encoding == mbfl_no_encoding_qprint) { - pe->encod_filter->status |= MBFL_QPRINT_STS_MIME_HEADER; - pe->encod_filter_backup->status |= MBFL_QPRINT_STS_MIME_HEADER; - } else { - pe->encod_filter->status |= MBFL_BASE64_STS_MIME_HEADER; - pe->encod_filter_backup->status |= MBFL_BASE64_STS_MIME_HEADER; - } - - return pe; -} - -void -mime_header_encoder_delete(struct mime_header_encoder_data *pe) -{ - if (pe) { - mbfl_convert_filter_delete(pe->conv1_filter); - mbfl_convert_filter_delete(pe->block_filter); - mbfl_convert_filter_delete(pe->conv2_filter); - mbfl_convert_filter_delete(pe->conv2_filter_backup); - mbfl_convert_filter_delete(pe->encod_filter); - mbfl_convert_filter_delete(pe->encod_filter_backup); - mbfl_memory_device_clear(&pe->outdev); - mbfl_memory_device_clear(&pe->tmpdev); - efree((void*)pe); - } -} - -mbfl_string * -mbfl_mime_header_encode( - mbfl_string *string, - mbfl_string *result, - const mbfl_encoding *outcode, - const mbfl_encoding *encoding, - const char *linefeed, - int indent) -{ - size_t n; - unsigned char *p; - struct mime_header_encoder_data *pe; - - mbfl_string_init(result); - result->encoding = &mbfl_encoding_ascii; - - pe = mime_header_encoder_new(string->encoding, outcode, encoding); - if (pe == NULL) { - return NULL; - } - - if (linefeed != NULL) { - n = 0; - while (*linefeed && n < 8) { - pe->lwsp[n++] = *linefeed++; - } - pe->lwsp[n++] = 0x20; - pe->lwsp[n] = '\0'; - pe->lwsplen = n; - } - if 
(indent > 0 && indent < 74) { - pe->firstindent = indent; - } - - n = string->len; - p = string->val; - while (n > 0) { - (*pe->conv1_filter->filter_function)(*p++, pe->conv1_filter); - n--; - } - - result = mime_header_encoder_result(pe, result); - mime_header_encoder_delete(pe); - - return result; -} diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.h b/ext/mbstring/libmbfl/mbfl/mbfilter.h index e3678584fa3..5f23c2b98c0 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.h @@ -168,29 +168,4 @@ static inline int mbfl_is_error(size_t len) { MBFLAPI extern mbfl_string * mbfl_strcut(mbfl_string *string, mbfl_string *result, size_t from, size_t length); -/* - * MIME header encode - */ -struct mime_header_encoder_data; /* forward declaration */ - -MBFLAPI extern struct mime_header_encoder_data * -mime_header_encoder_new( - const mbfl_encoding *incode, - const mbfl_encoding *outcode, - const mbfl_encoding *encoding); - -MBFLAPI extern void -mime_header_encoder_delete(struct mime_header_encoder_data *pe); - -MBFLAPI extern mbfl_string * -mime_header_encoder_result(struct mime_header_encoder_data *pe, mbfl_string *result); - -MBFLAPI extern mbfl_string * -mbfl_mime_header_encode( - mbfl_string *string, mbfl_string *result, - const mbfl_encoding *outcode, - const mbfl_encoding *encoding, - const char *linefeed, - int indent); - #endif /* MBFL_MBFILTER_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h index 32504a3fc3b..c5fa66f6e6e 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h @@ -44,9 +44,6 @@ /* Marker for an erroneous input byte (or sequence of bytes) */ #define MBFL_BAD_INPUT (-1) -#define MBFL_QPRINT_STS_MIME_HEADER 0x1000000 -#define MBFL_BASE64_STS_MIME_HEADER 0x1000000 - #define MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE 0 #define MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR 1 #define MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG 2 diff --git 
a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h index c20cb7bded4..a8eb2976cac 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h @@ -162,17 +162,17 @@ static inline void mb_convert_buf_init(mb_convert_buf *buf, size_t initsize, uin #define MB_CONVERT_BUF_ENSURE(buf, out, limit, needed) \ ZEND_ASSERT(out <= limit); \ if ((limit - out) < (needed)) { \ - size_t oldsize = limit - (unsigned char*)ZSTR_VAL(buf->str); \ + size_t oldsize = limit - (unsigned char*)ZSTR_VAL((buf)->str); \ size_t newsize = oldsize + MAX(oldsize >> 1, needed); \ - zend_string *newstr = erealloc(buf->str, _ZSTR_STRUCT_SIZE(newsize)); \ - out = (unsigned char*)ZSTR_VAL(newstr) + (out - (unsigned char*)ZSTR_VAL(buf->str)); \ + zend_string *newstr = erealloc((buf)->str, _ZSTR_STRUCT_SIZE(newsize)); \ + out = (unsigned char*)ZSTR_VAL(newstr) + (out - (unsigned char*)ZSTR_VAL((buf)->str)); \ limit = (unsigned char*)ZSTR_VAL(newstr) + newsize; \ - buf->str = newstr; \ + (buf)->str = newstr; \ } -#define MB_CONVERT_BUF_STORE(buf, _out, _limit) buf->out = _out; buf->limit = _limit +#define MB_CONVERT_BUF_STORE(buf, _out, _limit) (buf)->out = _out; (buf)->limit = _limit -#define MB_CONVERT_BUF_LOAD(buf, _out, _limit) _out = buf->out; _limit = buf->limit +#define MB_CONVERT_BUF_LOAD(buf, _out, _limit) _out = (buf)->out; _limit = (buf)->limit #define MB_CONVERT_ERROR(buf, out, limit, bad_cp, conv_fn) \ MB_CONVERT_BUF_STORE(buf, out, limit); \ @@ -209,6 +209,22 @@ static inline unsigned char* mb_convert_buf_add4(unsigned char *out, char c1, ch return out; } +static inline unsigned char* mb_convert_buf_appends(unsigned char *out, const char *s) +{ + while (*s) { + *out++ = *s++; + } + return out; +} + +static inline unsigned char* mb_convert_buf_appendn(unsigned char *out, const char *s, size_t n) +{ + while (n--) { + *out++ = *s++; + } + return out; +} + static inline zend_string* 
mb_convert_buf_result_raw(mb_convert_buf *buf) { ZEND_ASSERT(buf->out <= buf->limit); @@ -246,6 +262,24 @@ static inline zend_string* mb_convert_buf_result(mb_convert_buf *buf, const mbfl return ret; } +/* Used if we initialize an `mb_convert_buf` but then discover we don't actually + * want to return `zend_string` */ +static inline void mb_convert_buf_free(mb_convert_buf *buf) +{ + efree(buf->str); +} + +static inline size_t mb_convert_buf_len(mb_convert_buf *buf) +{ + return buf->out - (unsigned char*)ZSTR_VAL(buf->str); +} + +static inline void mb_convert_buf_reset(mb_convert_buf *buf, size_t len) +{ + buf->out = (unsigned char*)ZSTR_VAL(buf->str) + len; + ZEND_ASSERT(buf->out <= buf->limit); +} + MBFLAPI extern const mbfl_encoding *mbfl_name2encoding(const char *name); MBFLAPI extern const mbfl_encoding *mbfl_no2encoding(enum mbfl_no_encoding no_encoding); MBFLAPI extern const mbfl_encoding **mbfl_get_supported_encodings(void); diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index a52cc2a0927..3277284be51 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -46,6 +46,7 @@ #include "libmbfl/filters/mbfilter_utf16.h" #include "libmbfl/filters/mbfilter_singlebyte.h" #include "libmbfl/filters/translit_kana_jisx0201_jisx0208.h" +#include "libmbfl/filters/unicode_prop.h" #include "php_variables.h" #include "php_globals.h" @@ -91,6 +92,8 @@ static bool mb_check_str_encoding(zend_string *str, const mbfl_encoding *encodin static const mbfl_encoding* mb_guess_encoding(unsigned char *in, size_t in_len, const mbfl_encoding **elist, unsigned int elist_size, bool strict); +static zend_string* mb_mime_header_encode(zend_string *input, const mbfl_encoding *incode, const mbfl_encoding *outcode, bool base64, char *linefeed, size_t linefeed_len, zend_long indent); + /* See mbfilter_cp5022x.c */ uint32_t mb_convert_kana_codepoint(uint32_t c, uint32_t next, bool *consumed, uint32_t *second, int mode); /* }}} */ @@ -3201,66 +3204,6 @@ 
PHP_FUNCTION(mb_encoding_aliases) } /* }}} */ -/* {{{ Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */ -PHP_FUNCTION(mb_encode_mimeheader) -{ - const mbfl_encoding *charset, *transenc; - mbfl_string string, result, *ret; - zend_string *charset_name = NULL; - char *trans_enc_name = NULL, *string_val; - size_t trans_enc_name_len; - char *linefeed = "\r\n"; - size_t linefeed_len; - zend_long indent = 0; - - string.encoding = MBSTRG(current_internal_encoding); - - ZEND_PARSE_PARAMETERS_START(1, 5) - Z_PARAM_STRING(string_val, string.len) - Z_PARAM_OPTIONAL - Z_PARAM_STR(charset_name) - Z_PARAM_STRING(trans_enc_name, trans_enc_name_len) - Z_PARAM_STRING(linefeed, linefeed_len) - Z_PARAM_LONG(indent) - ZEND_PARSE_PARAMETERS_END(); - - string.val = (unsigned char*)string_val; - charset = &mbfl_encoding_pass; - transenc = &mbfl_encoding_base64; - - if (charset_name != NULL) { - charset = php_mb_get_encoding(charset_name, 2); - if (!charset) { - RETURN_THROWS(); - } else if (charset->mime_name == NULL || charset->mime_name[0] == '\0') { - zend_argument_value_error(2, "\"%s\" cannot be used for MIME header encoding", ZSTR_VAL(charset_name)); - RETURN_THROWS(); - } - } else { - const mbfl_language *lang = mbfl_no2language(MBSTRG(language)); - if (lang != NULL) { - charset = mbfl_no2encoding(lang->mail_charset); - transenc = mbfl_no2encoding(lang->mail_header_encoding); - } - } - - if (trans_enc_name != NULL) { - if (*trans_enc_name == 'B' || *trans_enc_name == 'b') { - transenc = &mbfl_encoding_base64; - } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') { - transenc = &mbfl_encoding_qprint; - } - } - - mbfl_string_init(&result); - ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent); - ZEND_ASSERT(ret != NULL); - // TODO: avoid reallocation ??? 
- RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */ - efree(ret->val); -} -/* }}} */ - static zend_string* jp_kana_convert(zend_string *input, const mbfl_encoding *encoding, unsigned int mode) { /* Each wchar may potentially expand to 2 when we perform kana conversion... @@ -4156,8 +4099,7 @@ PHP_FUNCTION(mb_send_mail) size_t to_len; char *message; size_t message_len; - char *subject; - size_t subject_len; + zend_string *subject; zend_string *extra_cmd = NULL; HashTable *headers_ht = NULL; zend_string *str_headers = NULL; @@ -4169,9 +4111,7 @@ PHP_FUNCTION(mb_send_mail) int cnt_trans_enc:1; } suppressed_hdrs = { 0, 0 }; - char *subject_buf = NULL, *p; - mbfl_string orig_str, conv_str; - mbfl_string *pstr; /* pointer to mbfl string for return value */ + char *p; enum mbfl_no_encoding; const mbfl_encoding *tran_cs, /* transfer text charset */ *head_enc, /* header transfer encoding */ @@ -4181,10 +4121,6 @@ PHP_FUNCTION(mb_send_mail) HashTable ht_headers; zval *s; - /* initialize */ - mbfl_string_init(&orig_str); - mbfl_string_init(&conv_str); - /* character-set, transfer-encoding */ tran_cs = &mbfl_encoding_utf8; head_enc = &mbfl_encoding_base64; @@ -4198,7 +4134,7 @@ PHP_FUNCTION(mb_send_mail) ZEND_PARSE_PARAMETERS_START(3, 5) Z_PARAM_PATH(to, to_len) - Z_PARAM_PATH(subject, subject_len) + Z_PARAM_PATH_STR(subject) Z_PARAM_PATH(message, message_len) Z_PARAM_OPTIONAL Z_PARAM_ARRAY_HT_OR_STR(headers_ht, str_headers) @@ -4310,22 +4246,17 @@ PHP_FUNCTION(mb_send_mail) } /* Subject: */ - orig_str.val = (unsigned char *)subject; - orig_str.len = subject_len; - orig_str.encoding = MBSTRG(current_internal_encoding); - if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) { - orig_str.encoding = mb_guess_encoding((unsigned char*)subject, subject_len, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + const mbfl_encoding 
*enc = MBSTRG(current_internal_encoding); + if (enc == &mbfl_encoding_pass) { + enc = mb_guess_encoding((unsigned char*)ZSTR_VAL(subject), ZSTR_LEN(subject), MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); } const char *line_sep = PG(mail_mixed_lf_and_crlf) ? "\n" : CRLF; size_t line_sep_len = strlen(line_sep); - pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, line_sep, strlen("Subject: [PHP-jp nnnnnnnn]") + line_sep_len); - if (pstr != NULL) { - subject_buf = subject = (char *)pstr->val; - } + + subject = mb_mime_header_encode(subject, enc, tran_cs, head_enc == &mbfl_encoding_base64, (char*)line_sep, line_sep_len, strlen("Subject: [PHP-jp nnnnnnnn]") + line_sep_len); /* message body */ const mbfl_encoding *msg_enc = MBSTRG(current_internal_encoding); - if (msg_enc == &mbfl_encoding_pass) { msg_enc = mb_guess_encoding((unsigned char*)message, message_len, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); } @@ -4401,18 +4332,15 @@ PHP_FUNCTION(mb_send_mail) extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd)); } - RETVAL_BOOL(!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)); + RETVAL_BOOL(!err && php_mail(to_r, ZSTR_VAL(subject), message, ZSTR_VAL(str_headers), extra_cmd ? 
ZSTR_VAL(extra_cmd) : NULL)); if (extra_cmd) { zend_string_release_ex(extra_cmd, 0); } - if (to_r != to) { efree(to_r); } - if (subject_buf) { - efree((void *)subject_buf); - } + zend_string_release(subject); zend_string_free(conv); zend_hash_destroy(&ht_headers); if (str_headers) { @@ -5634,6 +5562,418 @@ static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ } /* }}} */ +static const unsigned char base64_table[] = { + /* 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', */ + 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d, + /* 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', */ + 0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a, + /* 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', */ + 0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d, + /* 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', */ + 0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a, + /* '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0' */ + 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x2b,0x2f,0x00 +}; + +static size_t transfer_encoded_size(mb_convert_buf *tmpbuf, bool base64) +{ + if (base64) { + return ((mb_convert_buf_len(tmpbuf) + 2) / 3) * 4; + } else { + size_t enc_size = 0; + unsigned char *p = (unsigned char*)ZSTR_VAL(tmpbuf->str); + while (p < tmpbuf->out) { + unsigned char c = *p++; + enc_size += (c > 0x7F || c == '=' || mime_char_needs_qencode[c]) ? 
3 : 1; + } + return enc_size; + } +} + +static void transfer_encode_mime_bytes(mb_convert_buf *tmpbuf, mb_convert_buf *outbuf, bool base64) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(outbuf, out, limit); + unsigned char *p = (unsigned char*)ZSTR_VAL(tmpbuf->str), *e = tmpbuf->out; + + if (base64) { + MB_CONVERT_BUF_ENSURE(outbuf, out, limit, ((e - p) + 2) / 3 * 4); + while ((e - p) >= 3) { + unsigned char a = *p++; + unsigned char b = *p++; + unsigned char c = *p++; + uint32_t bits = (a << 16) | (b << 8) | c; + out = mb_convert_buf_add4(out, + base64_table[(bits >> 18) & 0x3F], + base64_table[(bits >> 12) & 0x3F], + base64_table[(bits >> 6) & 0x3F], + base64_table[bits & 0x3F]); + } + if (p != e) { + if ((e - p) == 1) { + uint32_t bits = *p++; + out = mb_convert_buf_add4(out, base64_table[(bits >> 2) & 0x3F], base64_table[(bits & 0x3) << 4], '=', '='); + } else { + unsigned char a = *p++; + unsigned char b = *p++; + uint32_t bits = (a << 8) | b; + out = mb_convert_buf_add4(out, base64_table[(bits >> 10) & 0x3F], base64_table[(bits >> 4) & 0x3F], base64_table[(bits & 0xF) << 2], '='); + } + } + } else { + MB_CONVERT_BUF_ENSURE(outbuf, out, limit, (e - p) * 3); + while (p < e) { + unsigned char c = *p++; + if (c > 0x7F || c == '=' || mime_char_needs_qencode[c]) { + out = mb_convert_buf_add3(out, '=', "0123456789ABCDEF"[(c >> 4) & 0xF], "0123456789ABCDEF"[c & 0xF]); + } else { + out = mb_convert_buf_add(out, c); + } + } + } + + mb_convert_buf_reset(tmpbuf, 0); + MB_CONVERT_BUF_STORE(outbuf, out, limit); +} + +static zend_string* mb_mime_header_encode(zend_string *input, const mbfl_encoding *incode, const mbfl_encoding *outcode, bool base64, char *linefeed, size_t linefeed_len, zend_long indent) +{ + unsigned char *in = (unsigned char*)ZSTR_VAL(input); + size_t in_len = ZSTR_LEN(input); + + if (!in_len) { + return zend_empty_string; + } + + if (indent < 0 || indent >= 74) { + indent = 0; + } + + if (linefeed_len > 8) { + linefeed_len = 8; + } + /* Maintain 
legacy behavior as regards embedded NUL (zero) bytes in linefeed string */ + for (size_t i = 0; i < linefeed_len; i++) { + if (linefeed[i] == '\0') { + linefeed_len = i; + break; + } + } + + unsigned int state = 0; + /* wchar_buf should be big enough that when it is full, we definitely have enough + * wchars to fill an entire line of output */ + uint32_t wchar_buf[80]; + uint32_t *p, *e; + /* What part of wchar_buf is filled with still-unprocessed data which should not + * be overwritten? */ + unsigned int offset = 0; + size_t line_start = 0; + + /* If the entire input string is ASCII with no spaces (except possibly leading + * spaces), just pass it through unchanged */ + bool checking_leading_spaces = true; + while (in_len) { + size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf, 80, &state); + p = wchar_buf; + e = wchar_buf + out_len; + + while (p < e) { + uint32_t w = *p++; + if (checking_leading_spaces) { + if (w == ' ') { + continue; + } else { + checking_leading_spaces = false; + } + } + if (w < 0x21 || w > 0x7E || w == '=' || w == '?' 
|| w == '_') { + /* We cannot simply pass input string through unchanged; start again */ + in = (unsigned char*)ZSTR_VAL(input); + in_len = ZSTR_LEN(input); + goto no_passthrough; + } + } + } + + return zend_string_copy(input); /* This just increments refcount */ + +no_passthrough: ; + + mb_convert_buf buf; + mb_convert_buf_init(&buf, in_len, '?', MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR); + + /* Encode some prefix of the input string as plain ASCII if possible + * If we find it necessary to switch to Base64/QPrint encoding, we will + * do so all the way to the end of the string */ + while (in_len) { + /* Decode part of the input string, refill wchar_buf */ + ZEND_ASSERT(offset < 80); + size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, 80 - offset, &state); + ZEND_ASSERT(out_len <= 80 - offset); + p = wchar_buf; + e = wchar_buf + offset + out_len; + /* ASCII output is broken into space-delimited 'words' + * If we find a non-ASCII character in the middle of a word, we will + * transfer-encode the entire word */ + uint32_t *word_start = p; + + /* Don't consider adding line feed for spaces at the beginning of a word */ + while (p < e && *p == ' ' && (p - word_start) <= 74) { + p++; + } + + while (p < e) { + uint32_t w = *p++; + + if (w < 0x20 || w > 0x7E || w == '?' 
|| w == '=' || w == '_' || (w == ' ' && (p - word_start) > 74)) { + /* Non-ASCII character (or line too long); switch to Base64/QPrint encoding + * If we are already too far along on a line to include Base64/QPrint encoded data + * on the same line (without overrunning max line length), then add a line feed + * right now */ + if (mb_convert_buf_len(&buf) - line_start + indent + strlen(outcode->mime_name) > 55) { + MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, (e - word_start) + linefeed_len + 1); + buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len); + buf.out = mb_convert_buf_add(buf.out, ' '); + indent = 0; + line_start = mb_convert_buf_len(&buf); + } else if (mb_convert_buf_len(&buf) > 0) { + MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, 1); + buf.out = mb_convert_buf_add(buf.out, ' '); + } + p = word_start; /* Back up to where MIME encoding of input chars should start */ + goto mime_encoding_needed; + } else if (w == ' ') { + /* When we see a space, check whether we should insert a line break */ + if (mb_convert_buf_len(&buf) - line_start + (p - word_start) + indent > 75) { + MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, (e - word_start) + linefeed_len + 1); + buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len); + buf.out = mb_convert_buf_add(buf.out, ' '); + indent = 0; + line_start = mb_convert_buf_len(&buf); + } else if (mb_convert_buf_len(&buf) > 0) { + MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, (e - word_start) + 1); + buf.out = mb_convert_buf_add(buf.out, ' '); + } + /* Output one (space-delimited) word as plain ASCII */ + while (word_start < p-1) { + buf.out = mb_convert_buf_add(buf.out, *word_start++ & 0xFF); + } + word_start++; + while (p < e && *p == ' ') { + p++; + } + } + } + + if (in_len) { + /* Copy chars which are part of an incomplete 'word' to the beginning + * of wchar_buf and reprocess them on the next iteration */ + offset = e - word_start; + if (offset) { + memmove(wchar_buf, word_start, offset * 
sizeof(uint32_t)); + } + } else { + /* We have reached the end of the input string while still in 'ASCII mode'; + * process any trailing ASCII chars which were not followed by a space */ + if (word_start < e && mb_convert_buf_len(&buf) > 0) { + /* The whole input string was not just one big ASCII 'word' with no spaces + * consider adding a line feed if necessary to prevent output lines from + * being too long */ + if (mb_convert_buf_len(&buf) - line_start + (p - word_start) + indent > 74) { + MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, (e - word_start) + linefeed_len + 1); + buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len); + buf.out = mb_convert_buf_add(buf.out, ' '); + } else { + MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, (e - word_start) + 1); + buf.out = mb_convert_buf_add(buf.out, ' '); + } + } + while (word_start < e) { + buf.out = mb_convert_buf_add(buf.out, *word_start++ & 0xFF); + } + } + } + + /* Ensure output string is marked as valid UTF-8 (ASCII strings are always 'valid UTF-8') */ + return mb_convert_buf_result(&buf, &mbfl_encoding_utf8); + +mime_encoding_needed: ; + + /* We will generate the output line by line, first converting wchars to bytes + * in the requested output encoding, then transfer-encoding those bytes as + * Base64 or QPrint + * 'tmpbuf' will receive the bytes which need to be transfer-encoded before + * sending them to 'buf' */ + mb_convert_buf tmpbuf; + mb_convert_buf_init(&tmpbuf, in_len, '?', MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR); + + /* Do we need to refill wchar_buf to make sure we don't run out of wchars + * in the middle of a line? 
*/ + if (p == wchar_buf) { + goto start_new_line; + } + offset = e - p; + memmove(wchar_buf, p, offset * sizeof(uint32_t)); + + while(true) { +refill_wchar_buf: ; + ZEND_ASSERT(offset < 80); + size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, 80 - offset, &state); + ZEND_ASSERT(out_len <= 80 - offset); + p = wchar_buf; + e = wchar_buf + offset + out_len; + +start_new_line: ; + MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, strlen(outcode->mime_name) + 5); + buf.out = mb_convert_buf_add2(buf.out, '=', '?'); + buf.out = mb_convert_buf_appends(buf.out, outcode->mime_name); + buf.out = mb_convert_buf_add3(buf.out, '?', base64 ? 'B' : 'Q', '?'); + + /* How many wchars should we try converting to Base64/QPrint-encoded bytes? + * We do something like a 'binary search' to find the greatest number which + * can be included on this line without exceeding max line length */ + unsigned int n = 12; + size_t space_available = 73 - indent - (mb_convert_buf_len(&buf) - line_start); + + while (true) { + ZEND_ASSERT(p < e); + + /* Remember where we were in process of generating output, so we can back + * up if necessary */ + size_t tmppos = mb_convert_buf_len(&tmpbuf); + unsigned int tmpstate = tmpbuf.state; + + /* Try encoding 'n' wchars in output text encoding and sending output + * bytes to 'tmpbuf'. Hopefully this is not too many to fit on the + * current line. */ + n = MIN(n, e - p); + outcode->from_wchar(p, n, &tmpbuf, false); + + /* For some output text encodings, there may be a few ending bytes + * which need to be emitted to output before we break a line. 
+ * Again, remember where we were so we can back up */ + size_t tmppos2 = mb_convert_buf_len(&tmpbuf); + unsigned int tmpstate2 = tmpbuf.state; + outcode->from_wchar(NULL, 0, &tmpbuf, true); + + if (transfer_encoded_size(&tmpbuf, base64) <= space_available || (n == 1 && tmppos == 0)) { + /* If we convert 'n' more wchars on the current line, it will not + * overflow the maximum line length */ + p += n; + + if (p == e) { + /* We are done; we shouldn't reach here if there is more remaining + * of the input string which needs to be processed */ + ZEND_ASSERT(!in_len); + transfer_encode_mime_bytes(&tmpbuf, &buf, base64); + MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, 2); + buf.out = mb_convert_buf_add2(buf.out, '?', '='); + mb_convert_buf_free(&tmpbuf); + return mb_convert_buf_result(&buf, &mbfl_encoding_utf8); + } else { + /* It's possible that more chars might fit on the current line, + * so back up to where we were before emitting any ending bytes */ + mb_convert_buf_reset(&tmpbuf, tmppos2); + tmpbuf.state = tmpstate2; + } + } else { + /* Converting 'n' more wchars on this line would be too much. + * Back up to where we were before we tried that. */ + mb_convert_buf_reset(&tmpbuf, tmppos); + tmpbuf.state = tmpstate; + + if (n == 1) { + /* We have found the exact number of chars which will fit on the + * current line. Finish up and move to a new line. 
*/ + outcode->from_wchar(NULL, 0, &tmpbuf, true); + transfer_encode_mime_bytes(&tmpbuf, &buf, base64); + tmpbuf.state = 0; + + MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, 3 + linefeed_len); + buf.out = mb_convert_buf_add2(buf.out, '?', '='); + + indent = 0; /* Indent argument must only affect the first line */ + + if (in_len) { + /* We still have more of input string remaining to decode */ + buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len); + buf.out = mb_convert_buf_add(buf.out, ' '); + line_start = mb_convert_buf_len(&buf); + /* Copy remaining wchars to beginning of buffer so they will be + * processed on the next iteration of outer 'do' loop */ + offset = e - p; + memmove(wchar_buf, p, offset * sizeof(uint32_t)); + goto refill_wchar_buf; + } else if (p < e) { + /* Input string is finished, but we still have trailing wchars + * remaining to be processed in wchar_buf */ + buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len); + buf.out = mb_convert_buf_add(buf.out, ' '); + line_start = mb_convert_buf_len(&buf); + goto start_new_line; + } else { + /* We are done! 
*/ + mb_convert_buf_free(&tmpbuf); + return mb_convert_buf_result(&buf, &mbfl_encoding_utf8); + } + } else { + /* Try a smaller number of wchars */ + n = MAX(n >> 1, 1); + } + } + } + } +} + /* mb_encode_mimeheader(str [, charset [, transfer_encoding [, linefeed [, indent]]]]): parses one required string plus up to four optional arguments, picks the output charset and the Base64-vs-QPrint flag, and returns whatever mb_mime_header_encode() produces. Defaults visible below: linefeed "\r\n", indent 0, Base64. */ +PHP_FUNCTION(mb_encode_mimeheader) +{ + const mbfl_encoding *charset = &mbfl_encoding_pass; /* kept as-is only if no charset argument is given AND no language entry is found */ + zend_string *str, *charset_name = NULL, *transenc_name = NULL; + char *linefeed = "\r\n"; + size_t linefeed_len = 2; + zend_long indent = 0; + bool base64 = true; /* cleared below only when a transfer-encoding name starts with 'Q' or 'q' */ + + ZEND_PARSE_PARAMETERS_START(1, 5) + Z_PARAM_STR(str) + Z_PARAM_OPTIONAL + Z_PARAM_STR(charset_name) + Z_PARAM_STR(transenc_name) + Z_PARAM_STRING(linefeed, linefeed_len) + Z_PARAM_LONG(indent) + ZEND_PARSE_PARAMETERS_END(); + + if (charset_name != NULL) { /* explicit charset: it must resolve, and it must carry a non-empty MIME name; the literal 2 is presumably the argument position used for error reporting -- TODO confirm against php_mb_get_encoding() */ + charset = php_mb_get_encoding(charset_name, 2); + if (!charset) { + RETURN_THROWS(); + } else if (charset->mime_name == NULL || charset->mime_name[0] == '\0') { + zend_argument_value_error(2, "\"%s\" cannot be used for MIME header encoding", ZSTR_VAL(charset_name)); + RETURN_THROWS(); + } + } else { /* no charset argument: take mail_charset and mail_header_encoding from the language entry selected by MBSTRG(language) */ + const mbfl_language *lang = mbfl_no2language(MBSTRG(language)); + if (lang != NULL) { + charset = mbfl_no2encoding(lang->mail_charset); /* NOTE(review): unlike the explicit-charset branch, neither a NULL result nor an empty mime_name is checked here -- confirm every language entry maps to a usable charset */ + const mbfl_encoding *transenc = mbfl_no2encoding(lang->mail_header_encoding); + char t = transenc->name[0]; /* NOTE(review): transenc is dereferenced without a NULL check -- confirm mbfl_no2encoding() cannot return NULL for mail_header_encoding */ + if (t == 'Q' || t == 'q') { + base64 = false; + } + } + } + + if (transenc_name != NULL && ZSTR_LEN(transenc_name) > 0) { /* only the first character of the argument is inspected; anything other than 'Q'/'q' leaves base64 at its current value, so it cannot switch a language-selected QPrint back to Base64 */ + char t = ZSTR_VAL(transenc_name)[0]; + if (t == 'Q' || t == 'q') { + base64 = false; + } + } + /* NOTE(review): linefeed_len and indent are forwarded unvalidated; the encoder computes "73 - indent - ..." into a size_t, so confirm that negative or very large indent values are handled */ + RETURN_STR(mb_mime_header_encode(str, MBSTRG(current_internal_encoding), charset, base64, linefeed, linefeed_len, indent)); +} + static int8_t decode_base64(unsigned char c) { if (c >= 'A' && c <= 'Z') { diff --git a/ext/mbstring/tests/mb_encode_mimeheader_basic4.phpt b/ext/mbstring/tests/mb_encode_mimeheader_basic4.phpt new file mode 100644 index 00000000000..7bf05b43ae3 --- /dev/null +++ b/ext/mbstring/tests/mb_encode_mimeheader_basic4.phpt @@ -0,0 +1,160 @@ +--TEST-- +Test mb_encode_mimeheader() function :
test cases found by fuzzer +--EXTENSIONS-- +mbstring +--FILE-- +\x00\x00\x00\x00", "HZ", "Q", "", 71)); + +// ASCII strings with no spaces should pass through unchanged +var_dump(mb_encode_mimeheader("yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyBIG5", "BIG-5", "B")); + +// Regression test: After decoding part of a line as ASCII, before we switch into Base64/QPrint encoding mode, +// refill our buffer of wchars so we don't hit the end of the buffer in the middle of a line +var_dump(mb_encode_mimeheader("\x20\x20\x20\x202\x20\x20\x20sssssssssssssssssssssssssss\x20\x20\x20\x20W\x20\x20\x20\x20\x20\x20W\x20\x20\x20\x20\xb9S\x01\x00\xf0`\x00\x00\x20\x20\x20\x20mSCII\xee\x20\x20\x20\x20mSCII\xeeI\xee", "ArmSCII-8", "B", "")); + +// Regression test: Input string with a huge number of spaces +var_dump(mb_encode_mimeheader("\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x00", "CP936", "Q", "")); + +// Regression test: Long string, all ASCII, but with spaces at the beginning +var_dump(mb_encode_mimeheader("\x20\x201111111111111111111111111111111111111111111111111111111111111111111111111", "ASCII", "Q", "")); + +// Only a single character in input, but when we convert it to outcode and then +// transfer-encode it, it takes too many bytes to fit on a single line +// Legacy implementation would always include at least one wchar in each encoded word; +// imitate the same behavior +var_dump(mb_encode_mimeheader("\xe7\xad\xb5", "HZ", "Q", "", 44)); + +// Regression test: Exploring corner cases of when legacy implementation would output plain ASCII +// with no transfer encoding, and when it would transfer-encode 
+var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", "")); +var_dump(mb_encode_mimeheader("\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3\x20", "GB18030", "Q", "")); + +// Change in behavior: The old implementation would output the following string as plain ASCII, +// but the new one transfer-encodes it +// In the general case, matching the old implementation's decision to transfer-encode or not +// perfectly would require allocating potentially unbounded scratch memory (up to the size of +// the input string), but we aim to only use a constant amount of temporarily allocated memory +var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", "")); + +echo "Done"; +?> +--EXPECT-- +string(0) "" +string(21) "=?UTF-8?Q?abc=00abc?=" +string(16) "=?UTF-8?B?Pw==?=" +string(19) "=?US-ASCII?B?Pw==?=" +string(18) "=?US-ASCII?Q?=3F?=" +string(19) "=?US-ASCII?B?PQ==?=" +string(18) "=?US-ASCII?Q?=3D?=" +string(19) "=?US-ASCII?B?Xw==?=" +string(18) "=?US-ASCII?Q?=5F?=" +string(19) "=?US-ASCII?B?fw==?=" +string(1) " " +string(1) " " +string(3) " " +string(3) " " +string(8) "ab ab " +string(8) "ab ab " +string(1) "`" +string(1) "S" +string(2) "S4" 
+string(2) "S4" +string(61) "=?UCS-4?Q?=00=00=00=32=00=00=00=34?= =?UCS-4?Q?=00=00=00=0A?=" +string(21) "o =?US-ASCII?B?AA==?=" +string(68) "=?UCS-4?B?AAAAAAAAABEAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==?=" +string(271) "=?UCS-2?B?AAEAAAA/AD8APwA/AD8APwA9AD8APwA/AD0APwABAAAAYQAAAAAAPwA/AD8=?= =?UCS-2?B?AD0APwA/AD8APwA/AD8APwA/AD8APwA/ADQAPwA0AD8APwA/AD8APwA9AD8=?= =?UCS-2?B?AAEAAAAAAAAAAQAAAAAABgA/AD8APwA/AD8APwA/AD8APwA9AD8APwA/AD8=?= =?UCS-2?B?AD8APwA/AD8APwA/AD8ANAA/AD8APwA/AD8APwA0?=" +string(27) "=aaaaaa= =?US-ASCII?Q?=3F?=" +string(9) "=aaaaaa=?" +string(55) ", =?ISO-2022-JP?Q?o=00=01=00=00?= =?ISO-2022-JP?Q?=28?=" +string(19) " =?US-ASCII?Q?=3F?=" +string(76) " =?HZ-GB-2312?Q?=3F=7E=7EH=7E=7E=3F=3F=00=00=3F=3F=3F=3F=3F=3E=00=00=00=00?=" +string(75) "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyBIG5" +string(108) " 2 sssssssssssssssssssssssssss W W =?ArmSCII-8?B?ICAgP1MBAD9gAAAgICAgbVNDSUk/ICAgIG1TQ0lJP0k/?=" +string(294) "=?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?CP936?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=00?=" +string(75) " 1111111111111111111111111111111111111111111111111111111111111111111111111" +string(33) "=?HZ-GB-2312?Q?=7E=7Bs=5B=7E=7D?=" +string(77) "2 !3" +string(282) "=?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20!=33=20?=" +string(296) "2 =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= 
=?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20!=33?=" +Done