1
0
mirror of https://github.com/php/php-src.git synced 2026-03-31 04:32:19 +02:00
Files
archived-php-src/sapi/fuzzer/fuzzer-mbstring.c
Alex Dowad d5d9900661 When fuzzing mbstring encoding conversion code, compare output with different intermediate buffer sizes
Currently, php-fuzz-mbstring only confirms that no crashes (including
ASAN violations) occur when converting text from one encoding to
another.

Try performing each conversion operation with two different sizes for
the intermediate buffer which is used to pass data from the decoder to
the encoder. If the encoding conversion code is correct, the size of
that intermediate buffer shouldn't matter; we should always get exactly
the same results.

This is a much stricter test, which is more likely to catch bugs.
2023-02-05 20:04:05 +02:00

119 lines
4.3 KiB
C

/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Stanislav Malyshev <stas@php.net> |
+----------------------------------------------------------------------+
*/
#include "zend.h"
#include "fuzzer.h"
#include "fuzzer-sapi.h"
#include "ext/mbstring/mbstring.h"
/*
 * Convert the `Size` bytes at `Data` from FromEncoding to ToEncoding, handing
 * data from the decoder (to_wchar) to the encoder (from_wchar) through an
 * intermediate buffer holding `BufSize` uint32_t elements.
 *
 * On return, *NumErrors holds the error count recorded in the conversion
 * buffer. The return value is whatever mb_convert_buf_result() produces; the
 * caller in this file releases it with zend_string_release().
 */
zend_string* convert_encoding(const uint8_t *Data, size_t Size, const mbfl_encoding *FromEncoding, const mbfl_encoding *ToEncoding, size_t BufSize, unsigned int *NumErrors)
{
	uint32_t *wchars = ecalloc(BufSize, sizeof(uint32_t));
	unsigned int decoder_state = 0;
	mb_convert_buf out;

	/* NOTE(review): '?' and MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR presumably select
	 * the replacement character and error mode -- confirm against mbstring.h */
	mb_convert_buf_init(&out, Size, '?', MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR);

	while (Size != 0) {
		/* The decoder is given the addresses of Data and Size, so it can update
		 * both; the loop ends once it reports no remaining input */
		size_t produced = FromEncoding->to_wchar((unsigned char**)&Data, &Size, wchars, BufSize, &decoder_state);
		ZEND_ASSERT(produced <= BufSize);

		/* The last argument is non-zero only when no input remains
		 * (presumably an "end of input" flag for the encoder -- confirm) */
		ToEncoding->from_wchar(wchars, produced, &out, Size == 0);
	}

	*NumErrors = out.errors;
	zend_string *converted = mb_convert_buf_result(&out, ToEncoding);
	efree(wchars);
	return converted;
}
/*
 * Assert (with ZEND_ASSERT) that two zend_strings are identical: first that
 * their lengths match, then that every byte matches.
 */
void assert_zend_string_eql(zend_string *str1, zend_string *str2)
{
	size_t len = ZSTR_LEN(str1);

	ZEND_ASSERT(len == ZSTR_LEN(str2));

	/* Index with size_t, the same type as the length: an int index would be
	 * compared signed-vs-unsigned and could overflow on very long strings */
	for (size_t i = 0; i < len; i++) {
		ZEND_ASSERT(ZSTR_VAL(str1)[i] == ZSTR_VAL(str2)[i]);
	}
}
/*
 * Fuzzer entry point for one input.
 *
 * Input layout, as parsed below:
 *   <to-encoding name> ',' <from-encoding name> ',' <byte> <byte> <payload>
 * The two bytes choose the intermediate buffer sizes for two conversions of
 * the same payload, whose results and error counts must agree.
 *
 * Always returns 0; inputs which do not fit the layout, or which name an
 * unknown encoding, are ignored.
 */
int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
	/* First comma-terminated field: name of the target encoding */
	const uint8_t *sep = memchr(Data, ',', Size);
	if (sep == NULL) {
		return 0;
	}
	size_t to_name_len = sep - Data;
	char *to_name = estrndup((char *) Data, to_name_len);
	Size -= to_name_len + 1;
	Data = sep + 1;

	/* Second comma-terminated field: name of the source encoding */
	sep = memchr(Data, ',', Size);
	if (sep == NULL) {
		efree(to_name);
		return 0;
	}
	size_t from_name_len = sep - Data;
	char *from_name = estrndup((char *) Data, from_name_len);
	Size -= from_name_len + 1;
	Data = sep + 1;

	const mbfl_encoding *to_enc = mbfl_name2encoding(to_name);
	const mbfl_encoding *from_enc = mbfl_name2encoding(from_name);

	/* Request startup is attempted only when both encodings are known and the
	 * two buffer-size bytes are present */
	int started = to_enc && from_enc && Size >= 2 && fuzzer_request_startup() != FAILURE;

	if (started) {
		/* mbstring does not convert a whole (possibly very long) string in one go.
		 * It works chunk by chunk: the decoder fills its output buffer with wchars,
		 * the encoder consumes them, the decoder runs again, and so on.
		 *
		 * The riskiest code in a decoder/encoder is where it returns and saves its
		 * state so that it can be resumed later. To stress that code, run the same
		 * conversion with two different, input-chosen buffer sizes.
		 * Correct code must give the same result both times. */
		size_t chunk_a = Data[0];
		size_t chunk_b = Data[1];
		Data += 2;
		Size -= 2;

		if (chunk_a < MBSTRING_MIN_WCHAR_BUFSIZE) {
			chunk_a = MBSTRING_MIN_WCHAR_BUFSIZE;
		}
		if (chunk_b < MBSTRING_MIN_WCHAR_BUFSIZE) {
			chunk_b = MBSTRING_MIN_WCHAR_BUFSIZE;
		}

		unsigned int errors_a = 0;
		unsigned int errors_b = 0;
		zend_string *out_a = convert_encoding(Data, Size, from_enc, to_enc, chunk_a, &errors_a);
		zend_string *out_b = convert_encoding(Data, Size, from_enc, to_enc, chunk_b, &errors_b);

		assert_zend_string_eql(out_a, out_b);
		ZEND_ASSERT(errors_a == errors_b);

		zend_string_release(out_a);
		zend_string_release(out_b);
	}

	/* The name copies are freed before the request (if one was started) is shut down */
	efree(to_name);
	efree(from_name);
	if (started) {
		fuzzer_request_shutdown();
	}
	return 0;
}
/*
 * Fuzzer initialization hook (presumably invoked once by the fuzzing driver
 * before any input is tested -- confirm against the driver's documentation).
 * Calls fuzzer_init_php(NULL) and always returns 0.
 */
int LLVMFuzzerInitialize(int *argc, char ***argv) {
	/* The command-line arguments are not used */
	(void) argc;
	(void) argv;

	fuzzer_init_php(NULL);
	return 0;
}