From 5aaf7b49374e6c55bb5cd8b8544068107706a7a2 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 12 Feb 2025 22:37:29 +0100 Subject: [PATCH] Fix zlib support for large files gzread() and gzwrite() have effectively a 4GiB limit at the moment because the APIs of the zlib library use unsigned ints. For example, this means that the count argument of gzread() and gzwrite() & co effectively are modulo 2**32. Fix this by adding a loop to handle all bytes. As for automated testing, I didn't find an easy way to write a phpt for this that wouldn't use a lot of memory or requires a large file. For instance, the gzread() test that I manually ran requires a 4MiB input file (and I can't shrink it because zlib has a max window size). Here are the testing instructions, run on 64-bit: To test for gzwrite(): ```php $f = gzopen("out.txt.gz", "w"); gzwrite($f, str_repeat('a', 4*1024*1024*1024+64)); // 4GiB + 64 bytes ``` Then use `zcat out.txt.gz|wc -c` to check that all bytes were written (should be 4294967360). To test for gzread(): Create a file containing all a's for example that is 4GiB + 64 bytes. Then compress it into out.txt.gz using the gzip command. Then run: ```php $f = gzopen("out.txt.gz", "r"); $str = gzread($f, 4*1024*1024*1024+64); var_dump(strlen($str)); // 4294967360 var_dump(substr($str, -3)); // string (3) "aaa" ``` Closes GH-17775. --- NEWS | 1 + ext/zlib/zlib_fopen_wrapper.c | 49 ++++++++++++++++++++++++++++------- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/NEWS b/NEWS index 9493e4f6af7..876ba582906 100644 --- a/NEWS +++ b/NEWS @@ -50,6 +50,7 @@ PHP NEWS . Fixed bug GH-17745 (zlib extension incorrectly handles object arguments). (nielsdos) . Fix memory leak when encoding check fails. (nielsdos) + . Fix zlib support for large files. (nielsdos) 13 Feb 2025, PHP 8.3.17 diff --git a/ext/zlib/zlib_fopen_wrapper.c b/ext/zlib/zlib_fopen_wrapper.c index b414b33a872..31b5212a720 100644 --- a/ext/zlib/zlib_fopen_wrapper.c +++ b/ext/zlib/zlib_fopen_wrapper.c @@ -33,24 +33,55 @@ struct php_gz_stream_data_t { static ssize_t php_gziop_read(php_stream *stream, char *buf, size_t count) { struct php_gz_stream_data_t *self = (struct php_gz_stream_data_t *) stream->abstract; - int read; + ssize_t total_read = 0; - /* XXX this needs to be looped for the case count > UINT_MAX */ - read = gzread(self->gz_file, buf, count); + /* Despite the count argument of gzread() being "unsigned int", + * the return value is "int". Error returns are values < 0, otherwise the count is returned. + * To properly distinguish error values from success value, we therefore need to cap at INT_MAX. + */ + do { + unsigned int chunk_size = MIN(count, INT_MAX); + int read = gzread(self->gz_file, buf, chunk_size); + count -= chunk_size; - if (gzeof(self->gz_file)) { - stream->eof = 1; - } + if (gzeof(self->gz_file)) { + stream->eof = 1; + } - return read; + if (UNEXPECTED(read < 0)) { + return read; + } + + total_read += read; + buf += read; + } while (count > 0 && !stream->eof); + + return total_read; } static ssize_t php_gziop_write(php_stream *stream, const char *buf, size_t count) { struct php_gz_stream_data_t *self = (struct php_gz_stream_data_t *) stream->abstract; + ssize_t total_written = 0; - /* XXX this needs to be looped for the case count > UINT_MAX */ - return gzwrite(self->gz_file, (char *) buf, count); + /* Despite the count argument of gzread() being "unsigned int", + * the return value is "int". Error returns are values < 0, otherwise the count is returned. + * To properly distinguish error values from success value, we therefore need to cap at INT_MAX. + */ + do { + unsigned int chunk_size = MIN(count, INT_MAX); + int written = gzwrite(self->gz_file, buf, chunk_size); + count -= chunk_size; + + if (UNEXPECTED(written < 0)) { + return written; + } + + total_written += written; + buf += written; + } while (count > 0); + + return total_written; } static int php_gziop_seek(php_stream *stream, zend_off_t offset, int whence, zend_off_t *newoffs)