1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

Fix zlib support for large files

gzread() and gzwrite() have effectively a 4GiB limit at the moment
because the APIs of the zlib library use unsigned ints.
For example, this means that the count argument of gzread(), gzwrite()
& co is effectively taken modulo 2**32.
Fix this by adding a loop to handle all bytes.

As for automated testing, I didn't find an easy way to write a phpt for
this that wouldn't use a lot of memory or require a large file.
For instance, the gzread() test that I manually ran requires a 4MiB
input file (and I can't shrink it because zlib has a max window size).

Here are the testing instructions, run on 64-bit:
To test for gzwrite():
```php
$f = gzopen("out.txt.gz", "w");
gzwrite($f, str_repeat('a', 4*1024*1024*1024+64)); // 4GiB + 64 bytes
```
Then use `zcat out.txt.gz|wc -c` to check that all bytes were written
(should be 4294967360).

To test for gzread():
Create a file that is 4GiB + 64 bytes in size, for example one consisting entirely of a's.
Then compress it into out.txt.gz using the gzip command.
Then run:
```php
$f = gzopen("out.txt.gz", "r");
$str = gzread($f, 4*1024*1024*1024+64);
var_dump(strlen($str)); // 4294967360
var_dump(substr($str, -3)); // string(3) "aaa"
```

Closes GH-17775.
This commit is contained in:
Niels Dossche
2025-02-12 22:37:29 +01:00
parent 678ecff980
commit 5aaf7b4937
2 changed files with 41 additions and 9 deletions

View File

@@ -33,24 +33,55 @@ struct php_gz_stream_data_t {
/* Stream read op: read up to `count` decompressed bytes from the gz file into `buf`.
 *
 * Returns the number of bytes stored in `buf`, or the negative value reported by
 * gzread() if a chunk fails. Sets stream->eof as soon as gzeof() reports end of file.
 */
static ssize_t php_gziop_read(php_stream *stream, char *buf, size_t count)
{
	struct php_gz_stream_data_t *self = (struct php_gz_stream_data_t *) stream->abstract;
	ssize_t total_read = 0;

	/* Despite the count argument of gzread() being "unsigned int",
	 * the return value is "int". Error returns are values < 0, otherwise the count is returned.
	 * To properly distinguish error values from success value, we therefore need to cap at INT_MAX.
	 */
	do {
		unsigned int chunk_size = MIN(count, INT_MAX);
		/* Named bytes_read rather than read to avoid shadowing POSIX read(). */
		int bytes_read = gzread(self->gz_file, buf, chunk_size);
		count -= chunk_size;

		if (gzeof(self->gz_file)) {
			stream->eof = 1;
		}

		if (UNEXPECTED(bytes_read < 0)) {
			return bytes_read;
		}

		total_read += bytes_read;
		buf += bytes_read;
	} while (count > 0 && !stream->eof);

	return total_read;
}
/* Stream write op: compress and write `count` bytes from `buf` to the gz file.
 *
 * Returns the number of uncompressed bytes accepted by gzwrite(). This is less than
 * `count` (possibly 0) when a chunk could not be written completely.
 */
static ssize_t php_gziop_write(php_stream *stream, const char *buf, size_t count)
{
	struct php_gz_stream_data_t *self = (struct php_gz_stream_data_t *) stream->abstract;
	ssize_t total_written = 0;

	/* Despite the count argument of gzwrite() being "unsigned int",
	 * the return value is "int". To keep every successful return value representable,
	 * each chunk is capped at INT_MAX.
	 */
	do {
		unsigned int chunk_size = MIN(count, INT_MAX);
		int written = gzwrite(self->gz_file, buf, chunk_size);

		/* Defensive: the zlib manual documents 0, not a negative value, as the error return. */
		if (UNEXPECTED(written < 0)) {
			return written;
		}

		total_written += written;
		buf += written;
		count -= chunk_size;

		/* A short (or zero) write means gzwrite() failed. Stop here: continuing would
		 * resume from a buffer position that no longer matches the remaining count. */
		if (UNEXPECTED((unsigned int) written < chunk_size)) {
			break;
		}
	} while (count > 0);

	return total_written;
}
static int php_gziop_seek(php_stream *stream, zend_off_t offset, int whence, zend_off_t *newoffs)