mirror of
https://github.com/php/php-src.git
synced 2026-03-24 00:02:20 +01:00
Fix zlib support for large files
gzread() and gzwrite() have effectively a 4GiB limit at the moment
because the APIs of the zlib library use unsigned ints.
For example, this means that the count argument of gzread(), gzwrite()
& co. is effectively taken modulo 2**32.
Fix this by adding a loop to handle all bytes.
As for automated testing, I didn't find an easy way to write a phpt for
this that wouldn't use a lot of memory or require a large file.
For instance, the gzread() test that I manually ran requires a 4MiB
input file (and I can't shrink it because zlib has a max window size).
Here are the testing instructions, run on 64-bit:
To test for gzwrite():
```php
$f = gzopen("out.txt.gz", "w");
gzwrite($f, str_repeat('a', 4*1024*1024*1024+64)); // 4GiB + 64 bytes
```
Then use `zcat out.txt.gz|wc -c` to check that all bytes were written
(should be 4294967360).
To test for gzread():
Create a file that is 4GiB + 64 bytes in size, for example one consisting entirely of a's.
Then compress it into out.txt.gz using the gzip command.
Then run:
```php
$f = gzopen("out.txt.gz", "r");
$str = gzread($f, 4*1024*1024*1024+64);
var_dump(strlen($str)); // 4294967360
var_dump(substr($str, -3)); // string(3) "aaa"
```
Closes GH-17775.
This commit is contained in:
@@ -33,24 +33,55 @@ struct php_gz_stream_data_t {
|
||||
static ssize_t php_gziop_read(php_stream *stream, char *buf, size_t count)
|
||||
{
|
||||
struct php_gz_stream_data_t *self = (struct php_gz_stream_data_t *) stream->abstract;
|
||||
int read;
|
||||
ssize_t total_read = 0;
|
||||
|
||||
/* XXX this needs to be looped for the case count > UINT_MAX */
|
||||
read = gzread(self->gz_file, buf, count);
|
||||
/* Despite the count argument of gzread() being "unsigned int",
|
||||
* the return value is "int". Error returns are values < 0, otherwise the count is returned.
|
||||
* To properly distinguish error values from success value, we therefore need to cap at INT_MAX.
|
||||
*/
|
||||
do {
|
||||
unsigned int chunk_size = MIN(count, INT_MAX);
|
||||
int read = gzread(self->gz_file, buf, chunk_size);
|
||||
count -= chunk_size;
|
||||
|
||||
if (gzeof(self->gz_file)) {
|
||||
stream->eof = 1;
|
||||
}
|
||||
if (gzeof(self->gz_file)) {
|
||||
stream->eof = 1;
|
||||
}
|
||||
|
||||
return read;
|
||||
if (UNEXPECTED(read < 0)) {
|
||||
return read;
|
||||
}
|
||||
|
||||
total_read += read;
|
||||
buf += read;
|
||||
} while (count > 0 && !stream->eof);
|
||||
|
||||
return total_read;
|
||||
}
|
||||
|
||||
static ssize_t php_gziop_write(php_stream *stream, const char *buf, size_t count)
|
||||
{
|
||||
struct php_gz_stream_data_t *self = (struct php_gz_stream_data_t *) stream->abstract;
|
||||
ssize_t total_written = 0;
|
||||
|
||||
/* XXX this needs to be looped for the case count > UINT_MAX */
|
||||
return gzwrite(self->gz_file, (char *) buf, count);
|
||||
/* Despite the count argument of gzwrite() being "unsigned int",
|
||||
* the return value is "int". Error returns are values < 0, otherwise the count is returned.
|
||||
* To properly distinguish error values from success value, we therefore need to cap at INT_MAX.
|
||||
*/
|
||||
do {
|
||||
unsigned int chunk_size = MIN(count, INT_MAX);
|
||||
int written = gzwrite(self->gz_file, buf, chunk_size);
|
||||
count -= chunk_size;
|
||||
|
||||
if (UNEXPECTED(written < 0)) {
|
||||
return written;
|
||||
}
|
||||
|
||||
total_written += written;
|
||||
buf += written;
|
||||
} while (count > 0);
|
||||
|
||||
return total_written;
|
||||
}
|
||||
|
||||
static int php_gziop_seek(php_stream *stream, zend_off_t offset, int whence, zend_off_t *newoffs)
|
||||
|
||||
Reference in New Issue
Block a user