mirror of
https://github.com/php/php-src.git
synced 2026-04-27 10:16:41 +02:00
776296e12f
Previously, mbstring had a special mode whereby it would convert
erroneous input byte sequences to output like "BAD+XXXX", where "XXXX"
would be the erroneous bytes expressed in hexadecimal. This mode could
be enabled by calling `mb_substitute_character("long")`.
However, accurately reproducing input byte sequences from the cached
state of a conversion filter is often tricky, and this significantly
complicates the implementation. Further, the means used for passing
the erroneous bytes through to where the "BAD+XXXX" text is generated
only allows for up to 3 bytes to be passed, meaning that some erroneous
byte sequences are truncated anyway.
More to the point, a search of publicly available PHP code indicates
that nobody is really using this feature anyway.
Incidentally, this feature also provided error output like "JIS+XXXX"
if the input 'should have' represented a JISX 0208 codepoint, but it
decodes to a codepoint which does not exist in the JISX 0208 charset.
Similarly, specific error output was provided for non-existent
JISX 0212 codepoints, and likewise for JISX 0213, CP932, and a few
other charsets. All of that is now consigned to the flames.
However, "long" error markers also include a somewhat more useful
"U+XXXX" marker for Unicode codepoints which were successfully
decoded from the input text, but cannot be represented in the output
encoding. Those are still supported.
With this change, there is no need to use a variety of special values
in the high bits of a wchar to represent different types of error
values. We can (and will) just use a single error value. This will be
equal to -1.
One complicating factor: Text conversion functions return an integer to
indicate whether the conversion operation should be immediately
aborted, and the magic 'abort' marker is -1. Also, almost all of these
functions would return the received byte/codepoint to indicate success.
That doesn't work with the new error value; if an input filter detects
an error and passes -1 to the output filter, and the output filter
returns it back, that would be taken to mean 'abort'.
Therefore, amend all these functions to return 0 for success.
185 lines
4.5 KiB
C
185 lines
4.5 KiB
C
/*
|
|
* "streamable kanji code filter and converter"
|
|
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
|
|
*
|
|
* LICENSE NOTICES
|
|
*
|
|
* This file is part of "streamable kanji code filter and converter",
|
|
* which is distributed under the terms of GNU Lesser General Public
|
|
* License (version 2) as published by the Free Software Foundation.
|
|
*
|
|
* This software is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with "streamable kanji code filter and converter";
|
|
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
|
|
* Suite 330, Boston, MA 02111-1307 USA
|
|
*
|
|
* The author of this file:
|
|
*
|
|
*/
|
|
/*
|
|
* The source code included in this file was separated from mbfilter.c
|
|
* by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file
|
|
* mbfilter.c is included in this package.
|
|
*
|
|
*/
|
|
|
|
#include <stddef.h>
|
|
#include <string.h>
|
|
|
|
#include "zend.h"
|
|
#include "mbfl_memory_device.h"
|
|
|
|
/*
|
|
* memory device output functions
|
|
*/
|
|
/*
 * Set up a byte-oriented memory device.
 *
 * device  - device to initialize; any previous field values are overwritten
 *           (an existing buffer is NOT freed here)
 * initsz  - number of bytes to allocate up front; 0 leaves the buffer NULL
 * allocsz - requested growth step; raised to MBFL_MEMORY_DEVICE_ALLOC_SIZE
 *           when it is smaller than that
 */
void mbfl_memory_device_init(mbfl_memory_device *device, size_t initsz, size_t allocsz)
{
	if (initsz > 0) {
		device->buffer = emalloc(initsz);
	} else {
		device->buffer = NULL;
	}
	device->length = initsz;
	device->pos = 0;
	device->allocsz = MAX(allocsz, MBFL_MEMORY_DEVICE_ALLOC_SIZE);
}
|
|
|
|
/*
 * Ensure the device buffer holds at least `initsz` bytes and update the
 * growth step.
 *
 * The buffer is only ever enlarged; when `initsz` does not exceed the current
 * capacity the buffer is left untouched. The growth step is always replaced
 * by the larger of `allocsz` and MBFL_MEMORY_DEVICE_ALLOC_SIZE.
 */
void mbfl_memory_device_realloc(mbfl_memory_device *device, size_t initsz, size_t allocsz)
{
	if (device->length < initsz) {
		/* Grow in place; the write position is not changed. */
		device->buffer = erealloc(device->buffer, initsz);
		device->length = initsz;
	}

	device->allocsz = MAX(allocsz, MBFL_MEMORY_DEVICE_ALLOC_SIZE);
}
|
|
|
|
/*
 * Release the device buffer (if any) and return the device to an empty
 * state: NULL buffer, zero capacity, zero write position.
 * The growth step (allocsz) is left as it was.
 */
void mbfl_memory_device_clear(mbfl_memory_device *device)
{
	if (device->buffer != NULL) {
		efree(device->buffer);
		device->buffer = NULL;
	}
	device->pos = 0;
	device->length = 0;
}
|
|
|
|
/*
 * Rewind the write position to the start of the buffer.
 * The buffer itself and its capacity are kept, so subsequent output
 * overwrites the previously stored bytes.
 */
void mbfl_memory_device_reset(mbfl_memory_device *device)
{
	device->pos = 0;
}
|
|
|
|
/*
 * Drop the most recently written byte by stepping the write position back
 * by one. Does nothing when the position is already at the start.
 */
void mbfl_memory_device_unput(mbfl_memory_device *device)
{
	if (device->pos > 0) {
		device->pos -= 1;
	}
}
|
|
|
|
/*
 * Move the accumulated bytes out of the device into `result`.
 *
 * result->len is set to the number of bytes written so far; a '\0' byte is
 * then appended to the buffer (it is not counted in result->len). Ownership
 * of the buffer passes to result->val and the device is left empty
 * (NULL buffer, zero capacity, zero position).
 *
 * Note: the return value of the terminating append is not checked.
 *
 * Returns `result`.
 */
mbfl_string* mbfl_memory_device_result(mbfl_memory_device *device, mbfl_string *result)
{
	/* Capture the length before the terminator advances the position. */
	result->len = device->pos;
	mbfl_memory_device_output('\0', device);

	/* Hand the buffer over and detach it from the device. */
	result->val = device->buffer;
	device->buffer = NULL;
	device->pos = 0;
	device->length = 0;

	return result;
}
|
|
|
|
int mbfl_memory_device_output(int c, void *data)
|
|
{
|
|
mbfl_memory_device *device = (mbfl_memory_device *)data;
|
|
|
|
if (device->pos >= device->length) {
|
|
/* reallocate buffer */
|
|
|
|
if (device->length > SIZE_MAX - device->allocsz) {
|
|
/* overflow */
|
|
return -1;
|
|
}
|
|
|
|
size_t newlen = device->length + device->allocsz;
|
|
device->buffer = erealloc(device->buffer, newlen);
|
|
device->length = newlen;
|
|
}
|
|
|
|
device->buffer[device->pos++] = (unsigned char)c;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Append a NUL-terminated string to the device (the terminator itself is
 * not copied). Thin wrapper that measures `psrc` with strlen() and forwards
 * to mbfl_memory_device_strncat(); its return value is passed through.
 */
int mbfl_memory_device_strcat(mbfl_memory_device *device, const char *psrc)
{
	size_t nbytes = strlen(psrc);

	return mbfl_memory_device_strncat(device, psrc, nbytes);
}
|
|
|
|
/*
 * Append `len` bytes starting at `psrc` to the device.
 *
 * device - target memory device
 * psrc   - source bytes; may be NULL only when `len` is 0
 * len    - number of bytes to copy
 *
 * When the free space in the buffer is smaller than `len`, the buffer is
 * enlarged by `len + MBFL_MEMORY_DEVICE_ALLOC_SIZE` bytes (note: the fixed
 * constant is used here, not the device's own allocsz).
 *
 * Returns 0 on success, or -1 if the new buffer size would overflow size_t
 * (in which case nothing is written).
 */
int mbfl_memory_device_strncat(mbfl_memory_device *device, const char *psrc, size_t len)
{
	if (len == 0) {
		/*
		 * Nothing to copy. Returning here avoids forming a pointer into a
		 * NULL buffer and calling memcpy() with NULL arguments, which is
		 * undefined behaviour even for a zero length. This happens for a
		 * freshly initialized device or when concatenating an empty device.
		 */
		return 0;
	}

	if (len > device->length - device->pos) {
		/* reallocate buffer */

		if (len > SIZE_MAX - MBFL_MEMORY_DEVICE_ALLOC_SIZE
				|| device->length > SIZE_MAX - (len + MBFL_MEMORY_DEVICE_ALLOC_SIZE)) {
			/* overflow */
			return -1;
		}

		size_t newlen = device->length + len + MBFL_MEMORY_DEVICE_ALLOC_SIZE;
		device->buffer = erealloc(device->buffer, newlen);
		device->length = newlen;
	}

	/* The buffer is non-NULL here: either it had room for len > 0 bytes or it was just grown. */
	unsigned char *w = &device->buffer[device->pos];
	memcpy(w, psrc, len);
	device->pos += len;

	return 0;
}
|
|
|
|
/*
 * Append everything written so far to `src` (its first src->pos bytes) onto
 * the end of `dest`. `src` is not modified. The return value of
 * mbfl_memory_device_strncat() is passed through.
 */
int mbfl_memory_device_devcat(mbfl_memory_device *dest, mbfl_memory_device *src)
{
	const char *bytes = (const char*)src->buffer;

	return mbfl_memory_device_strncat(dest, bytes, src->pos);
}
|
|
|
|
/*
 * Set up an empty wide-character device: no buffer, zero capacity, zero
 * write position, and the default growth step MBFL_MEMORY_DEVICE_ALLOC_SIZE.
 * No memory is allocated here.
 */
void mbfl_wchar_device_init(mbfl_wchar_device *device)
{
	device->allocsz = MBFL_MEMORY_DEVICE_ALLOC_SIZE;
	device->pos = 0;
	device->length = 0;
	device->buffer = NULL;
}
|
|
|
|
/*
 * Release the wide-character buffer (if any) and return the device to an
 * empty state: NULL buffer, zero capacity, zero write position.
 * The growth step (allocsz) is left as it was.
 */
void mbfl_wchar_device_clear(mbfl_wchar_device *device)
{
	if (device->buffer != NULL) {
		efree(device->buffer);
		device->buffer = NULL;
	}
	device->pos = 0;
	device->length = 0;
}
|
|
|
|
int mbfl_wchar_device_output(int c, void *data)
|
|
{
|
|
mbfl_wchar_device *device = (mbfl_wchar_device *)data;
|
|
|
|
if (device->pos >= device->length) {
|
|
/* reallocate buffer */
|
|
size_t newlen;
|
|
|
|
if (device->length > SIZE_MAX - device->allocsz) {
|
|
/* overflow */
|
|
return -1;
|
|
}
|
|
|
|
newlen = device->length + device->allocsz;
|
|
if (newlen > SIZE_MAX / sizeof(int)) {
|
|
/* overflow */
|
|
return -1;
|
|
}
|
|
|
|
device->buffer = erealloc(device->buffer, newlen * sizeof(int));
|
|
device->length = newlen;
|
|
}
|
|
|
|
device->buffer[device->pos++] = c;
|
|
return 0;
|
|
}
|