1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 16:22:37 +01:00
Files
archived-php-src/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
Peter Kokot 8622362394 Remove unused strcasecmp definition (#17050)
The strcasecmp usage was removed via
dc5f3b9562.
2025-03-21 18:30:22 +01:00

396 lines
9.4 KiB
C

/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file
* mbfilter.c is included in this package.
*
*/
#include "libmbfl/config.h"
#ifdef HAVE_STRINGS_H
/* For strncasecmp */
#include <strings.h>
#endif
#include "mbfl_encoding.h"
#include "mbfilter_pass.h"
#include "mbfilter_8bit.h"
#include "filters/mbfilter_base64.h"
#include "filters/mbfilter_cjk.h"
#include "filters/mbfilter_qprint.h"
#include "filters/mbfilter_uuencode.h"
#include "filters/mbfilter_7bit.h"
#include "filters/mbfilter_utf7.h"
#include "filters/mbfilter_utf7imap.h"
#include "filters/mbfilter_utf8.h"
#include "filters/mbfilter_utf16.h"
#include "filters/mbfilter_utf32.h"
#include "filters/mbfilter_ucs4.h"
#include "filters/mbfilter_ucs2.h"
#include "filters/mbfilter_htmlent.h"
#include "filters/mbfilter_singlebyte.h"
/* NULL-terminated table of the encodings this file can look up; it is what
 * mbfl_get_supported_encodings() returns and what the name / number lookups
 * iterate over.
 *
 * The position of each entry matters: mbfl_encoding_ptr_list_after_hashing
 * stores indices into this array, so adding, removing or reordering entries
 * requires that table to be regenerated as well. */
static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_base64,
&mbfl_encoding_uuencode,
&mbfl_encoding_html_ent,
&mbfl_encoding_qprint,
&mbfl_encoding_7bit,
&mbfl_encoding_8bit,
&mbfl_encoding_ucs4,
&mbfl_encoding_ucs4be,
&mbfl_encoding_ucs4le,
&mbfl_encoding_ucs2,
&mbfl_encoding_ucs2be,
&mbfl_encoding_ucs2le,
&mbfl_encoding_utf32,
&mbfl_encoding_utf32be,
&mbfl_encoding_utf32le,
&mbfl_encoding_utf16,
&mbfl_encoding_utf16be,
&mbfl_encoding_utf16le,
&mbfl_encoding_utf8,
&mbfl_encoding_utf7,
&mbfl_encoding_utf7imap,
&mbfl_encoding_ascii,
&mbfl_encoding_euc_jp,
&mbfl_encoding_sjis,
&mbfl_encoding_eucjp_win,
&mbfl_encoding_eucjp2004,
&mbfl_encoding_sjis_docomo,
&mbfl_encoding_sjis_kddi,
&mbfl_encoding_sjis_sb,
&mbfl_encoding_sjis_mac,
&mbfl_encoding_sjis2004,
&mbfl_encoding_utf8_docomo,
&mbfl_encoding_utf8_kddi_a,
&mbfl_encoding_utf8_kddi_b,
&mbfl_encoding_utf8_sb,
&mbfl_encoding_cp932,
&mbfl_encoding_sjiswin,
&mbfl_encoding_cp51932,
&mbfl_encoding_jis,
&mbfl_encoding_2022jp,
&mbfl_encoding_2022jpms,
&mbfl_encoding_gb18030,
&mbfl_encoding_gb18030_2022,
&mbfl_encoding_cp1252,
&mbfl_encoding_cp1254,
&mbfl_encoding_8859_1,
&mbfl_encoding_8859_2,
&mbfl_encoding_8859_3,
&mbfl_encoding_8859_4,
&mbfl_encoding_8859_5,
&mbfl_encoding_8859_6,
&mbfl_encoding_8859_7,
&mbfl_encoding_8859_8,
&mbfl_encoding_8859_9,
&mbfl_encoding_8859_10,
&mbfl_encoding_8859_13,
&mbfl_encoding_8859_14,
&mbfl_encoding_8859_15,
&mbfl_encoding_8859_16,
&mbfl_encoding_euc_cn,
&mbfl_encoding_cp936,
&mbfl_encoding_hz,
&mbfl_encoding_euc_tw,
&mbfl_encoding_big5,
&mbfl_encoding_cp950,
&mbfl_encoding_euc_kr,
&mbfl_encoding_uhc,
&mbfl_encoding_2022kr,
&mbfl_encoding_cp1251,
&mbfl_encoding_cp866,
&mbfl_encoding_koi8r,
&mbfl_encoding_koi8u,
&mbfl_encoding_armscii8,
&mbfl_encoding_cp850,
&mbfl_encoding_2022jp_2004,
&mbfl_encoding_2022jp_kddi,
&mbfl_encoding_cp50220,
&mbfl_encoding_cp50221,
&mbfl_encoding_cp50222,
NULL
};
/* The following perfect hashing table was adapted from gperf output, and the hashing code was generated using gperf.
 * Instead of storing the names themselves, the table refers to mbfl_encoding_ptr_list above, which makes it lighter
 * on the data cache.
 * You can use the generate_name_perfect_hash_table.php script to help generate the necessary lookup tables.
 *
 * The array is indexed by the value returned from mbfl_name2encoding_perfect_hash(). Each slot holds the index
 * into mbfl_encoding_ptr_list of the encoding whose canonical name hashes to that slot, or -1 when no canonical
 * name hashes there. Callers must bounds-check the hash value before indexing, and must still compare the name,
 * because an arbitrary input string can land on an occupied slot. */
static const int8_t mbfl_encoding_ptr_list_after_hashing[] = {
-1, -1, -1, -1,
-1, -1,
66,
-1,
73,
-1,
78,
61,
76,
-1,
59,
46,
52,
54,
49,
57,
69,
21,
50,
58,
75,
35,
9,
64,
48,
56,
74,
47,
55,
40,
45,
53,
18,
39,
72,
60,
23,
10,
30,
36,
67,
71,
37,
27,
77,
26,
51,
12,
6,
11,
7,
29,
5,
24,
0,
2,
13,
43,
31,
33,
38,
63,
8,
1,
15,
-1,
16,
-1,
14,
3,
44,
-1,
20,
-1,
32,
-1,
68,
25,
17,
28,
-1, -1, -1,
22,
-1, -1,
4,
-1, -1,
62,
-1, -1,
34,
-1,
41,
-1, -1, -1,
42,
70,
19,
-1, -1, -1,
65
};
/* gperf-generated hash used to index mbfl_encoding_ptr_list_after_hashing.
 *
 * The hash starts from the length and adds the asso_values[] weight of selected
 * bytes of the name: the bytes at offsets 6, 5, 4, 2 and 0 (each only when the
 * length selects the corresponding switch label) plus the final byte.
 * asso_values[] assigns the same weight to an ASCII letter in upper and lower
 * case, so the result does not depend on letter case.
 *
 * str: name to hash; only the bytes listed above are read.
 * len: length of str in bytes. The function unconditionally reads str[len - 1],
 *      and the default label reads str[6], so the caller has to pass a length
 *      for which those reads are in bounds (the lookup in this file only calls
 *      it for lengths within the NAME_HASH_* bounds).
 *
 * Returns the hash value. It is NOT guaranteed to be a valid index of the slot
 * table; the caller must range-check it. */
static unsigned int mbfl_name2encoding_perfect_hash(const char *str, size_t len)
{
/* Per-byte weights produced by gperf; indexed by the byte value. */
static const unsigned char asso_values[] =
{
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 1, 109, 109, 1, 19,
0, 16, 13, 3, 7, 35, 1, 20, 109, 109,
109, 109, 109, 109, 109, 16, 1, 0, 44, 6,
26, 53, 8, 0, 25, 32, 13, 12, 1, 0,
25, 0, 32, 18, 51, 3, 109, 15, 109, 109,
1, 109, 109, 109, 109, 109, 109, 16, 1, 0,
44, 6, 26, 53, 8, 0, 25, 32, 13, 12,
1, 0, 25, 0, 32, 18, 51, 3, 109, 15,
109, 109, 1, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
109, 109, 109, 109, 109, 109
};
/* The length itself is the initial hash value. */
unsigned int hval = len;
/* Each label adds one byte's weight and then falls through to the labels for
 * shorter lengths, so longer names contribute more bytes. */
switch (hval)
{
default:
/* Reached for every length that is not 1..6. */
hval += asso_values[(unsigned char)str[6]];
ZEND_FALLTHROUGH;
case 6:
hval += asso_values[(unsigned char)str[5]];
ZEND_FALLTHROUGH;
case 5:
hval += asso_values[(unsigned char)str[4]];
ZEND_FALLTHROUGH;
case 4:
case 3:
/* Offsets 3 and 1 are not part of the hash. */
hval += asso_values[(unsigned char)str[2]];
ZEND_FALLTHROUGH;
case 2:
case 1:
hval += asso_values[(unsigned char)str[0]];
break;
}
/* The last byte always participates, whatever the length. */
return hval + asso_values[(unsigned char)str[len - 1]];
}
/* Inclusive bounds on the name length for which the perfect-hash lookup is
 * attempted. The (disabled) development-time sanity check in
 * mbfl_name2encoding_ex() aborts if any canonical encoding name falls outside
 * this range, so these must cover the shortest and longest canonical names. */
#define NAME_HASH_MIN_NAME_LENGTH 2
#define NAME_HASH_MAX_NAME_LENGTH 23
/* Look up an encoding by a NUL-terminated name.
 *
 * Convenience wrapper that measures `name` with strlen() and delegates to
 * mbfl_name2encoding_ex(); the result is whatever that function returns for
 * the same name and length. `name` must be a valid C string. */
const mbfl_encoding *mbfl_name2encoding(const char *name)
{
	size_t name_len = strlen(name);

	return mbfl_name2encoding_ex(name, name_len);
}
/* Return non-zero when the NUL-terminated string `candidate` equals, compared
 * case-insensitively with strncasecmp(), exactly the `name_len` bytes starting
 * at `name`.
 *
 * strncasecmp() stops at the first NUL it meets in either string, so a zero
 * result alone only proves that the two strings agree up to that point:
 *  - `candidate` may be longer than `name_len` (name is merely a prefix), or
 *  - `name` may contain a NUL before `name_len`, in which case `candidate` is
 *    SHORTER than `name_len` and indexing candidate[name_len] would read past
 *    its terminator.
 * Comparing strlen(candidate) against `name_len` rejects both cases without
 * ever reading outside `candidate`. */
static int mbfl_encoding_name_matches(const char *candidate, const char *name, size_t name_len)
{
	return strncasecmp(candidate, name, name_len) == 0 && strlen(candidate) == name_len;
}

/* Look up an encoding by name, given as a pointer and an explicit length.
 *
 * The lookup is case-insensitive and tries, in order:
 *   1. the canonical name, through the perfect hash table;
 *   2. the MIME charset name of every encoding;
 *   3. the aliases of every encoding.
 *
 * name:     bytes of the requested name; does not need to be NUL-terminated at
 *           name_len, and is never read beyond name_len bytes.
 * name_len: number of bytes in name.
 *
 * Returns the matching encoding, or NULL when nothing matches. A name only
 * matches when it is equal in full to the canonical name / MIME name / alias;
 * prefixes and names containing embedded NUL bytes never match. */
const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len)
{
	const mbfl_encoding *const *encoding;

	/* Sanity check perfect hash for name.
	 * Never enable this in production, this is only a development-time sanity check! */
#if ZEND_DEBUG && 0
	for (encoding = mbfl_encoding_ptr_list; *encoding; encoding++) {
		size_t name_length = strlen((*encoding)->name);
		if (!(name_length <= NAME_HASH_MAX_NAME_LENGTH && name_length >= NAME_HASH_MIN_NAME_LENGTH)) {
			fprintf(stderr, "name length is not satisfying bound check: %zu %s\n", name_length, (*encoding)->name);
			abort();
		}
		unsigned int key = mbfl_name2encoding_perfect_hash((*encoding)->name, name_length);
		if (mbfl_encoding_ptr_list[mbfl_encoding_ptr_list_after_hashing[key]] != *encoding) {
			fprintf(stderr, "mbfl_name2encoding_perfect_hash: key %u %s mismatch\n", key, (*encoding)->name);
			abort();
		}
	}
#endif

	/* Use perfect hash lookup for name */
	if (name_len <= NAME_HASH_MAX_NAME_LENGTH && name_len >= NAME_HASH_MIN_NAME_LENGTH) {
		unsigned int key = mbfl_name2encoding_perfect_hash(name, name_len);
		/* The hash of an arbitrary string may lie outside the table. */
		if (key < sizeof(mbfl_encoding_ptr_list_after_hashing) / sizeof(mbfl_encoding_ptr_list_after_hashing[0])) {
			int8_t offset = mbfl_encoding_ptr_list_after_hashing[key];
			if (offset >= 0) {
				encoding = mbfl_encoding_ptr_list + offset;
				/* The hash is only perfect for the canonical names themselves; any
				 * other input can land on an occupied slot, so require a full,
				 * exact-length match before trusting it. */
				if (mbfl_encoding_name_matches((*encoding)->name, name, name_len)) {
					return *encoding;
				}
			}
		}
	}

	/* search MIME charset name */
	for (encoding = mbfl_encoding_ptr_list; *encoding; encoding++) {
		if ((*encoding)->mime_name) {
			if (mbfl_encoding_name_matches((*encoding)->mime_name, name, name_len)) {
				return *encoding;
			}
		}
	}

	/* search aliases */
	for (encoding = mbfl_encoding_ptr_list; *encoding; encoding++) {
		if ((*encoding)->aliases) {
			for (const char **alias = (*encoding)->aliases; *alias; alias++) {
				if (mbfl_encoding_name_matches(*alias, name, name_len)) {
					return *encoding;
				}
			}
		}
	}

	return NULL;
}
/* Find the encoding whose no_encoding identifier equals `no_encoding`.
 *
 * Performs a linear scan of mbfl_encoding_ptr_list up to its NULL terminator
 * and returns the first entry that matches, or NULL when none does. */
const mbfl_encoding *mbfl_no2encoding(enum mbfl_no_encoding no_encoding)
{
	const mbfl_encoding **entry = mbfl_encoding_ptr_list;

	while (*entry != NULL) {
		const mbfl_encoding *candidate = *entry;

		if (candidate->no_encoding == no_encoding) {
			return candidate;
		}
		entry++;
	}

	return NULL;
}
/* Return the canonical name of the encoding identified by `no_encoding`.
 *
 * The encoding is resolved with mbfl_no2encoding(); when it is not found the
 * empty string is returned, so the result is never NULL. */
const char *mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding)
{
	const mbfl_encoding *found = mbfl_no2encoding(no_encoding);

	if (found == NULL) {
		return "";
	}
	return found->name;
}
/* Expose the table of supported encodings.
 *
 * Returns a pointer to the first element of mbfl_encoding_ptr_list; the array
 * ends with a NULL entry, which callers can use to find its end. */
const mbfl_encoding **mbfl_get_supported_encodings(void)
{
	const mbfl_encoding **supported = mbfl_encoding_ptr_list;

	return supported;
}
/* Return the MIME charset name of `encoding`, if it has a usable one.
 *
 * An encoding whose mime_name is NULL or the empty string has no preferred
 * MIME name, and NULL is returned for it; otherwise the mime_name pointer
 * itself is returned. `encoding` must not be NULL. */
const char *mbfl_encoding_preferred_mime_name(const mbfl_encoding *encoding)
{
	const char *mime = encoding->mime_name;

	if (mime == NULL || mime[0] == '\0') {
		return NULL;
	}
	return mime;
}