From 3e7acf901db76f4dd63ee46e93fbf0e4b2982c2f Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Wed, 4 Nov 2020 20:10:14 +0200 Subject: [PATCH] Remove mbstring identify filters mbstring had an 'identify filter' for almost every supported text encoding which was used when auto-detecting the most likely encoding for a string. It would run over the string and set a 'flag' if it saw anything which did not appear likely to be the encoding in question. One problem with this scheme was that encodings which merely appeared less likely to be the correct one were completely rejected, even if there was no better candidate. Another problem was that the 'identify filters' had a huge amount of code duplication with the 'conversion filters'. Eliminate the identify filters. Instead, when auto-detecting text encoding, use conversion filters to see whether the input string is valid in candidate encodings or not. At the same time, watch the type of codepoints which the string decodes to and mark it as less likely if non-printable characters (ESC, form feed, bell, etc.) or 'private use area' codepoints are seen. Interestingly, one old test case in which JIS text was misidentified as UTF-8 (and this wrong behavior was enshrined in the test) was 'fixed' and the JIS string is now auto-detected as JIS. 
--- ext/mbstring/config.m4 | 3 +- ext/mbstring/config.w32 | 4 +- ext/mbstring/libmbfl/filters/mbfilter_7bit.c | 16 - ext/mbstring/libmbfl/filters/mbfilter_7bit.h | 1 - .../libmbfl/filters/mbfilter_armscii8.c | 16 - .../libmbfl/filters/mbfilter_armscii8.h | 1 - ext/mbstring/libmbfl/filters/mbfilter_ascii.c | 21 - ext/mbstring/libmbfl/filters/mbfilter_ascii.h | 1 - ext/mbstring/libmbfl/filters/mbfilter_big5.c | 39 -- ext/mbstring/libmbfl/filters/mbfilter_big5.h | 2 - .../libmbfl/filters/mbfilter_cp1251.c | 17 - .../libmbfl/filters/mbfilter_cp1251.h | 1 - .../libmbfl/filters/mbfilter_cp1252.c | 16 - .../libmbfl/filters/mbfilter_cp1252.h | 1 - .../libmbfl/filters/mbfilter_cp1254.c | 16 - .../libmbfl/filters/mbfilter_cp1254.h | 1 - .../libmbfl/filters/mbfilter_cp5022x.c | 363 ------------------ .../libmbfl/filters/mbfilter_cp5022x.h | 6 +- .../libmbfl/filters/mbfilter_cp51932.c | 46 --- .../libmbfl/filters/mbfilter_cp51932.h | 1 - ext/mbstring/libmbfl/filters/mbfilter_cp850.c | 6 - ext/mbstring/libmbfl/filters/mbfilter_cp850.h | 1 - ext/mbstring/libmbfl/filters/mbfilter_cp866.c | 6 - ext/mbstring/libmbfl/filters/mbfilter_cp866.h | 1 - ext/mbstring/libmbfl/filters/mbfilter_cp932.c | 28 -- ext/mbstring/libmbfl/filters/mbfilter_cp932.h | 1 - ext/mbstring/libmbfl/filters/mbfilter_cp936.c | 26 -- ext/mbstring/libmbfl/filters/mbfilter_cp936.h | 1 - .../libmbfl/filters/mbfilter_euc_cn.c | 36 -- .../libmbfl/filters/mbfilter_euc_cn.h | 1 - .../libmbfl/filters/mbfilter_euc_jp.c | 85 ---- .../libmbfl/filters/mbfilter_euc_jp.h | 1 - .../libmbfl/filters/mbfilter_euc_jp_2004.c | 7 - .../libmbfl/filters/mbfilter_euc_jp_2004.h | 1 - .../libmbfl/filters/mbfilter_euc_jp_win.c | 64 +-- .../libmbfl/filters/mbfilter_euc_jp_win.h | 1 - .../libmbfl/filters/mbfilter_euc_kr.c | 36 -- .../libmbfl/filters/mbfilter_euc_kr.h | 1 - .../libmbfl/filters/mbfilter_euc_tw.c | 60 --- .../libmbfl/filters/mbfilter_euc_tw.h | 1 - .../libmbfl/filters/mbfilter_gb18030.c | 60 --- 
.../libmbfl/filters/mbfilter_gb18030.h | 1 - ext/mbstring/libmbfl/filters/mbfilter_hz.c | 54 --- ext/mbstring/libmbfl/filters/mbfilter_hz.h | 1 - .../libmbfl/filters/mbfilter_iso2022_jp_ms.c | 102 ----- .../libmbfl/filters/mbfilter_iso2022_jp_ms.h | 1 - .../libmbfl/filters/mbfilter_iso2022_kr.c | 78 ---- .../libmbfl/filters/mbfilter_iso2022_kr.h | 1 - .../libmbfl/filters/mbfilter_iso2022jp_2004.c | 97 ----- .../libmbfl/filters/mbfilter_iso2022jp_2004.h | 1 - .../filters/mbfilter_iso2022jp_mobile.c | 7 - .../filters/mbfilter_iso2022jp_mobile.h | 1 - .../libmbfl/filters/mbfilter_iso8859_1.c | 6 - .../libmbfl/filters/mbfilter_iso8859_1.h | 1 - .../libmbfl/filters/mbfilter_iso8859_10.c | 6 - .../libmbfl/filters/mbfilter_iso8859_10.h | 1 - .../libmbfl/filters/mbfilter_iso8859_13.c | 6 - .../libmbfl/filters/mbfilter_iso8859_13.h | 1 - .../libmbfl/filters/mbfilter_iso8859_14.c | 6 - .../libmbfl/filters/mbfilter_iso8859_14.h | 1 - .../libmbfl/filters/mbfilter_iso8859_15.c | 6 - .../libmbfl/filters/mbfilter_iso8859_15.h | 1 - .../libmbfl/filters/mbfilter_iso8859_16.c | 6 - .../libmbfl/filters/mbfilter_iso8859_16.h | 1 - .../libmbfl/filters/mbfilter_iso8859_2.c | 6 - .../libmbfl/filters/mbfilter_iso8859_2.h | 1 - .../libmbfl/filters/mbfilter_iso8859_3.c | 16 - .../libmbfl/filters/mbfilter_iso8859_3.h | 1 - .../libmbfl/filters/mbfilter_iso8859_4.c | 6 - .../libmbfl/filters/mbfilter_iso8859_4.h | 1 - .../libmbfl/filters/mbfilter_iso8859_5.c | 6 - .../libmbfl/filters/mbfilter_iso8859_5.h | 1 - .../libmbfl/filters/mbfilter_iso8859_6.c | 16 - .../libmbfl/filters/mbfilter_iso8859_6.h | 1 - .../libmbfl/filters/mbfilter_iso8859_7.c | 16 - .../libmbfl/filters/mbfilter_iso8859_7.h | 1 - .../libmbfl/filters/mbfilter_iso8859_8.c | 16 - .../libmbfl/filters/mbfilter_iso8859_8.h | 1 - .../libmbfl/filters/mbfilter_iso8859_9.c | 6 - .../libmbfl/filters/mbfilter_iso8859_9.h | 1 - ext/mbstring/libmbfl/filters/mbfilter_jis.c | 189 --------- ext/mbstring/libmbfl/filters/mbfilter_jis.h | 2 - 
ext/mbstring/libmbfl/filters/mbfilter_koi8r.c | 6 - ext/mbstring/libmbfl/filters/mbfilter_koi8r.h | 1 - ext/mbstring/libmbfl/filters/mbfilter_koi8u.c | 6 - ext/mbstring/libmbfl/filters/mbfilter_koi8u.h | 1 - ext/mbstring/libmbfl/filters/mbfilter_sjis.c | 32 -- ext/mbstring/libmbfl/filters/mbfilter_sjis.h | 1 - .../libmbfl/filters/mbfilter_sjis_2004.c | 7 - .../libmbfl/filters/mbfilter_sjis_2004.h | 1 - .../libmbfl/filters/mbfilter_sjis_mac.c | 7 - .../libmbfl/filters/mbfilter_sjis_mac.h | 2 - .../libmbfl/filters/mbfilter_sjis_mobile.c | 19 - .../libmbfl/filters/mbfilter_sjis_mobile.h | 4 - .../libmbfl/filters/mbfilter_sjis_open.c | 28 -- .../libmbfl/filters/mbfilter_sjis_open.h | 1 - ext/mbstring/libmbfl/filters/mbfilter_ucs2.c | 27 -- ext/mbstring/libmbfl/filters/mbfilter_ucs2.h | 3 - ext/mbstring/libmbfl/filters/mbfilter_uhc.c | 48 --- ext/mbstring/libmbfl/filters/mbfilter_uhc.h | 1 - ext/mbstring/libmbfl/filters/mbfilter_utf16.c | 126 ------ ext/mbstring/libmbfl/filters/mbfilter_utf16.h | 4 +- ext/mbstring/libmbfl/filters/mbfilter_utf32.c | 146 ------- ext/mbstring/libmbfl/filters/mbfilter_utf32.h | 4 +- ext/mbstring/libmbfl/filters/mbfilter_utf7.c | 57 --- ext/mbstring/libmbfl/filters/mbfilter_utf7.h | 1 - ext/mbstring/libmbfl/filters/mbfilter_utf8.c | 81 ---- ext/mbstring/libmbfl/filters/mbfilter_utf8.h | 1 - .../libmbfl/filters/mbfilter_utf8_mobile.c | 25 -- .../libmbfl/filters/mbfilter_utf8_mobile.h | 5 - ext/mbstring/libmbfl/mbfl/mbfilter.c | 225 ++++------- ext/mbstring/libmbfl/mbfl/mbfilter.h | 3 +- ext/mbstring/libmbfl/mbfl/mbfilter_8bit.c | 6 - ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h | 1 - ext/mbstring/libmbfl/mbfl/mbfl_convert.c | 10 +- ext/mbstring/libmbfl/mbfl/mbfl_ident.c | 254 ------------ ext/mbstring/libmbfl/mbfl/mbfl_ident.h | 70 ---- ext/mbstring/mbstring.c | 21 +- ext/mbstring/tests/bug45722.phpt | 2 +- ext/mbstring/tests/mb_convert_encoding.phpt | 26 +- ext/mbstring/tests/mb_detect_encoding.phpt | 3 +- 121 files changed, 118 insertions(+), 
2817 deletions(-) delete mode 100644 ext/mbstring/libmbfl/mbfl/mbfl_ident.c delete mode 100644 ext/mbstring/libmbfl/mbfl/mbfl_ident.h diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4 index 06e55746170..8c4b01fb523 100644 --- a/ext/mbstring/config.m4 +++ b/ext/mbstring/config.m4 @@ -161,7 +161,6 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/mbfl/mbfl_convert.c libmbfl/mbfl/mbfl_encoding.c libmbfl/mbfl/mbfl_filter_output.c - libmbfl/mbfl/mbfl_ident.c libmbfl/mbfl/mbfl_language.c libmbfl/mbfl/mbfl_memory_device.c libmbfl/mbfl/mbfl_string.c @@ -177,7 +176,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/nls/nls_tr.c libmbfl/nls/nls_ua.c ]) - PHP_MBSTRING_ADD_INSTALL_HEADERS([libmbfl/config.h libmbfl/mbfl/eaw_table.h libmbfl/mbfl/mbfilter.h libmbfl/mbfl/mbfilter_8bit.h libmbfl/mbfl/mbfilter_pass.h libmbfl/mbfl/mbfilter_wchar.h libmbfl/mbfl/mbfl_consts.h libmbfl/mbfl/mbfl_convert.h libmbfl/mbfl/mbfl_defs.h libmbfl/mbfl/mbfl_encoding.h libmbfl/mbfl/mbfl_filter_output.h libmbfl/mbfl/mbfl_ident.h libmbfl/mbfl/mbfl_language.h libmbfl/mbfl/mbfl_memory_device.h libmbfl/mbfl/mbfl_string.h]) + PHP_MBSTRING_ADD_INSTALL_HEADERS([libmbfl/config.h libmbfl/mbfl/eaw_table.h libmbfl/mbfl/mbfilter.h libmbfl/mbfl/mbfilter_8bit.h libmbfl/mbfl/mbfilter_pass.h libmbfl/mbfl/mbfilter_wchar.h libmbfl/mbfl/mbfl_consts.h libmbfl/mbfl/mbfl_convert.h libmbfl/mbfl/mbfl_defs.h libmbfl/mbfl/mbfl_encoding.h libmbfl/mbfl/mbfl_filter_output.h libmbfl/mbfl/mbfl_language.h libmbfl/mbfl/mbfl_memory_device.h libmbfl/mbfl/mbfl_string.h]) ]) dnl diff --git a/ext/mbstring/config.w32 b/ext/mbstring/config.w32 index 93b8bc9a2f8..239446f6009 100644 --- a/ext/mbstring/config.w32 +++ b/ext/mbstring/config.w32 @@ -40,14 +40,14 @@ if (PHP_MBSTRING != "no") { ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \ mbfilter_pass.c mbfilter_wchar.c mbfl_convert.c mbfl_encoding.c \ - mbfl_filter_output.c mbfl_ident.c mbfl_language.c mbfl_memory_device.c \ + mbfl_filter_output.c 
mbfl_language.c mbfl_memory_device.c \ mbfl_string.c", "mbstring"); ADD_SOURCES("ext/mbstring/libmbfl/nls", "nls_de.c nls_en.c nls_ja.c \ nls_kr.c nls_neutral.c nls_ru.c nls_uni.c nls_zh.c nls_hy.c \ nls_ua.c nls_tr.c", "mbstring"); - PHP_INSTALL_HEADERS("ext/mbstring", "mbstring.h libmbfl/config.h libmbfl/mbfl/eaw_table.h libmbfl/mbfl/mbfilter.h libmbfl/mbfl/mbfilter_8bit.h libmbfl/mbfl/mbfilter_pass.h libmbfl/mbfl/mbfilter_wchar.h libmbfl/mbfl/mbfl_consts.h libmbfl/mbfl/mbfl_convert.h libmbfl/mbfl/mbfl_defs.h libmbfl/mbfl/mbfl_encoding.h libmbfl/mbfl/mbfl_filter_output.h libmbfl/mbfl/mbfl_ident.h libmbfl/mbfl/mbfl_language.h libmbfl/mbfl/mbfl_memory_device.h libmbfl/mbfl/mbfl_string.h"); + PHP_INSTALL_HEADERS("ext/mbstring", "mbstring.h libmbfl/config.h libmbfl/mbfl/eaw_table.h libmbfl/mbfl/mbfilter.h libmbfl/mbfl/mbfilter_8bit.h libmbfl/mbfl/mbfilter_pass.h libmbfl/mbfl/mbfilter_wchar.h libmbfl/mbfl/mbfl_consts.h libmbfl/mbfl/mbfl_convert.h libmbfl/mbfl/mbfl_defs.h libmbfl/mbfl/mbfl_encoding.h libmbfl/mbfl/mbfl_filter_output.h libmbfl/mbfl/mbfl_language.h libmbfl/mbfl/mbfl_memory_device.h libmbfl/mbfl/mbfl_string.h"); AC_DEFINE('HAVE_MBSTRING', 1, 'Have mbstring support'); diff --git a/ext/mbstring/libmbfl/filters/mbfilter_7bit.c b/ext/mbstring/libmbfl/filters/mbfilter_7bit.c index 331dc8e743c..bdf0cc78d97 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_7bit.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_7bit.c @@ -31,8 +31,6 @@ #include "mbfilter.h" #include "mbfilter_7bit.h" -static int mbfl_filt_ident_7bit(int c, mbfl_identify_filter *filter); - const mbfl_encoding mbfl_encoding_7bit = { mbfl_no_encoding_7bit, "7bit", @@ -44,12 +42,6 @@ const mbfl_encoding mbfl_encoding_7bit = { NULL }; -const struct mbfl_identify_vtbl vtbl_identify_7bit = { - mbfl_no_encoding_7bit, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_7bit -}; - const struct mbfl_convert_vtbl vtbl_8bit_7bit = { mbfl_no_encoding_8bit, mbfl_no_encoding_7bit, @@ -88,11 +80,3 @@ int 
mbfl_filt_conv_any_7bit(int c, mbfl_convert_filter *filter) } return c; } - -static int mbfl_filt_ident_7bit(int c, mbfl_identify_filter *filter) -{ - if (c >= 0x80) { - filter->flag = 1; - } - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_7bit.h b/ext/mbstring/libmbfl/filters/mbfilter_7bit.h index f842a2ad3be..d0dfe2c5181 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_7bit.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_7bit.h @@ -34,7 +34,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_7bit; -extern const struct mbfl_identify_vtbl vtbl_identify_7bit; extern const struct mbfl_convert_vtbl vtbl_8bit_7bit; extern const struct mbfl_convert_vtbl vtbl_7bit_8bit; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_armscii8.c b/ext/mbstring/libmbfl/filters/mbfilter_armscii8.c index 55668fceb9c..70999f9102c 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_armscii8.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_armscii8.c @@ -30,8 +30,6 @@ #include "mbfilter_armscii8.h" #include "unicode_table_armscii8.h" -static int mbfl_filt_ident_armscii8(int c, mbfl_identify_filter *filter); - static const char *mbfl_encoding_armscii8_aliases[] = {"ArmSCII-8", "ArmSCII8", "ARMSCII-8", "ARMSCII8", NULL}; const mbfl_encoding mbfl_encoding_armscii8 = { @@ -45,12 +43,6 @@ const mbfl_encoding mbfl_encoding_armscii8 = { &vtbl_wchar_armscii8 }; -const struct mbfl_identify_vtbl vtbl_identify_armscii8 = { - mbfl_no_encoding_armscii8, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_armscii8 -}; - const struct mbfl_convert_vtbl vtbl_wchar_armscii8 = { mbfl_no_encoding_wchar, mbfl_no_encoding_armscii8, @@ -108,11 +100,3 @@ int mbfl_filt_conv_wchar_armscii8(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_armscii8(int c, mbfl_identify_filter *filter) -{ - if (c >= armscii8_ucs_table_min && !armscii8_ucs_table[c - armscii8_ucs_table_min]) { - filter->flag = 1; - } - return c; -} diff --git 
a/ext/mbstring/libmbfl/filters/mbfilter_armscii8.h b/ext/mbstring/libmbfl/filters/mbfilter_armscii8.h index 0b14934995c..223aa8d2069 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_armscii8.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_armscii8.h @@ -28,7 +28,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_armscii8; -extern const struct mbfl_identify_vtbl vtbl_identify_armscii8; extern const struct mbfl_convert_vtbl vtbl_wchar_armscii8; extern const struct mbfl_convert_vtbl vtbl_armscii8_wchar; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_ascii.c b/ext/mbstring/libmbfl/filters/mbfilter_ascii.c index e411d8a7073..bfe7d1da985 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_ascii.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_ascii.c @@ -31,8 +31,6 @@ #include "mbfilter.h" #include "mbfilter_ascii.h" -static int mbfl_filt_ident_ascii(int c, mbfl_identify_filter *filter); - static const char *mbfl_encoding_ascii_aliases[] = {"ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991", "US-ASCII", "ISO646-US", "us", "IBM367", "IBM-367", "cp367", "csASCII", NULL}; const mbfl_encoding mbfl_encoding_ascii = { @@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_ascii = { &vtbl_wchar_ascii }; -const struct mbfl_identify_vtbl vtbl_identify_ascii = { - mbfl_no_encoding_ascii, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_ascii -}; - const struct mbfl_convert_vtbl vtbl_ascii_wchar = { mbfl_no_encoding_ascii, mbfl_no_encoding_wchar, @@ -101,16 +93,3 @@ int mbfl_filt_conv_wchar_ascii(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_ascii(int c, mbfl_identify_filter *filter) -{ - if (c >= 0x20 && c < 0x80) { - ; - } else if (c == 0x0d || c == 0x0a || c == 0x09 || c == 0) { /* CR or LF or HTAB or null */ - ; - } else { - filter->flag = 1; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_ascii.h b/ext/mbstring/libmbfl/filters/mbfilter_ascii.h index 3c8aead9efc..ae048fbd09e 100644 --- 
a/ext/mbstring/libmbfl/filters/mbfilter_ascii.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_ascii.h @@ -34,7 +34,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_ascii; -extern const struct mbfl_identify_vtbl vtbl_identify_ascii; extern const struct mbfl_convert_vtbl vtbl_ascii_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_ascii; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_big5.c b/ext/mbstring/libmbfl/filters/mbfilter_big5.c index 95c75a37e34..110150134c3 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_big5.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_big5.c @@ -32,8 +32,6 @@ #include "unicode_table_big5.h" -static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter); - static const unsigned char mblen_table_big5[] = { /* 0x81-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -77,18 +75,6 @@ const mbfl_encoding mbfl_encoding_cp950 = { &vtbl_wchar_cp950 }; -const struct mbfl_identify_vtbl vtbl_identify_big5 = { - mbfl_no_encoding_big5, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_big5 -}; - -const struct mbfl_identify_vtbl vtbl_identify_cp950 = { - mbfl_no_encoding_cp950, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_big5 -}; - const struct mbfl_convert_vtbl vtbl_big5_wchar = { mbfl_no_encoding_big5, mbfl_no_encoding_wchar, @@ -322,28 +308,3 @@ mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter) -{ - int c1; - if (filter->encoding->no_encoding == mbfl_no_encoding_cp950) { - c1 = 0x80; - } else { - c1 = 0xa0; - } - - if (filter->status) { /* kanji second char */ - if (c < 0x40 || (c > 0x7e && c < 0xa1) ||c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - } else if (c >= 0 && c < 0x80) { /* latin ok */ - ; - } else if (c > c1 && c < 0xff) { /* DBCS lead byte */ - filter->status = 1; - } else { /* bad */ - filter->flag = 1; - } - - return c; -} 
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_big5.h b/ext/mbstring/libmbfl/filters/mbfilter_big5.h index c5a0955f8d7..e475b6bd0c5 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_big5.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_big5.h @@ -33,12 +33,10 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_big5; -extern const struct mbfl_identify_vtbl vtbl_identify_big5; extern const struct mbfl_convert_vtbl vtbl_big5_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_big5; extern const mbfl_encoding mbfl_encoding_cp950; -extern const struct mbfl_identify_vtbl vtbl_identify_cp950; extern const struct mbfl_convert_vtbl vtbl_cp950_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_cp950; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp1251.c b/ext/mbstring/libmbfl/filters/mbfilter_cp1251.c index cf5107b0d60..a85d19bb282 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp1251.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp1251.c @@ -31,8 +31,6 @@ #include "mbfilter_cp1251.h" #include "unicode_table_cp1251.h" -static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter); - static const char *mbfl_encoding_cp1251_aliases[] = {"CP1251", "CP-1251", "WINDOWS-1251", NULL}; const mbfl_encoding mbfl_encoding_cp1251 = { @@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_cp1251 = { &vtbl_wchar_cp1251 }; -const struct mbfl_identify_vtbl vtbl_identify_cp1251 = { - mbfl_no_encoding_cp1251, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_cp1251 -}; - const struct mbfl_convert_vtbl vtbl_wchar_cp1251 = { mbfl_no_encoding_wchar, mbfl_no_encoding_cp1251, @@ -107,12 +99,3 @@ int mbfl_filt_conv_wchar_cp1251(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter) -{ - /* Only one byte in this single-byte encoding is not used */ - if (c == 0x98) { - filter->flag = 1; - } - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp1251.h 
b/ext/mbstring/libmbfl/filters/mbfilter_cp1251.h index b76cc9f6806..68ebd0932be 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp1251.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp1251.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_cp1251; -extern const struct mbfl_identify_vtbl vtbl_identify_cp1251; extern const struct mbfl_convert_vtbl vtbl_wchar_cp1251; extern const struct mbfl_convert_vtbl vtbl_cp1251_wchar; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp1252.c b/ext/mbstring/libmbfl/filters/mbfilter_cp1252.c index af5a879e166..9770ba87e49 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp1252.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp1252.c @@ -31,8 +31,6 @@ #include "mbfilter_cp1252.h" #include "unicode_table_cp1252.h" -static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter); - static const char *mbfl_encoding_cp1252_aliases[] = {"cp1252", NULL}; const mbfl_encoding mbfl_encoding_cp1252 = { @@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_cp1252 = { &vtbl_wchar_cp1252 }; -const struct mbfl_identify_vtbl vtbl_identify_cp1252 = { - mbfl_no_encoding_cp1252, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_cp1252 -}; - const struct mbfl_convert_vtbl vtbl_cp1252_wchar = { mbfl_no_encoding_cp1252, mbfl_no_encoding_wchar, @@ -115,11 +107,3 @@ int mbfl_filt_conv_cp1252_wchar(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter) -{ - if (c >= 0x80 && c < 0xA0 && !cp1252_ucs_table[c - 0x80]) { - filter->flag = 1; - } - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp1252.h b/ext/mbstring/libmbfl/filters/mbfilter_cp1252.h index afb2fffa9b9..73017e33b2e 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp1252.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp1252.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_cp1252; -extern const struct mbfl_identify_vtbl vtbl_identify_cp1252; 
extern const struct mbfl_convert_vtbl vtbl_cp1252_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_cp1252; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp1254.c b/ext/mbstring/libmbfl/filters/mbfilter_cp1254.c index e0ca60e7c08..910ddda1c68 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp1254.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp1254.c @@ -31,8 +31,6 @@ #include "mbfilter_cp1254.h" #include "unicode_table_cp1254.h" -static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter); - static const char *mbfl_encoding_cp1254_aliases[] = {"CP1254", "CP-1254", "WINDOWS-1254", NULL}; const mbfl_encoding mbfl_encoding_cp1254 = { @@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_cp1254 = { &vtbl_wchar_cp1254 }; -const struct mbfl_identify_vtbl vtbl_identify_cp1254 = { - mbfl_no_encoding_cp1254, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_cp1254 -}; - const struct mbfl_convert_vtbl vtbl_cp1254_wchar = { mbfl_no_encoding_cp1254, mbfl_no_encoding_wchar, @@ -107,11 +99,3 @@ int mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter) CK((*filter->output_function)(s, filter->data)); return c; } - -static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter) -{ - if (c >= 0x81 && c <= 0x9E && !cp1254_ucs_table[c - cp1254_ucs_table_min]) { - filter->flag = 1; - } - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp1254.h b/ext/mbstring/libmbfl/filters/mbfilter_cp1254.h index 15a63eae30a..21327e3b7f9 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp1254.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp1254.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_cp1254; -extern const struct mbfl_identify_vtbl vtbl_identify_cp1254; extern const struct mbfl_convert_vtbl vtbl_cp1254_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_cp1254; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c index 
798487d3428..a4d1724e8fa 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c @@ -36,10 +36,6 @@ typedef struct _mbfl_filt_conv_wchar_cp50220_ctx { mbfl_convert_filter last; } mbfl_filt_conv_wchar_cp50220_ctx; -static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_cp50220(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_cp50221(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_cp50222(int c, mbfl_identify_filter *filter); static void mbfl_filt_conv_wchar_cp50220_ctor(mbfl_convert_filter *filt); static void mbfl_filt_conv_wchar_cp50220_dtor(mbfl_convert_filter *filt); static void mbfl_filt_conv_wchar_cp50220_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest); @@ -99,36 +95,6 @@ const mbfl_encoding mbfl_encoding_cp50222 = { &vtbl_wchar_cp50222 }; -const struct mbfl_identify_vtbl vtbl_identify_jis_ms = { - mbfl_no_encoding_jis_ms, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_jis_ms -}; - -const struct mbfl_identify_vtbl vtbl_identify_cp50220 = { - mbfl_no_encoding_cp50220, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_cp50220 -}; - -const struct mbfl_identify_vtbl vtbl_identify_cp50220raw = { - mbfl_no_encoding_cp50220raw, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_cp50220 -}; - -const struct mbfl_identify_vtbl vtbl_identify_cp50221 = { - mbfl_no_encoding_cp50221, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_cp50221 -}; - -const struct mbfl_identify_vtbl vtbl_identify_cp50222 = { - mbfl_no_encoding_cp50222, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_cp50222 -}; - const struct mbfl_convert_vtbl vtbl_jis_ms_wchar = { mbfl_no_encoding_jis_ms, mbfl_no_encoding_wchar, @@ -948,332 +914,3 @@ mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter) return 0; } - - -static int mbfl_filt_ident_jis_ms(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII 
*/ -/* case 0x10: X 0201 latin */ -/* case 0x20: X 0201 kana */ -/* case 0x80: X 0208 */ -/* case 0x90: X 0212 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (c == 0x0e) { /* "kana in" */ - filter->status = 0x20; - } else if (c == 0x0f) { /* "kana out" */ - filter->status = 0; - } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ -/* case 0x91: X 0212 second char */ - case 1: - filter->status &= ~0xf; - if (c == 0x1b) { - goto retry; - } else if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x28) { /* '(' */ - filter->status++; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ ( */ - case 4: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x44) { /* 'D' */ - filter->status = 0x90; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else if (c == 0x49) { /* 'I' */ - filter->status = 0x20; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int mbfl_filt_ident_cp50220(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) 
{ -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x80: X 0208 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ - case 1: - if (c == 0x1b) { - filter->status++; - } else { - filter->status &= ~0xf; - if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42) { /* 'B' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int mbfl_filt_ident_cp50221(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x80: X 0208 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ - case 1: - if (c == 0x1b) { - filter->status++; - } else { - filter->status &= ~0xf; - if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ 
- filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42) { /* 'B' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else if (c == 0x49) { /* 'I' */ - filter->status = 0x20; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int mbfl_filt_ident_cp50222(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x80: X 0208 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ - case 1: - if (c == 0x1b) { - filter->status++; - } else { - filter->status &= ~0xf; - if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42) { /* 'B' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else { - filter->flag = 1; /* bad */ - 
filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h index e7cde7dff83..e97cd0fab8b 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h @@ -37,11 +37,7 @@ extern const mbfl_encoding mbfl_encoding_cp50220; extern const mbfl_encoding mbfl_encoding_cp50220raw; extern const mbfl_encoding mbfl_encoding_cp50221; extern const mbfl_encoding mbfl_encoding_cp50222; -extern const struct mbfl_identify_vtbl vtbl_identify_jis_ms; -extern const struct mbfl_identify_vtbl vtbl_identify_cp50220; -extern const struct mbfl_identify_vtbl vtbl_identify_cp50220raw; -extern const struct mbfl_identify_vtbl vtbl_identify_cp50221; -extern const struct mbfl_identify_vtbl vtbl_identify_cp50222; + extern const struct mbfl_convert_vtbl vtbl_jis_ms_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_jis_ms; extern const struct mbfl_convert_vtbl vtbl_cp50220_wchar; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c index 9b893bc6388..cb95469408b 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c @@ -34,8 +34,6 @@ #include "unicode_table_jis.h" #include "cp932_table.h" -static int mbfl_filt_ident_cp51932(int c, mbfl_identify_filter *filter); - static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -55,15 +53,8 @@ static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 }; - static const char *mbfl_encoding_cp51932_aliases[] = {"cp51932", NULL}; -const struct mbfl_identify_vtbl vtbl_identify_cp51932 = { - mbfl_no_encoding_cp51932, - mbfl_filt_ident_common_ctor, - 
mbfl_filt_ident_cp51932 -}; - const mbfl_encoding mbfl_encoding_cp51932 = { mbfl_no_encoding_cp51932, "CP51932", @@ -299,40 +290,3 @@ mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_cp51932(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* latin */ - if (c >= 0 && c < 0x80) { /* ok */ - ; - } else if (c > 0xa0 && c < 0xff) { /* kanji first char */ - filter->status = 1; - } else if (c == 0x8e) { /* kana first char */ - filter->status = 2; - } else { /* bad */ - filter->flag = 1; - } - break; - - case 1: /* got first half */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - case 2: /* got 0x8e */ - if (c < 0xa1 || c > 0xdf) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - default: - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.h b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.h index bbf56f48ca8..f48ec7cb3d4 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_cp51932; -extern const struct mbfl_identify_vtbl vtbl_identify_cp51932; extern const struct mbfl_convert_vtbl vtbl_cp51932_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_cp51932; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp850.c b/ext/mbstring/libmbfl/filters/mbfilter_cp850.c index 50390c179f0..941f1eee350 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp850.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp850.c @@ -40,12 +40,6 @@ const mbfl_encoding mbfl_encoding_cp850 = { &vtbl_wchar_cp850 }; -const struct mbfl_identify_vtbl vtbl_identify_cp850 = { - mbfl_no_encoding_cp850, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_wchar_cp850 = { mbfl_no_encoding_wchar, 
mbfl_no_encoding_cp850, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp850.h b/ext/mbstring/libmbfl/filters/mbfilter_cp850.h index 2a0e8e9e65a..d823f2975de 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp850.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp850.h @@ -27,7 +27,6 @@ #define MBFL_MBFILTER_CP850_H extern const mbfl_encoding mbfl_encoding_cp850; -extern const struct mbfl_identify_vtbl vtbl_identify_cp850; extern const struct mbfl_convert_vtbl vtbl_wchar_cp850; extern const struct mbfl_convert_vtbl vtbl_cp850_wchar; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp866.c b/ext/mbstring/libmbfl/filters/mbfilter_cp866.c index de56b2c4507..f0aeece2978 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp866.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp866.c @@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_cp866 = { &vtbl_wchar_cp866 }; -const struct mbfl_identify_vtbl vtbl_identify_cp866 = { - mbfl_no_encoding_cp866, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_wchar_cp866 = { mbfl_no_encoding_wchar, mbfl_no_encoding_cp866, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp866.h b/ext/mbstring/libmbfl/filters/mbfilter_cp866.h index e9eb25a5413..a580132ce2a 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp866.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp866.h @@ -31,7 +31,6 @@ #define MBFL_MBFILTER_CP866_H extern const mbfl_encoding mbfl_encoding_cp866; -extern const struct mbfl_identify_vtbl vtbl_identify_cp866; extern const struct mbfl_convert_vtbl vtbl_wchar_cp866; extern const struct mbfl_convert_vtbl vtbl_cp866_wchar; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c index 511d8644651..351a94c81c5 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c @@ -33,8 +33,6 @@ #include "unicode_table_cp932_ext.h" #include "unicode_table_jis.h" -static int 
mbfl_filt_ident_cp932(int c, mbfl_identify_filter *filter); - static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -67,12 +65,6 @@ const mbfl_encoding mbfl_encoding_cp932 = { &vtbl_wchar_cp932 }; -const struct mbfl_identify_vtbl vtbl_identify_cp932 = { - mbfl_no_encoding_cp932, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_cp932 -}; - const struct mbfl_convert_vtbl vtbl_cp932_wchar = { mbfl_no_encoding_cp932, mbfl_no_encoding_wchar, @@ -323,23 +315,3 @@ mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_cp932(int c, mbfl_identify_filter *filter) -{ - if (filter->status) { /* kanji second char */ - if (c < 0x40 || c > 0xfc || c == 0x7f) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - } else if (c >= 0 && c < 0x80) { /* latin ok */ - ; - } else if (c > 0xa0 && c < 0xe0) { /* kana ok */ - ; - } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ - filter->status = 1; - } else { /* bad */ - filter->flag = 1; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.h b/ext/mbstring/libmbfl/filters/mbfilter_cp932.h index f0b6daeac7f..031276ddf14 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp932.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_cp932; -extern const struct mbfl_identify_vtbl vtbl_identify_cp932; extern const struct mbfl_convert_vtbl vtbl_cp932_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_cp932; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp936.c b/ext/mbstring/libmbfl/filters/mbfilter_cp936.c index d73954267fe..4cfd1af8779 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp936.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp936.c @@ -32,8 +32,6 @@ #define UNICODE_TABLE_CP936_DEF #include "unicode_table_cp936.h" -static 
int mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter); - static const unsigned char mblen_table_cp936[] = { /* 0x81-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -66,12 +64,6 @@ const mbfl_encoding mbfl_encoding_cp936 = { &vtbl_wchar_cp936 }; -const struct mbfl_identify_vtbl vtbl_identify_cp936 = { - mbfl_no_encoding_cp936, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_cp936 -}; - const struct mbfl_convert_vtbl vtbl_cp936_wchar = { mbfl_no_encoding_cp936, mbfl_no_encoding_wchar, @@ -283,21 +275,3 @@ mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter) -{ - if (filter->status) { /* kanji second char */ - if (c < 0x40 || c > 0xfe || c == 0x7f) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - } else if (c >= 0 && c < 0x80) { /* latin ok */ - ; - } else if (c > 0x80 && c < 0xff) { /* DBCS lead byte */ - filter->status = 1; - } else { /* bad */ - filter->flag = 1; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp936.h b/ext/mbstring/libmbfl/filters/mbfilter_cp936.h index cc5e9bd3333..d10391f5d22 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp936.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp936.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_cp936; -extern const struct mbfl_identify_vtbl vtbl_identify_cp936; extern const struct mbfl_convert_vtbl vtbl_cp936_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_cp936; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c index 0a7edaa9271..6ee0bf9bd0c 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c @@ -32,8 +32,6 @@ #include "unicode_table_cp936.h" -static int mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter); - static const unsigned char 
mblen_table_euccn[] = { /* 0xA1-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -66,12 +64,6 @@ const mbfl_encoding mbfl_encoding_euc_cn = { &vtbl_wchar_euccn }; -const struct mbfl_identify_vtbl vtbl_identify_euccn = { - mbfl_no_encoding_euc_cn, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_euccn -}; - const struct mbfl_convert_vtbl vtbl_euccn_wchar = { mbfl_no_encoding_euc_cn, mbfl_no_encoding_wchar, @@ -209,31 +201,3 @@ mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* latin */ - if (c >= 0 && c < 0x80) { /* ok */ - ; - } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */ - filter->status = 1; - } else { /* bad */ - filter->flag = 1; - } - break; - - case 1: /* got lead byte */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - default: - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.h index 31251fcf6dc..7ef92d8b4b8 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_euc_cn; -extern const struct mbfl_identify_vtbl vtbl_identify_euccn; extern const struct mbfl_convert_vtbl vtbl_euccn_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_euccn; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c index 44c5f3c4163..1589ae7966d 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c @@ -33,7 +33,6 @@ #include "unicode_table_cp932_ext.h" #include "unicode_table_jis.h" -int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter); static int 
mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter); const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ @@ -68,12 +67,6 @@ const mbfl_encoding mbfl_encoding_euc_jp = { &vtbl_wchar_eucjp }; -const struct mbfl_identify_vtbl vtbl_identify_eucjp = { - mbfl_no_encoding_euc_jp, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_eucjp -}; - const struct mbfl_convert_vtbl vtbl_eucjp_wchar = { mbfl_no_encoding_euc_jp, mbfl_no_encoding_wchar, @@ -252,81 +245,3 @@ mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter) return c; } - -/* Not all byte sequences in JIS X 0208 which would otherwise be valid are - * actually mapped to a character */ -static inline int in_unused_jisx0208_range(int c1, int c2) -{ - /* `c1`, `c2` are kuten codes */ - unsigned int s = (c1 - 0x21)*94 + c2 - 0x21; - return s >= jisx0208_ucs_table_size || !jisx0208_ucs_table[s]; -} - -static inline int in_unused_jisx0212_range(int c1, int c2) -{ - unsigned int s = (c1 - 0x21)*94 + c2 - 0x21; - return s >= jisx0212_ucs_table_size || !jisx0212_ucs_table[s]; -} - -int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter) -{ - unsigned char ku, ten; - - switch (filter->status & 0xF) { - case 0: /* latin */ - if (c < 0x80) { /* ok */ - ; - } else if (c > 0xa0 && c < 0xff) { - /* JIS X 0208, first byte - * In EUC-JP, each such byte ranges from 0xA1-0xFE; however, - * the bytes of JIS X 0208 kuten codes range from 0x21-0x7E */ - filter->status = ((c - 0xA1 + 0x21) << 8) | 1; - } else if (c == 0x8e) { /* JIS X 0201 */ - filter->status = 2; - } else if (c == 0x8f) { /* JIS X 0212 */ - filter->status = 3; - } else { /* bad */ - filter->flag = 1; - } - break; - - case 1: /* 2nd byte of JIS X 0208 */ - ku = filter->status >> 8; - ten = c - 0xA1 + 0x21; - if (c < 0xa1 || c > 0xfe || in_unused_jisx0208_range(ku, ten)) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - case 2: /* JIS X 0201 */ - if (c < 0xa1 || c > 0xdf) { /* bad */ - filter->flag = 1; - } - 
filter->status = 0; - break; - - case 3: /* JIS X 0212 */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } else { - filter->status = ((c - 0xA1 + 0x21) << 8) | 4; - } - break; - - case 4: /* JIS X 0212, final byte */ - ku = filter->status >> 8; - ten = c - 0xA1 + 0x21; - if (c < 0xa1 || c > 0xfe || in_unused_jisx0212_range(ku, ten)) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - default: - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.h index e1bd5653433..cc7aa3a6bff 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_euc_jp; -extern const struct mbfl_identify_vtbl vtbl_identify_eucjp; extern const struct mbfl_convert_vtbl vtbl_eucjp_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_eucjp; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c index c6c708b3378..d832358e1d8 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.c @@ -31,7 +31,6 @@ #include "mbfilter_euc_jp_2004.h" #include "mbfilter_sjis_2004.h" -extern int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter); extern const unsigned char mblen_table_eucjp[]; static const char *mbfl_encoding_eucjp2004_aliases[] = {"EUC_JP-2004", NULL}; @@ -47,12 +46,6 @@ const mbfl_encoding mbfl_encoding_eucjp2004 = { &vtbl_wchar_eucjp2004 }; -const struct mbfl_identify_vtbl vtbl_identify_eucjp2004 = { - mbfl_no_encoding_eucjp2004, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_eucjp -}; - const struct mbfl_convert_vtbl vtbl_eucjp2004_wchar = { mbfl_no_encoding_eucjp2004, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.h 
b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.h index 55c06aef98b..affdd447f57 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_eucjp2004; -extern const struct mbfl_identify_vtbl vtbl_identify_eucjp2004; extern const struct mbfl_convert_vtbl vtbl_eucjp2004_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c index 7c1aebdea5b..b93fc9101a4 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c @@ -34,8 +34,6 @@ #include "unicode_table_jis.h" #include "cp932_table.h" -static int mbfl_filt_ident_eucjp_win(int c, mbfl_identify_filter *filter); - static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -55,15 +53,7 @@ static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 }; - -static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", - "eucJP-ms", NULL}; - -const struct mbfl_identify_vtbl vtbl_identify_eucjpwin = { - mbfl_no_encoding_eucjp_win, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_eucjp_win -}; +static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", "eucJP-ms", NULL}; const mbfl_encoding mbfl_encoding_eucjp_win = { mbfl_no_encoding_eucjp_win, @@ -373,55 +363,3 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_eucjp_win(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* latin */ - if (c >= 0 && c < 0x80) { /* ok */ - ; - } else if (c > 0xa0 && c < 0xff) { /* kanji first char */ - filter->status = 1; - } else if (c == 0x8e) { 
/* kana first char */ - filter->status = 2; - } else if (c == 0x8f) { /* X 0212 first char */ - filter->status = 3; - } else { /* bad */ - filter->flag = 1; - } - break; - - case 1: /* got first half */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - case 2: /* got 0x8e */ - if (c < 0xa1 || c > 0xdf) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - case 3: /* got 0x8f */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status++; - break; - case 4: /* got 0x8f */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - default: - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.h index f17058d523e..bb1e4dc392d 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_eucjp_win; -extern const struct mbfl_identify_vtbl vtbl_identify_eucjpwin; extern const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_eucjpwin; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.c index 0c44a998601..3f423f59694 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.c @@ -31,8 +31,6 @@ #include "mbfilter_euc_kr.h" #include "unicode_table_uhc.h" -static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter); - static const unsigned char mblen_table_euckr[] = { /* 0xA1-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -65,12 +63,6 @@ const mbfl_encoding mbfl_encoding_euc_kr = { &vtbl_wchar_euckr }; -const struct mbfl_identify_vtbl vtbl_identify_euckr = { - 
mbfl_no_encoding_euc_kr, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_euckr -}; - const struct mbfl_convert_vtbl vtbl_euckr_wchar = { mbfl_no_encoding_euc_kr, mbfl_no_encoding_wchar, @@ -223,31 +215,3 @@ mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* latin */ - if (c >= 0 && c < 0x80) { /* ok */ - ; - } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */ - filter->status = 1; - } else { /* bad */ - filter->flag = 1; - } - break; - - case 1: /* got lead byte */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - default: - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.h index 1534d2bedc5..e0c13cf53ad 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_euc_kr; -extern const struct mbfl_identify_vtbl vtbl_identify_euckr; extern const struct mbfl_convert_vtbl vtbl_euckr_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_euckr; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.c index 74bca29d47e..68dd539b721 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.c @@ -32,8 +32,6 @@ #include "unicode_table_cns11643.h" -static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter); - static const unsigned char mblen_table_euctw[] = { /* 0xA1-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -67,12 +65,6 @@ const mbfl_encoding mbfl_encoding_euc_tw = { &vtbl_wchar_euctw }; -const struct mbfl_identify_vtbl vtbl_identify_euctw = { - 
mbfl_no_encoding_euc_tw, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_euctw -}; - const struct mbfl_convert_vtbl vtbl_euctw_wchar = { mbfl_no_encoding_euc_tw, mbfl_no_encoding_wchar, @@ -271,55 +263,3 @@ mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter) } return c; } - -static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* latin */ - if (c >= 0 && c < 0x80) { /* ok */ - ; - } else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */ - filter->status = 1; - } else if (c == 0x8e) { /* DBCS lead byte */ - filter->status = 2; - } else { /* bad */ - filter->flag = 1; - } - break; - - case 1: /* got lead byte */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - case 2: /* got lead byte */ - if (c >= 0xa1 && c < 0xaf) { /* ok */ - filter->status = 3; - } else { - filter->flag = 1; /* bad */ - } - break; - - case 3: /* got lead byte */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 4; - break; - - case 4: /* got lead byte */ - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - default: - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.h index ed1f0912597..9c2ffa48021 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_euc_tw; -extern const struct mbfl_identify_vtbl vtbl_identify_euctw; extern const struct mbfl_convert_vtbl vtbl_euctw_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_euctw; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_gb18030.c b/ext/mbstring/libmbfl/filters/mbfilter_gb18030.c index ec6e3973a53..ac59ec85504 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_gb18030.c +++ 
b/ext/mbstring/libmbfl/filters/mbfilter_gb18030.c @@ -33,8 +33,6 @@ #include "unicode_table_cp936.h" #include "unicode_table_gb18030.h" -static int mbfl_filt_ident_gb18030(int c, mbfl_identify_filter *filter); - static const char *mbfl_encoding_gb18030_aliases[] = {"gb-18030", "gb-18030-2000", NULL}; const mbfl_encoding mbfl_encoding_gb18030 = { @@ -48,12 +46,6 @@ const mbfl_encoding mbfl_encoding_gb18030 = { &vtbl_wchar_gb18030 }; -const struct mbfl_identify_vtbl vtbl_identify_gb18030 = { - mbfl_no_encoding_gb18030, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_gb18030 -}; - const struct mbfl_convert_vtbl vtbl_gb18030_wchar = { mbfl_no_encoding_gb18030, mbfl_no_encoding_wchar, @@ -414,55 +406,3 @@ mbfl_filt_conv_wchar_gb18030(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_gb18030(int c, mbfl_identify_filter *filter) -{ - int c1; - - c1 = (filter->status >> 8) & 0xff; - filter->status &= 0xff; - - if (filter->status == 0) { - if (c <= 0x80 || c == 0xff) { - filter->status = 0; - } else { - filter->status = 1; - filter->status |= (c << 8); - } - } else if (filter->status == 1) { /* dbcs/qbcs 2nd byte */ - if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c >= 0x30 && c <= 0x39) { /* qbcs */ - filter->status = 2; - } else if (((c1 >= 0xaa && c1 <= 0xaf) || (c1 >= 0xf8 && c1 <= 0xfe)) && (c >= 0xa1 && c <= 0xfe)) { - filter->status = 0; /* UDA part 1,2 */ - } else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) { - filter->status = 0; /* UDA part 3 */ - } else if ((c1 >= 0xa1 && c1 <= 0xa9 && c >= 0xa1 && c <= 0xfe) || - (c1 >= 0xb0 && c1 <= 0xf7 && c >= 0xa1 && c <= 0xfe) || - (c1 >= 0x81 && c1 <= 0xa0 && c >= 0x40 && c <= 0xfe && c != 0x7f) || - (c1 >= 0xaa && c1 <= 0xfe && c >= 0x40 && c <= 0xa0 && c != 0x7f) || - (c1 >= 0xa8 && c1 <= 0xa9 && c >= 0x40 && c <= 0xa0 && c != 0x7f)) { - filter->status = 0; /* DBCS */ - } else { - filter->flag = 1; /* bad */ - filter->status = 0; - } - } else if 
(filter->status == 2) { /* qbcs 3rd byte */ - if (c > 0x80 && c < 0xff) { - filter->status = 3; - } else { - filter->flag = 1; /* bad */ - filter->status = 0; - } - } else if (filter->status == 3) { /* qbcs 4th byte */ - if (c >= 0x30 && c < 0x40) { - filter->status = 0; - } else { - filter->flag = 1; /* bad */ - filter->status = 0; - } - } else { /* bad */ - filter->flag = 1; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_gb18030.h b/ext/mbstring/libmbfl/filters/mbfilter_gb18030.h index dc7d51b6e2c..e7f0eae16bf 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_gb18030.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_gb18030.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_gb18030; -extern const struct mbfl_identify_vtbl vtbl_identify_gb18030; extern const struct mbfl_convert_vtbl vtbl_gb18030_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_gb18030; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_hz.c b/ext/mbstring/libmbfl/filters/mbfilter_hz.c index ceac85816cf..bf3f8c4b774 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_hz.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_hz.c @@ -32,8 +32,6 @@ #include "unicode_table_cp936.h" -static int mbfl_filt_ident_hz(int c, mbfl_identify_filter *filter); - const mbfl_encoding mbfl_encoding_hz = { mbfl_no_encoding_hz, "HZ", @@ -45,12 +43,6 @@ const mbfl_encoding mbfl_encoding_hz = { &vtbl_wchar_hz }; -const struct mbfl_identify_vtbl vtbl_identify_hz = { - mbfl_no_encoding_hz, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_hz -}; - const struct mbfl_convert_vtbl vtbl_hz_wchar = { mbfl_no_encoding_hz, mbfl_no_encoding_wchar, @@ -225,49 +217,3 @@ mbfl_filt_conv_any_hz_flush(mbfl_convert_filter *filter) filter->status &= 0xff; return 0; } - -static int mbfl_filt_ident_hz(int c, mbfl_identify_filter *filter) -{ - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: GB2312 */ - case 0: - if (c == 0x7e) { - filter->status += 2; - } else if 
(filter->status == 0x10 && c > 0x20 && c < 0x7f) { /* DBCS first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x11: GB2312 second char */ - case 1: - filter->status &= ~0xf; - if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - break; - - case 2: - if (c == 0x7d) { /* '}' */ - filter->status = 0; - } else if (c == 0x7b) { /* '{' */ - filter->status = 0x10; - } else if (c == 0x7e) { /* '~' */ - filter->status = 0; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_hz.h b/ext/mbstring/libmbfl/filters/mbfilter_hz.h index e7e35cacc81..6b1dfb1564c 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_hz.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_hz.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_hz; -extern const struct mbfl_identify_vtbl vtbl_identify_hz; extern const struct mbfl_convert_vtbl vtbl_hz_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_hz; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c index 32f4072ce2d..950365045c8 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c @@ -34,8 +34,6 @@ #include "unicode_table_jis.h" #include "cp932_table.h" -int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter); - static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL}; const mbfl_encoding mbfl_encoding_2022jpms = { @@ -49,12 +47,6 @@ const mbfl_encoding mbfl_encoding_2022jpms = { &vtbl_wchar_2022jpms }; -const struct mbfl_identify_vtbl vtbl_identify_2022jpms = { - mbfl_no_encoding_2022jpms, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_2022jpms -}; - const struct mbfl_convert_vtbl 
vtbl_2022jpms_wchar = { mbfl_no_encoding_2022jpms, mbfl_no_encoding_wchar, @@ -429,97 +421,3 @@ mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter) return 0; } - -int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x20: X 0201 kana */ -/* case 0x80: X 0208 */ -/* case 0xa0: X UDC */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ -/* case 0xa1: UDC second char */ - case 1: - filter->status &= ~0xf; - if (c == 0x1b) { - goto retry; - } else if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x28) { /* '(' */ - filter->status++; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ ( */ - case 4: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x3f) { /* '?' 
*/ - filter->status = 0xa0; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42) { /* 'B' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0; - } else if (c == 0x49) { /* 'I' */ - filter->status = 0x20; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h index 7c40b192466..fdc85183d7e 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_2022jpms; -extern const struct mbfl_identify_vtbl vtbl_identify_2022jpms; extern const struct mbfl_convert_vtbl vtbl_2022jpms_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_2022jpms; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c index 8df1d6f9b56..feb29a0a2ab 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c @@ -31,8 +31,6 @@ #include "mbfilter_iso2022_kr.h" #include "unicode_table_uhc.h" -static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter); - const mbfl_encoding mbfl_encoding_2022kr = { mbfl_no_encoding_2022kr, "ISO-2022-KR", @@ -44,12 +42,6 @@ const mbfl_encoding mbfl_encoding_2022kr = { &vtbl_wchar_2022kr }; -const struct mbfl_identify_vtbl vtbl_identify_2022kr = { - mbfl_no_encoding_2022kr, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_2022kr -}; - const struct mbfl_convert_vtbl vtbl_wchar_2022kr = { mbfl_no_encoding_wchar, mbfl_no_encoding_2022kr, @@ -282,73 +274,3 @@ mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter) return 0; } - -static int 
mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: KSC5601 mode */ -/* case 0x20: KSC5601 DBCS */ -/* case 0x40: KSC5601 SBCS */ - case 0: - if (!(filter->status & 0x10)) { - if (c == 0x1b) - filter->status += 2; - } else if (filter->status == 0x20 && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x21: KSC5601 second char */ - case 1: - filter->status &= ~0xf; - if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x29) { /* ')' */ - filter->status++; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $) */ - case 5: - if (c == 0x43) { /* 'C' */ - filter->status = 0x10; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.h b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.h index 7277c5f11c5..a00da5bbaca 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_2022kr; -extern const struct mbfl_identify_vtbl vtbl_identify_2022kr; extern const struct mbfl_convert_vtbl vtbl_wchar_2022kr; extern const struct mbfl_convert_vtbl vtbl_2022kr_wchar; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c index b381f09bea5..ce9267a2ecd 100644 --- 
a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.c @@ -34,7 +34,6 @@ #include "unicode_table_jis.h" extern int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter); -static int mbfl_filt_ident_2022jp_2004(int c, mbfl_identify_filter *filter); const mbfl_encoding mbfl_encoding_2022jp_2004 = { mbfl_no_encoding_2022jp_2004, @@ -47,12 +46,6 @@ const mbfl_encoding mbfl_encoding_2022jp_2004 = { &vtbl_wchar_2022jp_2004 }; -const struct mbfl_identify_vtbl vtbl_identify_2022jp_2004 = { - mbfl_no_encoding_2022jp_2004, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_2022jp_2004 -}; - const struct mbfl_convert_vtbl vtbl_2022jp_2004_wchar = { mbfl_no_encoding_2022jp_2004, mbfl_no_encoding_wchar, @@ -72,93 +65,3 @@ const struct mbfl_convert_vtbl vtbl_wchar_2022jp_2004 = { mbfl_filt_conv_jis2004_flush, NULL, }; - -static int mbfl_filt_ident_2022jp_2004(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x80: X 0212 */ -/* case 0x90: X 0213 plane 1 */ -/* case 0xa0: X 0213 plane 2 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ - case 1: - if (c == 0x1b) { - filter->status++; - } else { - filter->status &= ~0xf; - if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x42) { /* 'B' */ - filter->status = 0x80; - } else if (c == 0x28) { /* '(' */ - filter->status++; - } else { - filter->flag = 1; /* bad */ - 
filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ ( */ - case 4: - if (c == 0x51) { /* JIS X 0213 plane 1 */ - filter->status = 0x90; - } else if (c == 0x50) { /* JIS X 0213 plane 2 */ - filter->status = 0xa0; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42) { /* 'B' */ - filter->status = 0; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.h b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.h index 929559b87f9..58b2dd31d8c 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_2004.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_2022jp_2004; -extern const struct mbfl_identify_vtbl vtbl_identify_2022jp_2004; extern const struct mbfl_convert_vtbl vtbl_2022jp_2004_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp_2004; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c index 00f0ddeda84..e6300675e81 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c @@ -36,7 +36,6 @@ #include "cp932_table.h" extern int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter); -extern int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter); static const char *mbfl_encoding_2022jp_kddi_aliases[] = {"ISO-2022-JP-KDDI", NULL}; @@ -51,12 +50,6 @@ const mbfl_encoding mbfl_encoding_2022jp_kddi = { &vtbl_wchar_2022jp_kddi }; -const struct mbfl_identify_vtbl vtbl_identify_2022jp_kddi = { - mbfl_no_encoding_2022jp_kddi, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_2022jpms -}; - const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar 
= { mbfl_no_encoding_2022jp_kddi, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.h b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.h index a375e8df0e8..a53ec2c36f0 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_2022jp_kddi; -extern const struct mbfl_identify_vtbl vtbl_identify_2022jp_kddi; extern const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_1.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_1.c index 899234f3b05..6c091e0fff0 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_1.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_1.c @@ -43,12 +43,6 @@ const mbfl_encoding mbfl_encoding_8859_1 = { &vtbl_wchar_8859_1 }; -const struct mbfl_identify_vtbl vtbl_identify_8859_1 = { - mbfl_no_encoding_8859_1, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_8859_1_wchar = { mbfl_no_encoding_8859_1, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_1.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_1.h index b593f10f54e..91223d7738d 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_1.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_1.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_1; -extern const struct mbfl_identify_vtbl vtbl_identify_8859_1; extern const struct mbfl_convert_vtbl vtbl_8859_1_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_8859_1; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_10.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_10.c index 62179fa001b..39288ec5959 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_10.c +++ 
b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_10.c @@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_10 = { &vtbl_wchar_8859_10 }; -const struct mbfl_identify_vtbl vtbl_identify_8859_10 = { - mbfl_no_encoding_8859_10, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_8859_10_wchar = { mbfl_no_encoding_8859_10, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_10.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_10.h index f9133404de3..7027ae5228d 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_10.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_10.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_10; -extern const struct mbfl_identify_vtbl vtbl_identify_8859_10; extern const struct mbfl_convert_vtbl vtbl_8859_10_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_8859_10; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_13.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_13.c index 88651baf652..e44416dbc7b 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_13.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_13.c @@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_13 = { &vtbl_wchar_8859_13 }; -const struct mbfl_identify_vtbl vtbl_identify_8859_13 = { - mbfl_no_encoding_8859_13, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_8859_13_wchar = { mbfl_no_encoding_8859_13, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_13.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_13.h index dcd60755944..9240cadd2f3 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_13.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_13.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_13; -extern const struct mbfl_identify_vtbl vtbl_identify_8859_13; extern const struct mbfl_convert_vtbl 
vtbl_8859_13_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_8859_13; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_14.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_14.c index 515637ef047..1da474c0a5d 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_14.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_14.c @@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_14 = { &vtbl_wchar_8859_14 }; -const struct mbfl_identify_vtbl vtbl_identify_8859_14 = { - mbfl_no_encoding_8859_14, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_8859_14_wchar = { mbfl_no_encoding_8859_14, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_14.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_14.h index 90531e668a1..6f62e684f84 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_14.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_14.h @@ -16,7 +16,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_14; -extern const struct mbfl_identify_vtbl vtbl_identify_8859_14; extern const struct mbfl_convert_vtbl vtbl_8859_14_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_8859_14; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_15.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_15.c index 070d08dbe14..25ea9ae0f46 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_15.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_15.c @@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_15 = { &vtbl_wchar_8859_15 }; -const struct mbfl_identify_vtbl vtbl_identify_8859_15 = { - mbfl_no_encoding_8859_15, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_8859_15_wchar = { mbfl_no_encoding_8859_15, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_15.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_15.h index a8fb339b0db..839889a7f43 100644 --- 
a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_15.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_15.h @@ -16,7 +16,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_15; -extern const struct mbfl_identify_vtbl vtbl_identify_8859_15; extern const struct mbfl_convert_vtbl vtbl_8859_15_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_8859_15; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_16.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_16.c index c1035020110..84025b1d535 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_16.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_16.c @@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_16 = { &vtbl_wchar_8859_16 }; -const struct mbfl_identify_vtbl vtbl_identify_8859_16 = { - mbfl_no_encoding_8859_16, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_8859_16_wchar = { mbfl_no_encoding_8859_16, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_16.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_16.h index a1a53d7909f..e2b2ef21adc 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_16.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_16.h @@ -13,7 +13,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_16; -extern const struct mbfl_identify_vtbl vtbl_identify_8859_16; extern const struct mbfl_convert_vtbl vtbl_8859_16_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_8859_16; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_2.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_2.c index 2aab9d2c168..37a34d5b5fa 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_2.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_2.c @@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_2 = { &vtbl_wchar_8859_2 }; -const struct mbfl_identify_vtbl vtbl_identify_8859_2 = { - mbfl_no_encoding_8859_2, - mbfl_filt_ident_common_ctor, - 
mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_8859_2_wchar = { mbfl_no_encoding_8859_2, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_2.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_2.h index f344168a183..d0a8dc609ff 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_2.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_2.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_2; -extern const struct mbfl_identify_vtbl vtbl_identify_8859_2; extern const struct mbfl_convert_vtbl vtbl_8859_2_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_8859_2; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_3.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_3.c index 5cd3bfce1d9..9e3e5b5d0c5 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_3.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_3.c @@ -31,8 +31,6 @@ #include "mbfilter_iso8859_3.h" #include "unicode_table_iso8859_3.h" -static int mbfl_filt_ident_iso8859_3(int c, mbfl_identify_filter *filter); - static const char *mbfl_encoding_8859_3_aliases[] = {"ISO8859-3", "latin3", NULL}; const mbfl_encoding mbfl_encoding_8859_3 = { @@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_3 = { &vtbl_wchar_8859_3 }; -const struct mbfl_identify_vtbl vtbl_identify_8859_3 = { - mbfl_no_encoding_8859_3, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_iso8859_3 -}; - const struct mbfl_convert_vtbl vtbl_8859_3_wchar = { mbfl_no_encoding_8859_3, mbfl_no_encoding_wchar, @@ -131,11 +123,3 @@ int mbfl_filt_conv_wchar_8859_3(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_iso8859_3(int c, mbfl_identify_filter *filter) -{ - if (c >= 0xA0 && !iso8859_3_ucs_table[c - 0xA0]) { - filter->status = 1; - } - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_3.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_3.h index 746085373a5..f5f920b8d53 100644 --- 
a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_3.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_3.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_3; -extern const struct mbfl_identify_vtbl vtbl_identify_8859_3; extern const struct mbfl_convert_vtbl vtbl_8859_3_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_8859_3; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_4.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_4.c index a869230355e..8698ac7c005 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_4.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_4.c @@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_4 = { &vtbl_wchar_8859_4 }; -const struct mbfl_identify_vtbl vtbl_identify_8859_4 = { - mbfl_no_encoding_8859_4, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_8859_4_wchar = { mbfl_no_encoding_8859_4, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_4.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_4.h index fe4dcdc6963..df74576f48d 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_4.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_4.h @@ -31,7 +31,6 @@ #define MBFL_MBFILTER_ISO8859_4_H extern const mbfl_encoding mbfl_encoding_8859_4; -extern const struct mbfl_identify_vtbl vtbl_identify_8859_4; extern const struct mbfl_convert_vtbl vtbl_8859_4_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_8859_4; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_5.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_5.c index 33698e563d5..1ccf142c418 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_5.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_5.c @@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_5 = { &vtbl_wchar_8859_5 }; -const struct mbfl_identify_vtbl vtbl_identify_8859_5 = { - mbfl_no_encoding_8859_5, - mbfl_filt_ident_common_ctor, - 
mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_8859_5_wchar = { mbfl_no_encoding_8859_5, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_5.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_5.h index 8b098441419..71f997d5bf6 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_5.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_5.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_5; -extern const struct mbfl_identify_vtbl vtbl_identify_8859_5; extern const struct mbfl_convert_vtbl vtbl_8859_5_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_8859_5; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_6.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_6.c index 8b1fc4bbadd..f0dacb8cbd1 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_6.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_6.c @@ -31,8 +31,6 @@ #include "mbfilter_iso8859_6.h" #include "unicode_table_iso8859_6.h" -static int mbfl_filt_ident_iso8859_6(int c, mbfl_identify_filter *filter); - static const char *mbfl_encoding_8859_6_aliases[] = {"ISO8859-6", "arabic", NULL}; const mbfl_encoding mbfl_encoding_8859_6 = { @@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_6 = { &vtbl_wchar_8859_6 }; -const struct mbfl_identify_vtbl vtbl_identify_8859_6 = { - mbfl_no_encoding_8859_6, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_iso8859_6 -}; - const struct mbfl_convert_vtbl vtbl_8859_6_wchar = { mbfl_no_encoding_8859_6, mbfl_no_encoding_wchar, @@ -131,11 +123,3 @@ int mbfl_filt_conv_wchar_8859_6(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_iso8859_6(int c, mbfl_identify_filter *filter) -{ - if (c >= 0xA0 && !iso8859_6_ucs_table[c - 0xA0]) { - filter->status = 1; - } - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_6.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_6.h index c7e16c7e0bc..4c75372ab56 100644 --- 
a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_6.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_6.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_6; -extern const struct mbfl_identify_vtbl vtbl_identify_8859_6; extern const struct mbfl_convert_vtbl vtbl_8859_6_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_8859_6; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_7.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_7.c index e87ae0ec36d..1dc0f3f2e89 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_7.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_7.c @@ -31,8 +31,6 @@ #include "mbfilter_iso8859_7.h" #include "unicode_table_iso8859_7.h" -static int mbfl_filt_ident_iso8859_7(int c, mbfl_identify_filter *filter); - static const char *mbfl_encoding_8859_7_aliases[] = {"ISO8859-7", "greek", NULL}; const mbfl_encoding mbfl_encoding_8859_7 = { @@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_7 = { &vtbl_wchar_8859_7 }; -const struct mbfl_identify_vtbl vtbl_identify_8859_7 = { - mbfl_no_encoding_8859_7, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_iso8859_7 -}; - const struct mbfl_convert_vtbl vtbl_8859_7_wchar = { mbfl_no_encoding_8859_7, mbfl_no_encoding_wchar, @@ -131,11 +123,3 @@ int mbfl_filt_conv_wchar_8859_7(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_iso8859_7(int c, mbfl_identify_filter *filter) -{ - /* These bytes are not mapped to any character in ISO-8859-7 */ - if (c == 0xAE || c == 0xD2 || c == 0xFF) - filter->status = 1; - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_7.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_7.h index 8421edb929f..cd38d0c06fb 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_7.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_7.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_7; -extern const struct mbfl_identify_vtbl 
vtbl_identify_8859_7; extern const struct mbfl_convert_vtbl vtbl_8859_7_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_8859_7; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_8.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_8.c index 3436d8d3ead..827b4dabeaf 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_8.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_8.c @@ -31,8 +31,6 @@ #include "mbfilter_iso8859_8.h" #include "unicode_table_iso8859_8.h" -static int mbfl_filt_ident_iso8859_8(int c, mbfl_identify_filter *filter); - static const char *mbfl_encoding_8859_8_aliases[] = {"ISO8859-8", "hebrew", NULL}; const mbfl_encoding mbfl_encoding_8859_8 = { @@ -46,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_8 = { &vtbl_wchar_8859_8 }; -const struct mbfl_identify_vtbl vtbl_identify_8859_8 = { - mbfl_no_encoding_8859_8, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_iso8859_8 -}; - const struct mbfl_convert_vtbl vtbl_8859_8_wchar = { mbfl_no_encoding_8859_8, mbfl_no_encoding_wchar, @@ -130,11 +122,3 @@ int mbfl_filt_conv_wchar_8859_8(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_iso8859_8(int c, mbfl_identify_filter *filter) -{ - if (c >= 0xA0 && !iso8859_8_ucs_table[c - 0xA0]) { - filter->status = 1; - } - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_8.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_8.h index ee679814f1a..ab02ae831cd 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_8.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_8.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_8; -extern const struct mbfl_identify_vtbl vtbl_identify_8859_8; extern const struct mbfl_convert_vtbl vtbl_8859_8_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_8859_8; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_9.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_9.c index f4241982cf6..12e1a44d1a0 100644 --- 
a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_9.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_9.c @@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_8859_9 = { &vtbl_wchar_8859_9 }; -const struct mbfl_identify_vtbl vtbl_identify_8859_9 = { - mbfl_no_encoding_8859_9, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_8859_9_wchar = { mbfl_no_encoding_8859_9, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_9.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_9.h index 5a612371137..e628a8219d0 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_9.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_9.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_8859_9; -extern const struct mbfl_identify_vtbl vtbl_identify_8859_9; extern const struct mbfl_convert_vtbl vtbl_8859_9_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_8859_9; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.c b/ext/mbstring/libmbfl/filters/mbfilter_jis.c index b08eec38b0a..ba43872d83e 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.c @@ -33,9 +33,6 @@ #include "unicode_table_cp932_ext.h" #include "unicode_table_jis.h" -static int mbfl_filt_ident_jis(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter); - const mbfl_encoding mbfl_encoding_jis = { mbfl_no_encoding_jis, "JIS", @@ -58,18 +55,6 @@ const mbfl_encoding mbfl_encoding_2022jp = { &vtbl_wchar_2022jp }; -const struct mbfl_identify_vtbl vtbl_identify_jis = { - mbfl_no_encoding_jis, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_jis -}; - -const struct mbfl_identify_vtbl vtbl_identify_2022jp = { - mbfl_no_encoding_2022jp, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_2022jp -}; - const struct mbfl_convert_vtbl vtbl_jis_wchar = { mbfl_no_encoding_jis, mbfl_no_encoding_wchar, @@ -483,177 +468,3 
@@ mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter) return 0; } - -static int mbfl_filt_ident_jis(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x20: X 0201 kana */ -/* case 0x80: X 0208 */ -/* case 0x90: X 0212 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (c == 0x0e) { /* "kana in" */ - filter->status = 0x20; - } else if (c == 0x0f) { /* "kana out" */ - filter->status = 0; - } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ -/* case 0x91: X 0212 second char */ - case 1: - filter->status &= ~0xf; - if (c == 0x1b) { - goto retry; - } else if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x28) { /* '(' */ - filter->status++; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ ( */ - case 4: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x44) { /* 'D' */ - filter->status = 0x90; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else if (c == 0x49) { /* 'I' */ - filter->status = 0x20; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; 
- goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -static int mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter) -{ -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x80: X 0208 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (filter->status == 0x80 && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - ; - } else { - filter->flag = 1; /* bad */ - } - break; - -/* case 0x81: X 0208 second char */ - case 1: - if (c == 0x1b) { - filter->status++; - } else { - filter->status &= ~0xf; - if (c < 0x21 || c > 0x7e) { /* bad */ - filter->flag = 1; - } - } - break; - - /* ESC */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC $ */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - /* ESC ( */ - case 5: - if (c == 0x42) { /* 'B' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else { - filter->flag = 1; /* bad */ - filter->status &= ~0xf; - goto retry; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.h b/ext/mbstring/libmbfl/filters/mbfilter_jis.h index e7a164167e4..55787c9acb7 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.h @@ -34,8 +34,6 @@ extern const mbfl_encoding mbfl_encoding_jis; extern const mbfl_encoding mbfl_encoding_2022jp; -extern const struct mbfl_identify_vtbl vtbl_identify_2022jp; -extern const struct mbfl_identify_vtbl vtbl_identify_jis; extern const struct mbfl_convert_vtbl 
vtbl_jis_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_jis; extern const struct mbfl_convert_vtbl vtbl_2022jp_wchar; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_koi8r.c b/ext/mbstring/libmbfl/filters/mbfilter_koi8r.c index 5c76fc066cd..de82c473cfa 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_koi8r.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_koi8r.c @@ -44,12 +44,6 @@ const mbfl_encoding mbfl_encoding_koi8r = { &vtbl_wchar_koi8r }; -const struct mbfl_identify_vtbl vtbl_identify_koi8r = { - mbfl_no_encoding_koi8r, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_wchar_koi8r = { mbfl_no_encoding_wchar, mbfl_no_encoding_koi8r, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_koi8r.h b/ext/mbstring/libmbfl/filters/mbfilter_koi8r.h index 6073d140ae6..16da179b161 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_koi8r.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_koi8r.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_koi8r; -extern const struct mbfl_identify_vtbl vtbl_identify_koi8r; extern const struct mbfl_convert_vtbl vtbl_wchar_koi8r; extern const struct mbfl_convert_vtbl vtbl_koi8r_wchar; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_koi8u.c b/ext/mbstring/libmbfl/filters/mbfilter_koi8u.c index c94f59aa5a2..26754ac0ab3 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_koi8u.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_koi8u.c @@ -41,12 +41,6 @@ const mbfl_encoding mbfl_encoding_koi8u = { &vtbl_wchar_koi8u }; -const struct mbfl_identify_vtbl vtbl_identify_koi8u = { - mbfl_no_encoding_koi8u, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_wchar_koi8u = { mbfl_no_encoding_wchar, mbfl_no_encoding_koi8u, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_koi8u.h b/ext/mbstring/libmbfl/filters/mbfilter_koi8u.h index 16b95c1fc49..8eb6c2c425a 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_koi8u.h +++ 
b/ext/mbstring/libmbfl/filters/mbfilter_koi8u.h @@ -30,7 +30,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_koi8u; -extern const struct mbfl_identify_vtbl vtbl_identify_koi8u; extern const struct mbfl_convert_vtbl vtbl_wchar_koi8u; extern const struct mbfl_convert_vtbl vtbl_koi8u_wchar; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis.c index 2615902ecde..39e7879c1a0 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis.c @@ -37,7 +37,6 @@ #include "unicode_table_jis.h" static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter); -int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter); const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -71,12 +70,6 @@ const mbfl_encoding mbfl_encoding_sjis = { &vtbl_wchar_sjis }; -const struct mbfl_identify_vtbl vtbl_identify_sjis = { - mbfl_no_encoding_sjis, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_sjis -}; - const struct mbfl_convert_vtbl vtbl_sjis_wchar = { mbfl_no_encoding_sjis, mbfl_no_encoding_wchar, @@ -267,28 +260,3 @@ int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter) return c; } - -int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter) -{ - if (filter->status) { /* Kanji, second byte */ - if (c < 0x40 || c > 0xFC || c == 0x7F) { - filter->flag = 1; - } else { - int s1, s2; - SJIS_DECODE(filter->status, c, s1, s2); - int w = ((s1 - 0x21) * 94) + s2 - 0x21; - if (w >= jisx0208_ucs_table_size || !jisx0208_ucs_table[w]) { - filter->flag = 1; - } - } - filter->status = 0; - } else if (c < 0x80 || (c > 0xA0 && c < 0xE0)) { /* Latin/Kana */ - ; - } else if (c > 0x80 && c < 0xF0 && c != 0xA0) { /* Kanji, first byte */ - filter->status = c; - } else { - filter->flag = 1; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis.h 
b/ext/mbstring/libmbfl/filters/mbfilter_sjis.h index f271f399c92..b0689fce643 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_sjis; -extern const struct mbfl_identify_vtbl vtbl_identify_sjis; extern const struct mbfl_convert_vtbl vtbl_sjis_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_sjis; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c index e611f3f8468..8be40f02aaa 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c @@ -35,7 +35,6 @@ extern const unsigned char mblen_table_sjis[]; -extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter); extern int mbfl_bisec_srch(int w, const unsigned short *tbl, int n); extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n); @@ -52,12 +51,6 @@ const mbfl_encoding mbfl_encoding_sjis2004 = { &vtbl_wchar_sjis2004 }; -const struct mbfl_identify_vtbl vtbl_identify_sjis2004 = { - mbfl_no_encoding_sjis2004, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_sjis -}; - const struct mbfl_convert_vtbl vtbl_sjis2004_wchar = { mbfl_no_encoding_sjis2004, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h index c75e3a910b7..5eb72ca8913 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_sjis2004; -extern const struct mbfl_identify_vtbl vtbl_identify_sjis2004; extern const struct mbfl_convert_vtbl vtbl_sjis2004_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_sjis2004; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c index 
ff8d01937f5..abeec417bd9 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c @@ -35,7 +35,6 @@ #include "sjis_mac2uni.h" -extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter); extern const unsigned char mblen_table_sjis[]; static int mbfl_filt_conv_sjis_mac_flush(mbfl_convert_filter *filter); @@ -53,12 +52,6 @@ const mbfl_encoding mbfl_encoding_sjis_mac = { &vtbl_wchar_sjis_mac }; -const struct mbfl_identify_vtbl vtbl_identify_sjis_mac = { - mbfl_no_encoding_sjis_mac, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_sjis -}; - const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar = { mbfl_no_encoding_sjis_mac, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.h b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.h index f4b2adfeac4..970d14a4c9c 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.h @@ -33,8 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_sjis_mac; - -extern const struct mbfl_identify_vtbl vtbl_identify_sjis_mac; extern const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c index d5015a92a8f..255a457c583 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c @@ -36,7 +36,6 @@ #include "emoji2uni.h" extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n); -extern int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter); extern const unsigned char mblen_table_sjis[]; static const char *mbfl_encoding_sjis_docomo_aliases[] = {"SJIS-DOCOMO", "shift_jis-imode", "x-sjis-emoji-docomo", NULL}; @@ -76,24 +75,6 @@ const mbfl_encoding mbfl_encoding_sjis_sb = { &vtbl_wchar_sjis_sb }; -const struct 
mbfl_identify_vtbl vtbl_identify_sjis_docomo = { - mbfl_no_encoding_sjis_docomo, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_sjis -}; - -const struct mbfl_identify_vtbl vtbl_identify_sjis_kddi = { - mbfl_no_encoding_sjis_kddi, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_sjis -}; - -const struct mbfl_identify_vtbl vtbl_identify_sjis_sb = { - mbfl_no_encoding_sjis_sb, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_sjis -}; - const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = { mbfl_no_encoding_sjis_docomo, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.h b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.h index 51b703da729..19e1920c6e7 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.h @@ -36,10 +36,6 @@ extern const mbfl_encoding mbfl_encoding_sjis_docomo; extern const mbfl_encoding mbfl_encoding_sjis_kddi; extern const mbfl_encoding mbfl_encoding_sjis_sb; -extern const struct mbfl_identify_vtbl vtbl_identify_sjis_docomo; -extern const struct mbfl_identify_vtbl vtbl_identify_sjis_kddi; -extern const struct mbfl_identify_vtbl vtbl_identify_sjis_sb; - extern const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo; extern const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c index b3ec28740bb..d37f01568e2 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c @@ -33,8 +33,6 @@ #include "unicode_table_cp932_ext.h" #include "unicode_table_jis.h" -static int mbfl_filt_ident_sjis_open(int c, mbfl_identify_filter *filter); - static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -67,12 +65,6 @@ 
const mbfl_encoding mbfl_encoding_sjis_open = { &vtbl_wchar_sjis_open }; -const struct mbfl_identify_vtbl vtbl_identify_sjis_open = { - mbfl_no_encoding_sjis_open, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_sjis_open -}; - const struct mbfl_convert_vtbl vtbl_sjis_open_wchar = { mbfl_no_encoding_sjis_open, mbfl_no_encoding_wchar, @@ -323,23 +315,3 @@ mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_sjis_open(int c, mbfl_identify_filter *filter) -{ - if (filter->status) { /* kanji second char */ - if (c < 0x40 || c > 0xfc || c == 0x7f) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - } else if (c >= 0 && c < 0x80) { /* latin ok */ - ; - } else if (c > 0xa0 && c < 0xe0) { /* kana ok */ - ; - } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ - filter->status = 1; - } else { /* bad */ - filter->flag = 1; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h index 2c6c70373a5..764fad7e17a 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h @@ -34,7 +34,6 @@ extern const mbfl_encoding mbfl_encoding_sjis_open; -extern const struct mbfl_identify_vtbl vtbl_identify_sjis_open; extern const struct mbfl_convert_vtbl vtbl_sjis_open_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_open; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_ucs2.c b/ext/mbstring/libmbfl/filters/mbfilter_ucs2.c index 84d94fcd268..39141ce9e7b 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_ucs2.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_ucs2.c @@ -30,8 +30,6 @@ #include "mbfilter.h" #include "mbfilter_ucs2.h" -static int mbfl_filt_ident_ucs2(int c, mbfl_identify_filter *filter); - static const char *mbfl_encoding_ucs2_aliases[] = {"ISO-10646-UCS-2", "UCS2" , "UNICODE", NULL}; /* This library historically had encodings called 'byte2be' and 
'byte2le' @@ -74,24 +72,6 @@ const mbfl_encoding mbfl_encoding_ucs2le = { &vtbl_wchar_ucs2le }; -const struct mbfl_identify_vtbl vtbl_identify_ucs2 = { - mbfl_no_encoding_ucs2, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_ucs2 -}; - -const struct mbfl_identify_vtbl vtbl_identify_ucs2be = { - mbfl_no_encoding_ucs2be, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_ucs2 -}; - -const struct mbfl_identify_vtbl vtbl_identify_ucs2le = { - mbfl_no_encoding_ucs2le, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_ucs2 -}; - const struct mbfl_convert_vtbl vtbl_ucs2_wchar = { mbfl_no_encoding_ucs2, mbfl_no_encoding_wchar, @@ -264,10 +244,3 @@ int mbfl_filt_conv_wchar_ucs2le(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_ucs2(int c, mbfl_identify_filter *filter) -{ - /* Input string must be a multiple of 2 bytes */ - filter->status = (filter->status + 1) % 2; - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_ucs2.h b/ext/mbstring/libmbfl/filters/mbfilter_ucs2.h index f6d9b8b7728..bbf567a4933 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_ucs2.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_ucs2.h @@ -35,9 +35,6 @@ extern const mbfl_encoding mbfl_encoding_ucs2; extern const mbfl_encoding mbfl_encoding_ucs2be; extern const mbfl_encoding mbfl_encoding_ucs2le; -extern const struct mbfl_identify_vtbl vtbl_identify_ucs2; -extern const struct mbfl_identify_vtbl vtbl_identify_ucs2be; -extern const struct mbfl_identify_vtbl vtbl_identify_ucs2le; extern const struct mbfl_convert_vtbl vtbl_ucs2_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_ucs2; extern const struct mbfl_convert_vtbl vtbl_ucs2be_wchar; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_uhc.c b/ext/mbstring/libmbfl/filters/mbfilter_uhc.c index c86612bac2d..a164fccbcdb 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_uhc.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_uhc.c @@ -32,8 +32,6 @@ #define UNICODE_TABLE_UHC_DEF #include "unicode_table_uhc.h" -static 
int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter); - static const unsigned char mblen_table_uhc[] = { /* 0x81-0xFE */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -66,12 +64,6 @@ const mbfl_encoding mbfl_encoding_uhc = { &vtbl_wchar_uhc }; -const struct mbfl_identify_vtbl vtbl_identify_uhc = { - mbfl_no_encoding_uhc, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_uhc -}; - const struct mbfl_convert_vtbl vtbl_uhc_wchar = { mbfl_no_encoding_uhc, mbfl_no_encoding_wchar, @@ -220,43 +212,3 @@ mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter) return c; } - -static int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* latin */ - if (c >= 0 && c < 0x80) { /* ok */ - ; - } else if (c >= 0x81 && c <= 0xa0) { /* dbcs first char */ - filter->status= 1; - } else if (c >= 0xa1 && c <= 0xc6) { /* dbcs first char */ - filter->status= 2; - } else if (c >= 0xc7 && c <= 0xfe) { /* dbcs first char */ - filter->status= 3; - } else { /* bad */ - filter->flag = 1; - } - - case 1: - case 2: - if (c < 0x41 || (c > 0x5a && c < 0x61) - || (c > 0x7a && c < 0x81) || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - case 3: - if (c < 0xa1 || c > 0xfe) { /* bad */ - filter->flag = 1; - } - filter->status = 0; - break; - - default: - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_uhc.h b/ext/mbstring/libmbfl/filters/mbfilter_uhc.h index 6ed54867674..860d45eb86f 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_uhc.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_uhc.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_uhc; -extern const struct mbfl_identify_vtbl vtbl_identify_uhc; extern const struct mbfl_convert_vtbl vtbl_uhc_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_uhc; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf16.c 
b/ext/mbstring/libmbfl/filters/mbfilter_utf16.c index c23a4326951..10063883f2e 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf16.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf16.c @@ -30,9 +30,6 @@ #include "mbfilter.h" #include "mbfilter_utf16.h" -static int mbfl_filt_ident_utf16(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_utf16le(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_utf16be(int c, mbfl_identify_filter *filter); static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter); static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL}; @@ -70,24 +67,6 @@ const mbfl_encoding mbfl_encoding_utf16le = { &vtbl_wchar_utf16le }; -const struct mbfl_identify_vtbl vtbl_identify_utf16 = { - mbfl_no_encoding_utf16, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_utf16 -}; - -const struct mbfl_identify_vtbl vtbl_identify_utf16le = { - mbfl_no_encoding_utf16le, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_utf16le -}; - -const struct mbfl_identify_vtbl vtbl_identify_utf16be = { - mbfl_no_encoding_utf16be, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_utf16be -}; - const struct mbfl_convert_vtbl vtbl_utf16_wchar = { mbfl_no_encoding_utf16, mbfl_no_encoding_wchar, @@ -346,108 +325,3 @@ static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter) return 0; } - -static int mbfl_filt_ident_utf16(int c, mbfl_identify_filter *filter) -{ - if (filter->status == 0) { - if (c >= 0xfe) { /* could be a byte-order mark */ - filter->status = c; - } else { - /* no byte-order mark at beginning of input; assume UTF-16BE */ - filter->filter_function = mbfl_filt_ident_utf16be; - return (filter->filter_function)(c, filter); - } - } else { - unsigned short n = (filter->status << 8) | c; - filter->status = 0; - - if (n == 0xfeff) { - /* it was a big-endian byte-order mark */ - filter->filter_function = mbfl_filt_ident_utf16be; - } else if (n == 0xfffe) { - /* it was a little-endian byte-order mark */ - 
filter->filter_function = mbfl_filt_ident_utf16le; - } else { - /* it wasn't a byte-order mark */ - filter->filter_function = mbfl_filt_ident_utf16be; - (filter->filter_function)(n >> 8, filter); - return (filter->filter_function)(c, filter); - } - } - return c; -} - -static int mbfl_filt_ident_utf16le(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* 1st byte */ - filter->status = 1; - break; - - case 1: /* 2nd byte */ - if ((c & 0xfc) == 0xd8) { - /* Looks like a surrogate pair */ - filter->status = 2; - } else if ((c & 0xfc) == 0xdc) { - /* This is wrong; the second part of the surrogate pair has come first */ - filter->flag = 1; - } else { - filter->status = 0; /* Just an ordinary 2-byte character */ - } - break; - - case 2: /* 3rd byte */ - filter->status = 3; - break; - - case 3: /* 4th byte */ - if ((c & 0xfc) == 0xdc) { - filter->status = 0; - } else { - filter->flag = 1; /* Surrogate pair wrongly encoded */ - } - break; - } - - return c; -} - -static int mbfl_filt_ident_utf16be(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* 1st byte */ - if ((c & 0xfc) == 0xd8) { - /* Looks like a surrogate pair */ - filter->status = 2; - } else if ((c & 0xfc) == 0xdc) { - /* This is wrong; the second part of the surrogate pair has come first */ - filter->flag = 1; - } else { - /* Just an ordinary 2-byte character */ - filter->status = 1; - } - break; - - case 1: /* 2nd byte, not surrogate pair */ - filter->status = 0; - break; - - case 2: /* 2nd byte, surrogate pair */ - filter->status = 3; - break; - - case 3: /* 3rd byte, surrogate pair */ - if ((c & 0xfc) == 0xdc) { - filter->status = 4; - } else { - filter->flag = 1; /* Surrogate pair wrongly encoded */ - } - break; - - case 4: /* 4th byte, surrogate pair */ - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf16.h b/ext/mbstring/libmbfl/filters/mbfilter_utf16.h index 601779add41..727c231b347 100644 
--- a/ext/mbstring/libmbfl/filters/mbfilter_utf16.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf16.h @@ -33,9 +33,7 @@ extern const mbfl_encoding mbfl_encoding_utf16; extern const mbfl_encoding mbfl_encoding_utf16be; extern const mbfl_encoding mbfl_encoding_utf16le; -extern const struct mbfl_identify_vtbl vtbl_identify_utf16; -extern const struct mbfl_identify_vtbl vtbl_identify_utf16le; -extern const struct mbfl_identify_vtbl vtbl_identify_utf16be; + extern const struct mbfl_convert_vtbl vtbl_utf16_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_utf16; extern const struct mbfl_convert_vtbl vtbl_utf16be_wchar; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf32.c b/ext/mbstring/libmbfl/filters/mbfilter_utf32.c index 1d9f4795958..51051a4db91 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf32.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf32.c @@ -30,9 +30,6 @@ #include "mbfilter.h" #include "mbfilter_utf32.h" -static int mbfl_filt_ident_utf32(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_utf32le(int c, mbfl_identify_filter *filter); -static int mbfl_filt_ident_utf32be(int c, mbfl_identify_filter *filter); static int mbfl_filt_conv_utf32_wchar_flush(mbfl_convert_filter *filter); static const char *mbfl_encoding_utf32_aliases[] = {"utf32", NULL}; @@ -70,24 +67,6 @@ const mbfl_encoding mbfl_encoding_utf32le = { &vtbl_wchar_utf32le }; -const struct mbfl_identify_vtbl vtbl_identify_utf32 = { - mbfl_no_encoding_utf32, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_utf32 -}; - -const struct mbfl_identify_vtbl vtbl_identify_utf32be = { - mbfl_no_encoding_utf32be, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_utf32be -}; - -const struct mbfl_identify_vtbl vtbl_identify_utf32le = { - mbfl_no_encoding_utf32le, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_utf32le -}; - const struct mbfl_convert_vtbl vtbl_utf32_wchar = { mbfl_no_encoding_utf32, mbfl_no_encoding_wchar, @@ -327,128 +306,3 @@ static int 
mbfl_filt_conv_utf32_wchar_flush(mbfl_convert_filter *filter) filter->status = filter->cache = 0; return 0; } - -static int mbfl_filt_ident_utf32(int c, mbfl_identify_filter *filter) -{ - /* The largest valid codepoint is 0x10FFFF; we don't want values above that - * Neither do we want to see surrogates - * For UTF-32 (not LE or BE), we do also need to look for a byte-order mark */ - switch (filter->status) { - case 0: /* 1st byte */ - if (c == 0xff) { - filter->status = 1; - return c; - } - filter->filter_function = mbfl_filt_ident_utf32be; - break; - - case 1: /* 2nd byte */ - if (c == 0xfe) { - filter->status = 2; - return c; - } - filter->filter_function = mbfl_filt_ident_utf32be; - (filter->filter_function)(0xff, filter); - break; - - case 2: /* 3rd byte */ - if (c == 0) { - filter->status = 3; - return c; - } - filter->filter_function = mbfl_filt_ident_utf32be; - (filter->filter_function)(0xff, filter); - (filter->filter_function)(0xfe, filter); - break; - - case 3: /* 4th byte */ - if (c == 0) { - /* We found a little-endian byte-order mark! 
*/ - filter->status = 0; - filter->filter_function = mbfl_filt_ident_utf32le; - return c; - } - filter->filter_function = mbfl_filt_ident_utf32be; - (filter->filter_function)(0xff, filter); - (filter->filter_function)(0xfe, filter); - (filter->filter_function)(0, filter); - break; - } - - return (filter->filter_function)(c, filter); -} - -static int mbfl_filt_ident_utf32le(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* 1st byte */ - filter->status = 1; - break; - - case 1: /* 2nd byte */ - if (c >= 0xD8 && c <= 0xDF) { - filter->status = 4; /* might be surrogate if we are in BMP */ - } else { - filter->status = 2; - } - break; - - case 2: /* 3rd byte */ - if (c > 0x10) { - filter->flag = 1; /* too big */ - } - filter->status = 3; - break; - - case 3: /* 4th byte */ - if (c) { - filter->flag = 1; /* too big */ - } - filter->status = 0; - break; - - case 4: /* 3rd byte, previous byte looked like surrogate */ - if (!c) { - filter->flag = 1; /* yep, it's a surrogate */ - } - filter->status = 3; - } - return c; -} - -static int mbfl_filt_ident_utf32be(int c, mbfl_identify_filter *filter) -{ - switch (filter->status) { - case 0: /* 1st byte */ - if (c) { - filter->flag = 1; /* too big */ - } - filter->status = 1; - break; - - case 1: /* 2nd byte */ - if (c > 0x10) { - filter->flag = 1; /* too big */ - } if (c) { - filter->status = 4; /* not in the BMP */ - } else { - filter->status = 2; - } - break; - - case 2: /* 3rd byte */ - if (c >= 0xD8 && c <= 0xDF) { - filter->flag = 1; /* reserved range for surrogates */ - } - filter->status = 3; - break; - - case 3: /* 4th byte */ - filter->status = 0; - break; - - case 4: /* 3rd byte, not in BMP */ - filter->status = 3; - } - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf32.h b/ext/mbstring/libmbfl/filters/mbfilter_utf32.h index de1235e4519..58c69d72f16 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf32.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf32.h @@ -33,9 
+33,7 @@ extern const mbfl_encoding mbfl_encoding_utf32; extern const mbfl_encoding mbfl_encoding_utf32be; extern const mbfl_encoding mbfl_encoding_utf32le; -extern const struct mbfl_identify_vtbl vtbl_identify_utf32; -extern const struct mbfl_identify_vtbl vtbl_identify_utf32be; -extern const struct mbfl_identify_vtbl vtbl_identify_utf32le; + extern const struct mbfl_convert_vtbl vtbl_utf32_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_utf32; extern const struct mbfl_convert_vtbl vtbl_utf32be_wchar; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf7.c b/ext/mbstring/libmbfl/filters/mbfilter_utf7.c index ed931a8b1f4..bda303ef6f8 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf7.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf7.c @@ -30,8 +30,6 @@ #include "mbfilter.h" #include "mbfilter_utf7.h" -static int mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter); - static const unsigned char mbfl_base64_table[] = { /* 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', */ 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d, @@ -58,12 +56,6 @@ const mbfl_encoding mbfl_encoding_utf7 = { &vtbl_wchar_utf7 }; -const struct mbfl_identify_vtbl vtbl_identify_utf7 = { - mbfl_no_encoding_utf7, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_utf7 -}; - const struct mbfl_convert_vtbl vtbl_utf7_wchar = { mbfl_no_encoding_utf7, mbfl_no_encoding_wchar, @@ -409,52 +401,3 @@ int mbfl_filt_conv_wchar_utf7_flush(mbfl_convert_filter *filter) return 0; } - -static int mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter) -{ - int n; - - switch (filter->status) { - /* directly encoded characters */ - case 0: - if (c == 0x2b) { /* '+' shift character */ - filter->status++; - } else if (c == 0x5c || c == 0x7e || c < 0 || c > 0x7f) { /* illegal character */ - filter->flag = 1; /* bad */ - } - break; - - /* Modified Base64 */ - case 1: - case 2: - n = 0; - if (c >= 0x41 && c <= 0x5a) { /* A - Z */ - n = 1; - } else if (c >= 0x61 && c <= 
0x7a) { /* a - z */ - n = 1; - } else if (c >= 0x30 && c <= 0x39) { /* 0 - 9 */ - n = 1; - } else if (c == 0x2b) { /* '+' */ - n = 1; - } else if (c == 0x2f) { /* '/' */ - n = 1; - } - if (n <= 0) { - if (filter->status == 1 && c != 0x2d) { - filter->flag = 1; /* bad */ - } else if (c < 0 || c > 0x7f) { - filter->flag = 1; /* bad */ - } - filter->status = 0; - } else { - filter->status = 2; - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf7.h b/ext/mbstring/libmbfl/filters/mbfilter_utf7.h index 6223bdba97e..119937eb417 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf7.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf7.h @@ -33,7 +33,6 @@ #include "mbfilter.h" extern const mbfl_encoding mbfl_encoding_utf7; -extern const struct mbfl_identify_vtbl vtbl_identify_utf7; extern const struct mbfl_convert_vtbl vtbl_utf7_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_utf7; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf8.c b/ext/mbstring/libmbfl/filters/mbfilter_utf8.c index 3b7a76de475..0c73952558c 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf8.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf8.c @@ -30,8 +30,6 @@ #include "mbfilter.h" #include "mbfilter_utf8.h" -int mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter); - const unsigned char mblen_table_utf8[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -64,12 +62,6 @@ const mbfl_encoding mbfl_encoding_utf8 = { &vtbl_wchar_utf8 }; -const struct mbfl_identify_vtbl vtbl_identify_utf8 = { - mbfl_no_encoding_utf8, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_utf8 -}; - const struct mbfl_convert_vtbl vtbl_utf8_wchar = { mbfl_no_encoding_utf8, mbfl_no_encoding_wchar, @@ -236,76 +228,3 @@ int mbfl_filt_conv_wchar_utf8(int c, mbfl_convert_filter *filter) return c; } - -int mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter) -{ - int c1; - - c1 = 
(filter->status >> 8) & 0xff; - filter->status &= 0xff; - - if (c < 0x80) { - if (c < 0) { - filter->flag = 1; /* bad */ - } else if (filter->status) { - filter->flag = 1; /* bad */ - } - filter->status = 0; - } else if (c < 0xc0) { - switch (filter->status) { - case 0x20: /* 3 byte code 2nd char */ - if ((c1 == 0x0 && c >= 0xa0) || - (c1 == 0xd && c < 0xa0) || - (c1 > 0x0 && c1 != 0xd)) { - filter->status++; - } else { - filter->flag = 1; /* bad */ - filter->status = 0; - } - break; - case 0x30: /* 4 byte code 2nd char */ - if ((c1 == 0x0 && c >= 0x90) || - (c1 > 0x0 && c1 < 0x4) || - (c1 == 0x4 && c < 0x90)) { - filter->status++; - } else { - filter->flag = 1; /* bad */ - filter->status = 0; - } - break; - case 0x31: /* 4 byte code 3rd char */ - filter->status++; - break; - case 0x10: /* 2 byte code 2nd char */ - case 0x21: /* 3 byte code 3rd char */ - case 0x32: /* 4 byte code 4th char */ - filter->status = 0; - break; - default: - filter->flag = 1; /* bad */ - filter->status = 0; - break; - } - } else if (c < 0xc2) { /* 0xc0,0xc1 */ - filter->flag = 1; /* bad */ - filter->status = 0; - } else { - if (filter->status) { - filter->flag = 1; /* bad */ - } - filter->status = 0; - if (c < 0xe0) { /* 2 byte code first char */ - filter->status = 0x10; - } else if (c < 0xf0) { /* 3 byte code 1st char */ - filter->status = 0x20; - filter->status |= (c & 0xf) << 8; - } else if (c < 0xf5) { /* 4 byte code 1st char */ - filter->status = 0x30; - filter->status |= (c & 0x7) << 8; - } else { - filter->flag = 1; /* bad */ - } - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf8.h b/ext/mbstring/libmbfl/filters/mbfilter_utf8.h index b0c93ccd3db..24cb91ea5ac 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf8.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf8.h @@ -31,7 +31,6 @@ #define MBFL_MBFILTER_UTF8_H extern const mbfl_encoding mbfl_encoding_utf8; -extern const struct mbfl_identify_vtbl vtbl_identify_utf8; extern const struct mbfl_convert_vtbl 
vtbl_utf8_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_utf8; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c index 8f50d227817..763ed90fabd 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c @@ -32,7 +32,6 @@ #include "mbfilter_utf8_mobile.h" #include "mbfilter_sjis_mobile.h" -extern int mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter); extern int mbfl_filt_conv_utf8_wchar_flush(mbfl_convert_filter *filter); extern const unsigned char mblen_table_utf8[]; @@ -86,30 +85,6 @@ const mbfl_encoding mbfl_encoding_utf8_sb = { &vtbl_wchar_utf8_sb }; -const struct mbfl_identify_vtbl vtbl_identify_utf8_docomo = { - mbfl_no_encoding_utf8_docomo, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_utf8 -}; - -const struct mbfl_identify_vtbl vtbl_identify_utf8_kddi_a = { - mbfl_no_encoding_utf8_kddi_a, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_utf8 -}; - -const struct mbfl_identify_vtbl vtbl_identify_utf8_kddi_b = { - mbfl_no_encoding_utf8_kddi_b, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_utf8 -}; - -const struct mbfl_identify_vtbl vtbl_identify_utf8_sb = { - mbfl_no_encoding_utf8_sb, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_utf8 -}; - const struct mbfl_convert_vtbl vtbl_utf8_docomo_wchar = { mbfl_no_encoding_utf8_docomo, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.h b/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.h index 74ac6e195e5..116bc16299d 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.h @@ -35,11 +35,6 @@ extern const mbfl_encoding mbfl_encoding_utf8_kddi_a; extern const mbfl_encoding mbfl_encoding_utf8_kddi_b; extern const mbfl_encoding mbfl_encoding_utf8_sb; -extern const struct mbfl_identify_vtbl vtbl_identify_utf8_docomo; -extern const struct mbfl_identify_vtbl 
vtbl_identify_utf8_kddi_a; -extern const struct mbfl_identify_vtbl vtbl_identify_utf8_kddi_b; -extern const struct mbfl_identify_vtbl vtbl_identify_utf8_sb; - extern const struct mbfl_convert_vtbl vtbl_utf8_docomo_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_utf8_docomo; extern const struct mbfl_convert_vtbl vtbl_utf8_kddi_a_wchar; diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index 68cb39fa900..08a7e8396cf 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -86,6 +86,8 @@ #include "mbfl_filter_output.h" #include "mbfilter_8bit.h" #include "mbfilter_wchar.h" +#include "mbstring.h" +#include "php_unicode.h" #include "filters/mbfilter_ascii.h" #include "filters/mbfilter_base64.h" #include "filters/mbfilter_qprint.h" @@ -289,114 +291,102 @@ size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd) /* * encoding detector */ -mbfl_encoding_detector * -mbfl_encoding_detector_new(const mbfl_encoding **elist, int elistsz, int strict) +static int mbfl_estimate_encoding_likelihood(int c, void* data) { - mbfl_encoding_detector *identd; + mbfl_convert_filter *filter = *((mbfl_convert_filter**)data); + uintptr_t *score = (uintptr_t*)(&filter->opaque); - int i, num; - mbfl_identify_filter *filter; + /* Receive wchars decoded from test string using candidate encoding + * If the test string was invalid in the candidate encoding, we assume + * it's the wrong one. */ + if (c & MBFL_WCSGROUP_THROUGH) { + filter->num_illegalchar++; + } else if (php_unicode_is_cntrl(c) || php_unicode_is_private(c)) { + /* Otherwise, count how many control characters and 'private use' + * codepoints we see. Those are rarely used and may indicate that + * the candidate encoding is not the right one. 
*/ + *score += 10; + } else if (php_unicode_is_punct(c)) { + /* Punctuation is also less common than letters/digits */ + (*score)++; + } + return c; +} - if (elist == NULL || elistsz <= 0) { +mbfl_encoding_detector *mbfl_encoding_detector_new(const mbfl_encoding **elist, int elistsz, int strict) +{ + if (!elistsz) { return NULL; } - /* allocate */ - identd = emalloc(sizeof(mbfl_encoding_detector)); - identd->filter_list = ecalloc(elistsz, sizeof(mbfl_identify_filter *)); - - /* create filters */ - i = 0; - num = 0; - while (i < elistsz) { - filter = mbfl_identify_filter_new2(elist[i]); - if (filter != NULL) { - identd->filter_list[num] = filter; - num++; - } - i++; + mbfl_encoding_detector *identd = emalloc(sizeof(mbfl_encoding_detector)); + identd->filter_list = ecalloc(elistsz, sizeof(mbfl_convert_filter*)); + for (int i = 0; i < elistsz; i++) { + identd->filter_list[i] = mbfl_convert_filter_new(elist[i], &mbfl_encoding_wchar, + mbfl_estimate_encoding_likelihood, NULL, &identd->filter_list[i]); + identd->filter_list[i]->opaque = (void*)0; } - identd->filter_list_size = num; - - /* set strict flag */ + identd->filter_list_size = elistsz; identd->strict = strict; - return identd; } - -void -mbfl_encoding_detector_delete(mbfl_encoding_detector *identd) +void mbfl_encoding_detector_delete(mbfl_encoding_detector *identd) { - int i; - - if (identd != NULL) { - if (identd->filter_list != NULL) { - i = identd->filter_list_size; - while (i > 0) { - i--; - mbfl_identify_filter_delete(identd->filter_list[i]); - } - efree((void *)identd->filter_list); - } - efree((void *)identd); + for (int i = 0; i < identd->filter_list_size; i++) { + mbfl_convert_filter_delete(identd->filter_list[i]); } + efree(identd->filter_list); + efree(identd); } -int -mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string) +int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string) { - int res = 0; - /* feed data */ - if (identd != NULL && string != 
NULL && string->val != NULL) { - int num = identd->filter_list_size; - size_t n = string->len; - unsigned char *p = string->val; - int bad = 0; - while (n > 0) { - int i; - for (i = 0; i < num; i++) { - mbfl_identify_filter *filter = identd->filter_list[i]; - if (!filter->flag) { - (*filter->filter_function)(*p, filter); - if (filter->flag) { - bad++; - } + int num = identd->filter_list_size; + size_t n = string->len; + unsigned char *p = string->val; + int bad = 0; + + while (n--) { + for (int i = 0; i < num; i++) { + mbfl_convert_filter *filter = identd->filter_list[i]; + if (!filter->num_illegalchar) { + (*filter->filter_function)(*p, filter); + if (filter->num_illegalchar) { + bad++; } } - if ((num - 1) <= bad) { - res = 1; - break; - } - p++; - n--; + } + if ((num - 1) <= bad && !identd->strict) { + return 1; + } + p++; + } + + if (identd->strict) { + for (int i = 0; i < num; i++) { + mbfl_convert_filter *filter = identd->filter_list[i]; + (filter->filter_flush)(filter); } } - return res; + return 0; } const mbfl_encoding *mbfl_encoding_detector_judge(mbfl_encoding_detector *identd) { - mbfl_identify_filter *filter; - const mbfl_encoding *encoding = NULL; - int n; + uintptr_t best_score = UINT_MAX; /* Low score is 'better' */ + const mbfl_encoding *enc = NULL; - /* judge */ - if (identd != NULL) { - n = identd->filter_list_size - 1; - while (n >= 0) { - filter = identd->filter_list[n]; - if (!filter->flag) { - if (!identd->strict || !filter->status) { - encoding = filter->encoding; - } - } - n--; + for (int i = 0; i < identd->filter_list_size; i++) { + mbfl_convert_filter *filter = identd->filter_list[i]; + if (!filter->num_illegalchar && (uintptr_t)filter->opaque < best_score) { + enc = filter->from; + best_score = (uintptr_t)filter->opaque; } } - return encoding; + return enc; } /* @@ -465,80 +455,19 @@ mbfl_convert_encoding( return mbfl_memory_device_result(&device, result); } - /* * identify encoding */ -const mbfl_encoding * 
-mbfl_identify_encoding(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict) +const mbfl_encoding *mbfl_identify_encoding(mbfl_string *string, const mbfl_encoding **elist, int elistsz, int strict) { - int i, bad; - size_t n; - unsigned char *p; - mbfl_identify_filter *flist, *filter; - const mbfl_encoding *encoding; - - /* flist is an array of mbfl_identify_filter instances */ - flist = ecalloc(elistsz, sizeof(mbfl_identify_filter)); - - if (elist != NULL) { - for (i = 0; i < elistsz; i++) { - mbfl_identify_filter_init2(&flist[i], elist[i]); - } + if (!elistsz) { + return NULL; } - - /* feed data */ - n = string->len; - p = string->val; - - if (p != NULL) { - bad = 0; - while (n > 0) { - for (i = 0; i < elistsz; i++) { - filter = &flist[i]; - if (!filter->flag) { - (*filter->filter_function)(*p, filter); - if (filter->flag) { - bad++; - } - } - } - if ((elistsz - 1) <= bad && !strict) { - break; - } - p++; - n--; - } - } - - /* judge */ - encoding = NULL; - - for (i = 0; i < elistsz; i++) { - filter = &flist[i]; - if (!filter->flag) { - if (strict && filter->status) { - continue; - } - encoding = filter->encoding; - break; - } - } - - /* fall-back judge */ - if (!encoding) { - for (i = 0; i < elistsz; i++) { - filter = &flist[i]; - if (!filter->flag && (!strict || !filter->status)) { - encoding = filter->encoding; - break; - } - } - } - - efree((void *)flist); - - return encoding; + mbfl_encoding_detector *identd = mbfl_encoding_detector_new(elist, elistsz, strict); + mbfl_encoding_detector_feed(identd, string); + const mbfl_encoding *enc = mbfl_encoding_detector_judge(identd); + mbfl_encoding_detector_delete(identd); + return enc; } /* diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.h b/ext/mbstring/libmbfl/mbfl/mbfilter.h index 16742be96c3..b914a36c9ef 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.h @@ -93,7 +93,6 @@ #include "mbfl_language.h" #include "mbfl_string.h" #include "mbfl_convert.h" 
-#include "mbfl_ident.h" /* Prefer local fix, otherwise need to include too much. */ #ifndef ssize_t @@ -156,7 +155,7 @@ MBFLAPI extern size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd); typedef struct _mbfl_encoding_detector mbfl_encoding_detector; struct _mbfl_encoding_detector { - mbfl_identify_filter **filter_list; + mbfl_convert_filter **filter_list; int filter_list_size; int strict; }; diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.c b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.c index 41081aa883d..4017829cf80 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.c @@ -50,12 +50,6 @@ const mbfl_encoding mbfl_encoding_8bit = { &vtbl_wchar_8bit }; -const struct mbfl_identify_vtbl vtbl_identify_8bit = { - mbfl_no_encoding_8bit, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_true -}; - const struct mbfl_convert_vtbl vtbl_8bit_wchar = { mbfl_no_encoding_8bit, mbfl_no_encoding_wchar, diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h index 64369b999d4..acdf640fd71 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h @@ -35,6 +35,5 @@ #include "mbfilter.h" MBFLAPI extern const mbfl_encoding mbfl_encoding_8bit; -extern const struct mbfl_identify_vtbl vtbl_identify_8bit; #endif /* MBFL_MBFILTER_8BIT_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c index 020fb74d743..6e7f2077e5e 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c @@ -113,7 +113,7 @@ static const struct mbfl_convert_vtbl *mbfl_special_filter_list[] = { NULL }; -static void mbfl_convert_filter_common_init(mbfl_convert_filter *filter, const mbfl_encoding *from, const mbfl_encoding *to, +static void mbfl_convert_filter_init(mbfl_convert_filter *filter, const mbfl_encoding *from, const mbfl_encoding *to, const struct mbfl_convert_vtbl *vtbl, 
output_function_t output_function, flush_function_t flush_function, void* data) { /* encoding structure */ @@ -140,7 +140,6 @@ static void mbfl_convert_filter_common_init(mbfl_convert_filter *filter, const m (*filter->filter_ctor)(filter); } - mbfl_convert_filter* mbfl_convert_filter_new(const mbfl_encoding *from, const mbfl_encoding *to, output_function_t output_function, flush_function_t flush_function, void* data) { @@ -150,7 +149,7 @@ mbfl_convert_filter* mbfl_convert_filter_new(const mbfl_encoding *from, const mb } mbfl_convert_filter *filter = emalloc(sizeof(mbfl_convert_filter)); - mbfl_convert_filter_common_init(filter, from, to, vtbl, output_function, flush_function, data); + mbfl_convert_filter_init(filter, from, to, vtbl, output_function, flush_function, data); return filter; } @@ -161,7 +160,7 @@ mbfl_convert_filter* mbfl_convert_filter_new2(const struct mbfl_convert_vtbl *vt const mbfl_encoding *to_encoding = mbfl_no2encoding(vtbl->to); mbfl_convert_filter *filter = emalloc(sizeof(mbfl_convert_filter)); - mbfl_convert_filter_common_init(filter, from_encoding, to_encoding, vtbl, output_function, flush_function, data); + mbfl_convert_filter_init(filter, from_encoding, to_encoding, vtbl, output_function, flush_function, data); return filter; } @@ -208,8 +207,7 @@ void mbfl_convert_filter_reset(mbfl_convert_filter *filter, const mbfl_encoding vtbl = &vtbl_pass; } - mbfl_convert_filter_common_init(filter, from, to, vtbl, - filter->output_function, filter->flush_function, filter->data); + mbfl_convert_filter_init(filter, from, to, vtbl, filter->output_function, filter->flush_function, filter->data); } void mbfl_convert_filter_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest) diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c deleted file mode 100644 index 5ceb516bfa6..00000000000 --- a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c +++ /dev/null @@ -1,254 +0,0 @@ -/* - * "streamable kanji code filter and converter" - 
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter.c - * by Moriyoshi Koizumi on 20 Dec 2002. The file - * mbfilter.c is included in this package . 
- * - */ - -#include - -#include "mbfl_ident.h" -#include "mbfilter_pass.h" -#include "mbfilter_8bit.h" -#include "mbfilter_wchar.h" - -#include "filters/mbfilter_euc_cn.h" -#include "filters/mbfilter_hz.h" -#include "filters/mbfilter_euc_tw.h" -#include "filters/mbfilter_big5.h" -#include "filters/mbfilter_uhc.h" -#include "filters/mbfilter_euc_kr.h" -#include "filters/mbfilter_iso2022_kr.h" -#include "filters/mbfilter_sjis.h" -#include "filters/mbfilter_sjis_open.h" -#include "filters/mbfilter_sjis_mobile.h" -#include "filters/mbfilter_jis.h" -#include "filters/mbfilter_iso2022_jp_ms.h" -#include "filters/mbfilter_iso2022jp_2004.h" -#include "filters/mbfilter_iso2022jp_mobile.h" -#include "filters/mbfilter_euc_jp.h" -#include "filters/mbfilter_euc_jp_win.h" -#include "filters/mbfilter_euc_jp_2004.h" -#include "filters/mbfilter_utf8_mobile.h" -#include "filters/mbfilter_ascii.h" -#include "filters/mbfilter_koi8r.h" -#include "filters/mbfilter_koi8u.h" -#include "filters/mbfilter_cp866.h" -#include "filters/mbfilter_cp932.h" -#include "filters/mbfilter_cp936.h" -#include "filters/mbfilter_cp1251.h" -#include "filters/mbfilter_cp1252.h" -#include "filters/mbfilter_cp1254.h" -#include "filters/mbfilter_cp51932.h" -#include "filters/mbfilter_cp5022x.h" -#include "filters/mbfilter_gb18030.h" -#include "filters/mbfilter_iso8859_1.h" -#include "filters/mbfilter_iso8859_2.h" -#include "filters/mbfilter_iso8859_3.h" -#include "filters/mbfilter_iso8859_4.h" -#include "filters/mbfilter_iso8859_5.h" -#include "filters/mbfilter_iso8859_6.h" -#include "filters/mbfilter_iso8859_7.h" -#include "filters/mbfilter_iso8859_8.h" -#include "filters/mbfilter_iso8859_9.h" -#include "filters/mbfilter_iso8859_10.h" -#include "filters/mbfilter_iso8859_13.h" -#include "filters/mbfilter_iso8859_14.h" -#include "filters/mbfilter_iso8859_15.h" -#include "filters/mbfilter_iso8859_16.h" -#include "filters/mbfilter_base64.h" -#include "filters/mbfilter_qprint.h" -#include 
"filters/mbfilter_uuencode.h" -#include "filters/mbfilter_7bit.h" -#include "filters/mbfilter_utf7.h" -#include "filters/mbfilter_utf7imap.h" -#include "filters/mbfilter_utf8.h" -#include "filters/mbfilter_utf16.h" -#include "filters/mbfilter_utf32.h" -#include "filters/mbfilter_ucs4.h" -#include "filters/mbfilter_ucs2.h" -#include "filters/mbfilter_htmlent.h" -#include "filters/mbfilter_armscii8.h" -#include "filters/mbfilter_cp850.h" - -static const struct mbfl_identify_vtbl vtbl_identify_false = { - mbfl_no_encoding_pass, - mbfl_filt_ident_false_ctor, - mbfl_filt_ident_false -}; - -static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = { - &vtbl_identify_utf8, - &vtbl_identify_utf7, - &vtbl_identify_ascii, - &vtbl_identify_eucjp, - &vtbl_identify_sjis, - &vtbl_identify_sjis_open, - &vtbl_identify_eucjpwin, - &vtbl_identify_eucjp2004, - &vtbl_identify_cp932, - &vtbl_identify_jis, - &vtbl_identify_2022jp, - &vtbl_identify_2022jpms, - &vtbl_identify_2022jp_2004, - &vtbl_identify_2022jp_kddi, - &vtbl_identify_cp51932, - &vtbl_identify_sjis_docomo, - &vtbl_identify_sjis_kddi, - &vtbl_identify_sjis_sb, - &vtbl_identify_utf8_docomo, - &vtbl_identify_utf8_kddi_a, - &vtbl_identify_utf8_kddi_b, - &vtbl_identify_utf8_sb, - &vtbl_identify_euccn, - &vtbl_identify_cp936, - &vtbl_identify_hz, - &vtbl_identify_euctw, - &vtbl_identify_big5, - &vtbl_identify_cp950, - &vtbl_identify_euckr, - &vtbl_identify_uhc, - &vtbl_identify_2022kr, - &vtbl_identify_cp1251, - &vtbl_identify_cp866, - &vtbl_identify_koi8r, - &vtbl_identify_koi8u, - &vtbl_identify_cp1252, - &vtbl_identify_cp1254, - &vtbl_identify_8859_1, - &vtbl_identify_8859_2, - &vtbl_identify_8859_3, - &vtbl_identify_8859_4, - &vtbl_identify_8859_5, - &vtbl_identify_8859_6, - &vtbl_identify_8859_7, - &vtbl_identify_8859_8, - &vtbl_identify_8859_9, - &vtbl_identify_8859_10, - &vtbl_identify_8859_13, - &vtbl_identify_8859_14, - &vtbl_identify_8859_15, - &vtbl_identify_8859_16, - &vtbl_identify_armscii8, - 
&vtbl_identify_cp850, - &vtbl_identify_jis_ms, - &vtbl_identify_cp50220, - &vtbl_identify_cp50221, - &vtbl_identify_cp50222, - &vtbl_identify_gb18030, - &vtbl_identify_7bit, - &vtbl_identify_utf16, - &vtbl_identify_utf16le, - &vtbl_identify_utf16be, - &vtbl_identify_8bit, - &vtbl_identify_ucs2, - &vtbl_identify_ucs2be, - &vtbl_identify_ucs2le, - &vtbl_identify_utf32, - &vtbl_identify_utf32be, - &vtbl_identify_utf32le, - &vtbl_identify_false, - NULL -}; - -const struct mbfl_identify_vtbl* mbfl_identify_filter_get_vtbl(enum mbfl_no_encoding encoding) -{ - const struct mbfl_identify_vtbl** vtbl; - - for (vtbl = mbfl_identify_filter_list; *vtbl; vtbl++) { - if ((*vtbl)->encoding == encoding) { - return *vtbl; - } - } - - return NULL; -} - -mbfl_identify_filter *mbfl_identify_filter_new(enum mbfl_no_encoding encoding) -{ - mbfl_identify_filter *filter = emalloc(sizeof(mbfl_identify_filter)); - mbfl_identify_filter_init(filter, encoding); - return filter; -} - -mbfl_identify_filter *mbfl_identify_filter_new2(const mbfl_encoding *encoding) -{ - mbfl_identify_filter *filter = emalloc(sizeof(mbfl_identify_filter)); - mbfl_identify_filter_init2(filter, encoding); - return filter; -} - -void mbfl_identify_filter_init(mbfl_identify_filter *filter, enum mbfl_no_encoding encoding) -{ - const mbfl_encoding *enc = mbfl_no2encoding(encoding); - mbfl_identify_filter_init2(filter, enc ? 
enc : &mbfl_encoding_pass); -} - -void mbfl_identify_filter_init2(mbfl_identify_filter *filter, const mbfl_encoding *encoding) -{ - filter->encoding = encoding; - filter->status = filter->flag = filter->score = 0; - - /* setup the function table */ - const struct mbfl_identify_vtbl *vtbl = mbfl_identify_filter_get_vtbl(filter->encoding->no_encoding); - if (vtbl == NULL) { - vtbl = &vtbl_identify_false; - } - filter->filter_ctor = vtbl->filter_ctor; - filter->filter_function = vtbl->filter_function; - - (*filter->filter_ctor)(filter); -} - -void mbfl_identify_filter_delete(mbfl_identify_filter *filter) -{ - efree(filter); -} - -void mbfl_filt_ident_common_ctor(mbfl_identify_filter *filter) -{ - filter->status = filter->flag = 0; -} - -/* A (useless) filter which says that _every_ string is invalid in a certain encoding. - * Obviously, that cannot be true. Remove after all encodings have proper identify filters */ -int mbfl_filt_ident_false(int c, mbfl_identify_filter *filter) -{ - filter->flag = 1; /* bad */ - return c; -} - -void mbfl_filt_ident_false_ctor(mbfl_identify_filter *filter) -{ - filter->status = 0; - filter->flag = 1; -} - -/* For encodings in which _every_ possible input string is valid */ -int mbfl_filt_ident_true(int c, mbfl_identify_filter *filter) -{ - return c; -} diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_ident.h b/ext/mbstring/libmbfl/mbfl/mbfl_ident.h deleted file mode 100644 index 39be183ef3f..00000000000 --- a/ext/mbstring/libmbfl/mbfl/mbfl_ident.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. 
- * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter.h - * by Moriyoshi Koizumi on 20 Dec 2002. The file - * mbfilter.h is included in this package . - * - */ - -#ifndef MBFL_IDENT_H -#define MBFL_IDENT_H - -#include "mbfl_defs.h" -#include "mbfl_encoding.h" - -/* - * identify filter - */ -typedef struct _mbfl_identify_filter mbfl_identify_filter; - -struct _mbfl_identify_filter { - void (*filter_ctor)(mbfl_identify_filter *filter); - int (*filter_function)(int c, mbfl_identify_filter *filter); - int status; - int flag; - int score; - const mbfl_encoding *encoding; -}; - -struct mbfl_identify_vtbl { - enum mbfl_no_encoding encoding; - void (*filter_ctor)(mbfl_identify_filter *filter); - int (*filter_function)(int c, mbfl_identify_filter *filter); -}; - -MBFLAPI extern const struct mbfl_identify_vtbl * mbfl_identify_filter_get_vtbl(enum mbfl_no_encoding encoding); -MBFLAPI extern mbfl_identify_filter * mbfl_identify_filter_new(enum mbfl_no_encoding encoding); -MBFLAPI extern mbfl_identify_filter * mbfl_identify_filter_new2(const mbfl_encoding *encoding); -MBFLAPI extern void mbfl_identify_filter_delete(mbfl_identify_filter *filter); -MBFLAPI extern void mbfl_identify_filter_init(mbfl_identify_filter *filter, enum mbfl_no_encoding encoding); -MBFLAPI extern void mbfl_identify_filter_init2(mbfl_identify_filter *filter, const mbfl_encoding *encoding); - -MBFLAPI extern void 
mbfl_filt_ident_common_ctor(mbfl_identify_filter *filter); -MBFLAPI extern void mbfl_filt_ident_false_ctor(mbfl_identify_filter *filter); - -MBFLAPI extern int mbfl_filt_ident_false(int c, mbfl_identify_filter *filter); -MBFLAPI extern int mbfl_filt_ident_true(int c, mbfl_identify_filter *filter); - -#endif /* MBFL_IDENT_H */ diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index e27d46250ad..22332e52160 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -3855,23 +3855,30 @@ PHP_FUNCTION(mb_get_info) } /* }}} */ +static int mbfl_filt_check_errors(int c, void* data) +{ + if (c & MBFL_WCSGROUP_THROUGH) { + (*((mbfl_convert_filter**)data))->num_illegalchar++; + } + return c; +} MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const mbfl_encoding *encoding) { - mbfl_identify_filter *ident = mbfl_identify_filter_new2(encoding); + mbfl_convert_filter *filter = mbfl_convert_filter_new(encoding, &mbfl_encoding_wchar, mbfl_filt_check_errors, NULL, &filter); while (length--) { unsigned char c = *input++; - (ident->filter_function)(c, ident); - if (ident->flag) { - mbfl_identify_filter_delete(ident); + (filter->filter_function)(c, filter); + if (filter->num_illegalchar) { + mbfl_convert_filter_delete(filter); return 0; } } - /* String must not end in the middle of a multi-byte character */ - int result = (ident->status == 0); - mbfl_identify_filter_delete(ident); + (filter->filter_flush)(filter); + int result = !filter->num_illegalchar; + mbfl_convert_filter_delete(filter); return result; } diff --git a/ext/mbstring/tests/bug45722.phpt b/ext/mbstring/tests/bug45722.phpt index 97f6fe5d976..f0649cbb2ad 100644 --- a/ext/mbstring/tests/bug45722.phpt +++ b/ext/mbstring/tests/bug45722.phpt @@ -7,4 +7,4 @@ Bug #45722 (mb_check_encoding() crashes) var_dump(mb_check_encoding("&\xc2\xb7 TEST TEST TEST TEST TEST TEST", "HTML-ENTITIES")); ?> --EXPECT-- -bool(false) +bool(true) diff --git 
a/ext/mbstring/tests/mb_convert_encoding.phpt b/ext/mbstring/tests/mb_convert_encoding.phpt index 1f39d21b15c..5ea4440049a 100644 --- a/ext/mbstring/tests/mb_convert_encoding.phpt +++ b/ext/mbstring/tests/mb_convert_encoding.phpt @@ -20,11 +20,11 @@ $euc_jp = ' // Note: For some reason it complains, results are different. Not researched. echo "== BASIC TEST ==\n"; $s = $sjis; -$s = mb_convert_encoding($s, 'EUC-JP', 'SJIS'); +$s = bin2hex(mb_convert_encoding($s, 'EUC-JP', 'SJIS')); print("EUC-JP: $s\n"); // EUC-JP $s = $jis; -$s = mb_convert_encoding($s, 'EUC-JP', 'JIS'); +$s = bin2hex(mb_convert_encoding($s, 'EUC-JP', 'JIS')); print("EUC-JP: $s\n"); // EUC-JP $s = $euc_jp; @@ -41,7 +41,7 @@ echo "== STRING ENCODING LIST ==\n"; $a = 'JIS,UTF-8,EUC-JP,SJIS'; $s = $jis; -$s = mb_convert_encoding($s, 'EUC-JP', $a); +$s = bin2hex(mb_convert_encoding($s, 'EUC-JP', $a)); print("EUC-JP: $s\n"); // EUC-JP $s = $euc_jp; @@ -58,7 +58,7 @@ echo "== ARRAY ENCODING LIST ==\n"; $a = array(0=>'JIS', 1=>'UTF-8', 2=>'EUC-JP', 3=>'SJIS'); $s = $jis; -$s = mb_convert_encoding($s, 'EUC-JP', $a); +$s = bin2hex(mb_convert_encoding($s, 'EUC-JP', $a)); print("EUC-JP: $s\n"); // EUC-JP $s = $euc_jp; @@ -74,7 +74,7 @@ print("JIS: ".base64_encode($s)."\n"); // JIS echo "== DETECT ORDER ==\n"; $s = $jis; -$s = mb_convert_encoding($s, 'EUC-JP', 'auto'); +$s = bin2hex(mb_convert_encoding($s, 'EUC-JP', 'auto')); print("EUC-JP: $s\n"); // EUC-JP $s = $euc_jp; @@ -86,18 +86,18 @@ $s = mb_convert_encoding($s, 'JIS', 'auto'); print("JIS: ".base64_encode($s)."\n"); // JIS -// Invalid(?) Parameters +// Invalid Parameters echo "== INVALID PARAMETER ==\n"; $s = mb_convert_encoding(1234, 'EUC-JP'); -print("INT: $s\n"); // EUC-JP +print("INT: $s\n"); $s = mb_convert_encoding('', 'EUC-JP'); print("EUC-JP: $s\n"); // SJIS $s = $euc_jp; try { - var_dump( mb_convert_encoding($s, 'BAD') ); + var_dump(mb_convert_encoding($s, 'BAD')); } catch (\ValueError $e) { echo $e->getMessage() . 
\PHP_EOL; } @@ -105,20 +105,20 @@ try { ?> --EXPECT-- == BASIC TEST == -EUC-JP: 日本語テキストです。0123456789。 -EUC-JP: 日本語テキストです。0123456789。 +EUC-JP: c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3 +EUC-JP: c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3 SJIS: k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg== JIS: GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg== == STRING ENCODING LIST == -EUC-JP: 日本語テキストです。0123456789。 +EUC-JP: c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3 SJIS: k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg== JIS: GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg== == ARRAY ENCODING LIST == -EUC-JP: 日本語テキストです。0123456789。 +EUC-JP: c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3 SJIS: k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg== JIS: GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg== == DETECT ORDER == -EUC-JP: 日本語テキストです。0123456789。 +EUC-JP: c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3 SJIS: k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg== JIS: GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg== == INVALID PARAMETER == diff --git a/ext/mbstring/tests/mb_detect_encoding.phpt b/ext/mbstring/tests/mb_detect_encoding.phpt index 71c44c16427..b3db5f88250 100644 --- a/ext/mbstring/tests/mb_detect_encoding.phpt +++ b/ext/mbstring/tests/mb_detect_encoding.phpt @@ -41,7 +41,6 @@ echo "== ARRAY ENCODING LIST ==\n"; $a = array(0=>'UTF-8',1=>'EUC-JP', 2=>'SJIS', 3=>'JIS'); -// Note: Due to detect order, detected as UTF-8 $s = $jis; $s = mb_detect_encoding($s, $a); print("JIS: $s\n"); @@ -98,7 +97,7 @@ JIS: JIS EUC-JP: EUC-JP EUC-JP: EUC-JP == ARRAY ENCODING LIST == -JIS: UTF-8 +JIS: JIS EUC-JP: EUC-JP SJIS: SJIS == DETECT ORDER ==