From e2459857afdf70ed4bc23bc7f614e9f383eb3072 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Thu, 22 Oct 2020 21:18:52 +0200 Subject: [PATCH] Remove duplicate implementation of CP932 from mbstring Sigh. Double sigh. After fruitlessly searching the Internet for information on this mysterious text encoding called "SJIS-open", I wrote a script to try converting every Unicode codepoint from 0-0xFFFF and compare the results from different variants of Shift-JIS, to see which one "SJIS-open" would be most similar to. The result? It's just CP932. There is no difference at all. So why do we have two implementations of CP932 in mbstring? In case somebody, somewhere is using "SJIS-open" (or its aliases "SJIS-win" or "SJIS-ms"), add these as aliases to CP932 so existing code will continue to work. --- ext/mbstring/config.m4 | 1 - ext/mbstring/config.w32 | 2 +- ext/mbstring/libmbfl/filters/mbfilter_cp932.c | 2 +- .../libmbfl/filters/mbfilter_sjis_open.c | 308 ------------------ .../libmbfl/filters/mbfilter_sjis_open.h | 43 --- ext/mbstring/libmbfl/mbfl/mbfl_convert.c | 1 - ext/mbstring/libmbfl/mbfl/mbfl_encoding.c | 2 - ext/mbstring/libmbfl/mbfl/mbfl_encoding.h | 1 - .../mb_internal_encoding_variation2.phpt | 4 +- 9 files changed, 4 insertions(+), 360 deletions(-) delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4 index 9547130fba0..30004b1f8d4 100644 --- a/ext/mbstring/config.m4 +++ b/ext/mbstring/config.m4 @@ -117,7 +117,6 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/filters/mbfilter_qprint.c libmbfl/filters/mbfilter_singlebyte.c libmbfl/filters/mbfilter_sjis.c - libmbfl/filters/mbfilter_sjis_open.c libmbfl/filters/mbfilter_sjis_mobile.c libmbfl/filters/mbfilter_sjis_mac.c libmbfl/filters/mbfilter_sjis_2004.c diff --git a/ext/mbstring/config.w32 b/ext/mbstring/config.w32 index 8a9eadf41be..175f59064dc 100644 --- a/ext/mbstring/config.w32 +++ b/ext/mbstring/config.w32 @@ -26,7 +26,7 @@ if (PHP_MBSTRING != "no") { mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \ mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \ mbfilter_utf8_mobile.c mbfilter_euc_jp_2004.c mbfilter_uuencode.c \ - mbfilter_cp5022x.c mbfilter_sjis_open.c mbfilter_sjis_mobile.c \ + mbfilter_cp5022x.c mbfilter_sjis_mobile.c \ mbfilter_sjis_mac.c mbfilter_iso2022jp_2004.c \ mbfilter_iso2022jp_mobile.c mbfilter_singlebyte.c \ mbfilter_tl_jisx0201_jisx0208.c", "mbstring"); diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c index e6f34dd31b3..cba3c6d9d17 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c @@ -54,7 +54,7 @@ static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }; -static const char *mbfl_encoding_cp932_aliases[] = {"MS932", "Windows-31J", "MS_Kanji", NULL}; +static const char *mbfl_encoding_cp932_aliases[] = {"MS932", "Windows-31J", "MS_Kanji", "SJIS-win", "SJIS-ms", "SJIS-open", NULL}; const mbfl_encoding mbfl_encoding_cp932 = { mbfl_no_encoding_cp932, diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c deleted file mode 100644 index df7ba62e77b..00000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c +++ /dev/null @@ -1,308 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#include "mbfilter.h" -#include "mbfilter_sjis_open.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" - -static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -}; - -static const char *mbfl_encoding_sjis_open_aliases[] = {"SJIS-open", "SJIS-ms", NULL}; - -const mbfl_encoding mbfl_encoding_sjis_open = { - mbfl_no_encoding_sjis_open, - "SJIS-win", - "Shift_JIS", - mbfl_encoding_sjis_open_aliases, - mblen_table_sjis, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjis_open_wchar, - &vtbl_wchar_sjis_open -}; - -const struct mbfl_convert_vtbl vtbl_sjis_open_wchar = { - mbfl_no_encoding_sjis_open, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_sjis_open_wchar, - mbfl_filt_conv_common_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjis_open = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis_open, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_sjis_open, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -#define SJIS_ENCODE(c1,c2,s1,s2) \ - do { \ - s1 = c1; \ - s1--; \ - s1 >>= 1; \ - if ((c1) < 0x5f) { \ - s1 += 0x71; \ - } else { \ - s1 += 0xb1; \ - } \ - s2 = c2; \ - if ((c1) & 1) { \ - if ((c2) < 0x60) { \ - s2--; \ - } \ - s2 += 0x20; \ - } else { \ - s2 += 0x7e; \ - } \ - } while (0) - -#define SJIS_DECODE(c1,c2,s1,s2) \ - do { \ - s1 = c1; \ - if (s1 < 0xa0) { \ - s1 -= 0x81; \ - } else { \ - s1 -= 0xc1; \ - } \ - s1 <<= 1; \ - s1 += 0x21; \ - s2 = c2; \ - if (s2 < 0x9f) { \ - if (s2 < 0x7f) { \ - s2++; \ - } \ - s2 -= 0x20; \ - } else { \ - s1++; \ - s2 -= 0x7e; \ - } \ - } while (0) - -/* - * SJIS-win => wchar - */ -int -mbfl_filt_conv_sjis_open_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, s1, s2, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xa0 && c < 0xe0) { /* kana */ - CK((*filter->output_function)(0xfec0 + c, filter->data)); - } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ - filter->status = 1; - filter->cache = c; - } else { - w = c & MBFL_WCSGROUP_MASK; - w |= MBFL_WCSGROUP_THROUGH; - CK((*filter->output_function)(w, filter->data)); - } - break; - - case 1: /* kanji second char */ - filter->status = 0; - c1 = filter->cache; - if (c >= 0x40 && c <= 0xfc && c != 0x7f) { - w = 0; - SJIS_DECODE(c1, c, s1, s2); - s = (s1 - 0x21)*94 + s2 - 0x21; - if (s <= 137) { - if (s == 31) { - w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xff5e; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xffe0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xffe1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xffe2; /* FULLWIDTH NOT SIGN */ - } - } - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */ - w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; - } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */ - w = s - (94*94) + 0xe000; - } - } - if (w <= 0) { - w = (s1 << 8) | s2; - w &= MBFL_WCSPLANE_MASK; - w |= MBFL_WCSPLANE_WINCP932; - } - CK((*filter->output_function)(w, filter->data)); - } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ - CK((*filter->output_function)(c, filter->data)); - } else { - w = (c1 << 8) | c; - w &= MBFL_WCSGROUP_MASK; - w |= MBFL_WCSGROUP_THROUGH; - CK((*filter->output_function)(w, filter->data)); - } - break; - - default: - filter->status = 0; - break; - } - - return c; -} - -/* - * wchar => SJIS-win - */ -int -mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1, s2; - - s1 = 0; - s2 = 0; - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */ - s1 = c - 0xe000; - c1 = s1/94 + 0x7f; - c2 = s1%94 + 0x21; - s1 = (c1 << 8) | c2; - s2 = 1; - } - if (s1 <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s1 = 0x216f; /* FULLWIDTH YEN SIGN */ - } else if (c == 0x203e) { /* OVER LINE */ - s1 = 0x2131; /* FULLWIDTH MACRON */ - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ - s1 = 0x2141; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224c; - } - } - if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */ - s1 = -1; - c1 = 0; - c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ - if (c == cp932ext1_ucs_table[c1]) { - s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); - break; - } - c1++; - } - if (s1 <= 0) { - c1 = 0; - c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ - if (c == cp932ext3_ucs_table[c1]) { - s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21); - break; - } - c1++; - } - } - if (c == 0) { - s1 = 0; - } else if (s1 <= 0) { - s1 = -1; - } - } - - if (s1 >= 0) { - if (s1 < 0x100) { /* latin or kana */ - CK((*filter->output_function)(s1, filter->data)); - } else { /* kanji */ - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return c; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h b/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h deleted file mode 100644 index 764fad7e17a..00000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_open.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_SJIS_OPEN_H -#define MBFL_MBFILTER_SJIS_OPEN_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_sjis_open; - -extern const struct mbfl_convert_vtbl vtbl_sjis_open_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_open; - -int mbfl_filt_conv_sjis_open_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_SJIS_OPEN_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c index ae7dbba12ae..d8e98b58adb 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c @@ -44,7 +44,6 @@ #include "filters/mbfilter_euc_kr.h" #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" -#include "filters/mbfilter_sjis_open.h" #include "filters/mbfilter_sjis_2004.h" #include "filters/mbfilter_sjis_mobile.h" #include "filters/mbfilter_sjis_mac.h" diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c index 1a0e65d95db..c1641c84c86 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c @@ -48,7 +48,6 @@ #include "filters/mbfilter_euc_kr.h" #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" -#include "filters/mbfilter_sjis_open.h" #include "filters/mbfilter_sjis_mobile.h" #include "filters/mbfilter_sjis_mac.h" #include "filters/mbfilter_sjis_2004.h" @@ -114,7 +113,6 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = { &mbfl_encoding_sjis, &mbfl_encoding_eucjp_win, &mbfl_encoding_eucjp2004, - &mbfl_encoding_sjis_open, &mbfl_encoding_sjis_docomo, &mbfl_encoding_sjis_kddi, &mbfl_encoding_sjis_sb, diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h index fe1b1be6921..c99b42dcc6e 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h @@ -68,7 +68,6 @@ enum mbfl_no_encoding { mbfl_no_encoding_eucjp2004, mbfl_no_encoding_sjis, mbfl_no_encoding_eucjp_win, - mbfl_no_encoding_sjis_open, mbfl_no_encoding_sjis_docomo, mbfl_no_encoding_sjis_kddi, mbfl_no_encoding_sjis_sb, diff --git a/ext/mbstring/tests/mb_internal_encoding_variation2.phpt b/ext/mbstring/tests/mb_internal_encoding_variation2.phpt index dab306b12a2..842d8d519cf 100644 --- a/ext/mbstring/tests/mb_internal_encoding_variation2.phpt +++ b/ext/mbstring/tests/mb_internal_encoding_variation2.phpt @@ -176,10 +176,10 @@ string(9) "eucJP-win" -- Iteration 20 -- string(9) "eucJP-win" bool(true) -string(8) "SJIS-win" +string(5) "CP932" -- Iteration 21 -- -string(8) "SJIS-win" +string(5) "CP932" bool(true) string(11) "ISO-2022-JP"