From d497c0e96f0347707df717aba047e059658f3f68 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Mon, 4 Jan 2021 22:03:21 +0200 Subject: [PATCH] JIS7/JIS8 encoding: use JISX0201 for U+203E (overline) In other legacy Japanese encodings like Shift-JIS, we are now using a specific JISX 0208 character for the Unicode overline (U+203E). Previously, the single byte 0x7E was used, but an ASCII 0x7E does not represent an overline, so this was changed. However, JIS7/JIS8 can represent characters in the JISX 0201 character set as well. That character set also includes an overline character, which takes fewer bytes to encode than the corresponding JISX 0208 character, so we'll use it. This is what mbstring had been doing for a long time, but it changed as a side effect of the recent changes to how U+203E is encoded in Shift-JIS, etc. So change it back. --- ext/mbstring/libmbfl/filters/mbfilter_jis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.c b/ext/mbstring/libmbfl/filters/mbfilter_jis.c index 2eeae28f824..2fd58872b67 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.c @@ -278,6 +278,8 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter) if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c == 0x203E) { /* OVERLINE */ + s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { @@ -288,8 +290,6 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter) if (s <= 0) { if (c == 0xa5) { /* YEN SIGN */ s = 0x1005c; - } else if (c == 0x203e) { /* OVER LINE */ - s = 0x1007e; } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ s = 0x2140; } else if (c == 0x2225) { /* PARALLEL TO */