1
0
mirror of https://github.com/php/php-src.git synced 2026-04-27 10:16:41 +02:00

ISO-2022-JP-2004 conversion: represent backslash and tilde as ASCII

This issue dates back to some commits I merged recently, which made encodings
like Shift-JIS-2004 use appropriate JIS X 0208 characters to represent
backslashes and tildes, rather than single-byte characters which are used in
those encodings with a different meaning (for example, in these encodings,
0x5C is used for a halfwidth Yen sign, rather than a backslash).

There was an unintended side effect: ISO-2022-JP-2004 was also made to
represent backslashes and tildes using JIS X 0208 characters. However,
ISO-2022-JP explicitly includes ASCII as one of its selectable character sets,
and ISO-2022-JP-2004 is just an extension of ISO-2022-JP. So when converting
text to ISO-2022-JP-2004, we can convert Unicode backslashes and tildes to ASCII
rather than using the corresponding JIS X 0208 characters.
This commit is contained in:
Alex Dowad
2021-01-13 21:28:50 +02:00
parent b429228420
commit 4d65c2a992
@@ -482,8 +482,8 @@ int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter)
return 0;
}
int
mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) {
int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter)
{
int k;
int c1, c2, s1, s2;
@@ -548,6 +548,12 @@ retry:
}
}
if (s1 <= 0 && filter->to->no_encoding == mbfl_no_encoding_2022jp_2004 && (c == 0x5C || c == 0x7E)) {
/* ISO-2022-JP-2004 can represent ASCII characters directly, so there is no need
* to use the JIS X 0208 REVERSE SOLIDUS for ASCII backslash, or WAVE DASH for tilde */
s1 = c;
}
/* check for major japanese chars: U+4E00 - U+9FFF */
if (s1 <= 0) {
for (k=0; k < uni2jis_tbl_len ;k++) {