mirror of
https://github.com/php/php-src.git
synced 2026-04-27 10:16:41 +02:00
ISO-2022-JP-2004 conversion: represent backslash and tilde as ASCII
This issue was introduced by some commits I merged recently, which made encodings like Shift-JIS-2004 use the appropriate JIS X 0208 characters to represent backslashes and tildes, rather than the single-byte values, which have a different meaning in those encodings (for example, in these encodings, 0x5C is used for a halfwidth Yen sign, rather than a backslash). There was an unintended side effect: ISO-2022-JP-2004 was also made to represent backslashes and tildes using JIS X 0208 characters. However, ISO-2022-JP explicitly includes ASCII as one of its selectable character sets, and ISO-2022-JP-2004 is just an extension of ISO-2022-JP. Therefore, when converting text to ISO-2022-JP-2004, we can convert Unicode backslashes and tildes to ASCII rather than using the corresponding JIS X 0208 characters.
This commit is contained in:
@@ -482,8 +482,8 @@ int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) {
|
||||
int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter)
|
||||
{
|
||||
int k;
|
||||
int c1, c2, s1, s2;
|
||||
|
||||
@@ -548,6 +548,12 @@ retry:
|
||||
}
|
||||
}
|
||||
|
||||
if (s1 <= 0 && filter->to->no_encoding == mbfl_no_encoding_2022jp_2004 && (c == 0x5C || c == 0x7E)) {
|
||||
/* ISO-2022-JP-2004 can represent ASCII characters directly, so there is no need
|
||||
* to use the JIS X 0208 REVERSE SOLIDUS for ASCII backslash, or WAVE DASH for tilde */
|
||||
s1 = c;
|
||||
}
|
||||
|
||||
/* check for major japanese chars: U+4E00 - U+9FFF */
|
||||
if (s1 <= 0) {
|
||||
for (k=0; k < uni2jis_tbl_len ;k++) {
|
||||
|
||||
Reference in New Issue
Block a user