1
0
mirror of https://github.com/php/php-src.git synced 2026-04-05 07:02:33 +02:00
Files
archived-php-src/ext/mbstring/tests/mb_convert_encoding.phpt
Alex Dowad 9308974f8c Deprecate use of mbstring to convert text to Base64/QPrint/HTML entities/etc
The purpose of mbstring is for working with Unicode and legacy text
encodings; but Base64, QPrint, etc. are not text encodings and don't
really belong in mbstring. PHP already contains separate implementations
of Base64, QPrint, and HTML entities. It will be better to eventually
remove these non-encodings from mbstring.

Regarding HTML entities... there is a bit more to say. mbstring's
implementation of HTML entities is different from the other built-in
implementation (htmlspecialchars and htmlentities). Those functions
convert <, >, and & to HTML entities, but mbstring does not.

It appears that the original author of mbstring intended for something
to be done with <, >, and &. He used a table to identify which
characters should be converted to HTML entities, and </>/& all have a
special value in that table. However, nothing ever checks for that
special value, so the characters are passed through unconverted.

This seems like a very useless implementation of HTML entities. The most
important characters which need to be expressed as entities in HTML
documents are those three!
2021-11-01 11:23:21 +02:00

142 lines
4.5 KiB
PHP

--TEST--
mb_convert_encoding()
--EXTENSIONS--
mbstring
--INI--
output_handler=
mbstring.language=Japanese
--FILE--
<?php
// TODO: Add more tests
// Fixtures: the same text held in three different Japanese encodings. The
// expected output relies on each fixture converting exactly into the others.
// SJIS bytes (stored here Base64-encoded)
$sjis = base64_decode('k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg==');
// JIS bytes (stored here Base64-encoded)
$jis = base64_decode('GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg==');
// EUC-JP bytes, written out as hex escapes
$euc_jp = "\xC6\xFC\xCB\xDC\xB8\xEC\xA5\xC6\xA5\xAD\xA5\xB9\xA5\xC8\xA4\xC7\xA4\xB9\xA1\xA301234\xA3\xB5\xA3\xB6\xA3\xB7\xA3\xB8\xA3\xB9\xA1\xA3";
// Test with a single, explicitly named "from encoding" (third argument)
// NOTE (legacy, never researched): the original author observed unexplained
// complaints / differing results around this case.
echo "== BASIC TEST ==\n";
// SJIS -> EUC-JP, printed as hex
$s = $sjis;
$s = bin2hex(mb_convert_encoding($s, 'EUC-JP', 'SJIS'));
print("EUC-JP: $s\n"); // EUC-JP
// JIS -> EUC-JP, printed as hex
$s = $jis;
$s = bin2hex(mb_convert_encoding($s, 'EUC-JP', 'JIS'));
print("EUC-JP: $s\n"); // EUC-JP
// EUC-JP -> SJIS, printed as Base64
$s = $euc_jp;
$s = mb_convert_encoding($s, 'SJIS', 'EUC-JP');
print("SJIS: ".base64_encode($s)."\n"); // SJIS
// EUC-JP -> JIS, printed as Base64
$s = $euc_jp;
$s = mb_convert_encoding($s, 'JIS', 'EUC-JP');
print("JIS: ".base64_encode($s)."\n"); // JIS
// Using a comma-separated string of candidate source encodings as the
// third argument (the array form is exercised in the next section)
echo "== STRING ENCODING LIST ==\n";
$a = 'JIS,UTF-8,EUC-JP,SJIS';
// JIS input -> EUC-JP, printed as hex
$s = $jis;
$s = bin2hex(mb_convert_encoding($s, 'EUC-JP', $a));
print("EUC-JP: $s\n"); // EUC-JP
// EUC-JP input -> SJIS, printed as Base64
$s = $euc_jp;
$s = mb_convert_encoding($s, 'SJIS', $a);
print("SJIS: ".base64_encode($s)."\n"); // SJIS
// EUC-JP input -> JIS, printed as Base64
$s = $euc_jp;
$s = mb_convert_encoding($s, 'JIS', $a);
print("JIS: ".base64_encode($s)."\n"); // JIS
// Using an array of candidate source encodings as the third argument;
// the expected output is identical to the string-list form
echo "== ARRAY ENCODING LIST ==\n";
$a = array(0=>'JIS', 1=>'UTF-8', 2=>'EUC-JP', 3=>'SJIS');
// JIS input -> EUC-JP, printed as hex
$s = $jis;
$s = bin2hex(mb_convert_encoding($s, 'EUC-JP', $a));
print("EUC-JP: $s\n"); // EUC-JP
// EUC-JP input -> SJIS, printed as Base64
$s = $euc_jp;
$s = mb_convert_encoding($s, 'SJIS', $a);
print("SJIS: ".base64_encode($s)."\n"); // SJIS
// EUC-JP input -> JIS, printed as Base64
$s = $euc_jp;
$s = mb_convert_encoding($s, 'JIS', $a);
print("JIS: ".base64_encode($s)."\n"); // JIS
// Using Detect Order: pass 'auto' as the source encoding instead of an
// explicit list.
// NOTE(review): presumably 'auto' resolves to the detect order implied by
// mbstring.language=Japanese from the --INI-- section -- confirm.
echo "== DETECT ORDER ==\n";
// JIS input -> EUC-JP, printed as hex
$s = $jis;
$s = bin2hex(mb_convert_encoding($s, 'EUC-JP', 'auto'));
print("EUC-JP: $s\n"); // EUC-JP
// EUC-JP input -> SJIS, printed as Base64
$s = $euc_jp;
$s = mb_convert_encoding($s, 'SJIS', 'auto');
print("SJIS: ".base64_encode($s)."\n"); // SJIS
// EUC-JP input -> JIS, printed as Base64
$s = $euc_jp;
$s = mb_convert_encoding($s, 'JIS', 'auto');
print("JIS: ".base64_encode($s)."\n"); // JIS
// Unusual inputs with no source encoding given (two-argument form)
echo "== INVALID PARAMETER ==\n";
// An integer input: the expected output shows it comes back as "1234"
$s = mb_convert_encoding(1234, 'EUC-JP');
print("INT: $s\n");
// An empty string input: the expected output shows nothing after the label
$s = mb_convert_encoding('', 'EUC-JP');
print("EUC-JP: $s\n"); // EUC-JP target, empty result
/**
 * Convert $str to $encoding with mb_convert_encoding() and var_dump() the
 * result. If the call is rejected with a ValueError, print the exception
 * message followed by a newline instead of dumping anything.
 *
 * @param mixed $str      Value handed to mb_convert_encoding() as the input.
 * @param mixed $encoding Target encoding name under test.
 */
function tryBadConversion($str, $encoding) {
    try {
        $converted = mb_convert_encoding($str, $encoding);
    } catch (ValueError $error) {
        // Rejected argument: report the message only, nothing is dumped.
        print $error->getMessage() . "\n";
        return;
    }

    var_dump($converted);
}
// Unknown encoding name: the ValueError message is printed
tryBadConversion($euc_jp, 'BAD');
// Non-text "encodings": each call is expected to raise a deprecation notice
// and then dump the converted string
tryBadConversion('abc', 'Quoted-Printable');
tryBadConversion('abc', 'BASE64');
tryBadConversion('abc', 'HTML-ENTITIES');
?>
--EXPECTF--
== BASIC TEST ==
EUC-JP: c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
EUC-JP: c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
SJIS: k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg==
JIS: GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg==
== STRING ENCODING LIST ==
EUC-JP: c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
SJIS: k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg==
JIS: GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg==
== ARRAY ENCODING LIST ==
EUC-JP: c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
SJIS: k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg==
JIS: GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg==
== DETECT ORDER ==
EUC-JP: c6fccbdcb8eca5c6a5ada5b9a5c8a4c7a4b9a1a33031323334a3b5a3b6a3b7a3b8a3b9a1a3
SJIS: k/qWe4zqg2WDTINYg2eCxYK3gUIwMTIzNIJUglWCVoJXgliBQg==
JIS: GyRCRnxLXDhsJUYlLSU5JUgkRyQ5ISMbKEIwMTIzNBskQiM1IzYjNyM4IzkhIxsoQg==
== INVALID PARAMETER ==
INT: 1234
EUC-JP:
mb_convert_encoding(): Argument #2 ($to_encoding) must be a valid encoding, "BAD" given
Deprecated: mb_convert_encoding(): Handling QPrint via mbstring is deprecated; use quoted_printable_encode/quoted_printable_decode instead in %s on line %d
string(3) "abc"
Deprecated: mb_convert_encoding(): Handling Base64 via mbstring is deprecated; use base64_encode/base64_decode instead in %s on line %d
string(4) "YWJj"
Deprecated: mb_convert_encoding(): Handling HTML entities via mbstring is deprecated; use htmlspecialchars, htmlentities, or mb_encode_numericentity/mb_decode_numericentity instead in %s on line %d
string(3) "abc"