mirror of
https://github.com/php/php-src.git
synced 2026-04-24 00:18:23 +02:00
803cd824e5
* Fast path for when there is nothing to trim in mb_trim * Make mb_trim decide between linear search vs hash table lookup Using empirical experiments I noticed that on my i7-4790 the hash table approach becomes faster once we have more than 4 code points in the trim characters, when evaluated on the worst case. This patch changes the logic so that a hash table is used for a large number of trim characters, and linear search when the number of trim characters is <= 4.
139 lines
4.8 KiB
PHP
139 lines
4.8 KiB
PHP
--TEST--
|
||
mb_trim() function tests
|
||
--EXTENSIONS--
|
||
mbstring
|
||
--FILE--
|
||
<?php
// Behavioural tests for mb_trim(), mb_ltrim() and mb_rtrim(). Everything this
// script prints is compared verbatim with the --EXPECT-- section, so the
// echoed headers and the order of the var_dump() calls must not change.
//
// U+3000 (IDEOGRAPHIC SPACE) is always written as the escape "\u{3000}":
// a literal one is indistinguishable from an ASCII space when reading the
// file and is silently destroyed by whitespace-normalising tools, which
// changes the byte counts the expectations rely on.
mb_internal_encoding("UTF-8");

// ASCII-only inputs: no-op trims, the default trim set (which strips
// " \0\t\n" here), and an explicit empty trim set that must strip nothing.
echo "== Copy from trim ==\n";
var_dump('ABC' === mb_trim('ABC'));
var_dump('ABC' === mb_ltrim('ABC'));
var_dump('ABC' === mb_rtrim('ABC'));
var_dump('ABC' === mb_trim(" \0\t\nABC \0\t\n"));
var_dump("ABC \0\t\n" === mb_ltrim(" \0\t\nABC \0\t\n"));
var_dump(" \0\t\nABC" === mb_rtrim(" \0\t\nABC \0\t\n"));
var_dump(" \0\t\nABC \0\t\n" === mb_trim(" \0\t\nABC \0\t\n",''));
var_dump(" \0\t\nABC \0\t\n" === mb_ltrim(" \0\t\nABC \0\t\n",''));
var_dump(" \0\t\nABC \0\t\n" === mb_rtrim(" \0\t\nABC \0\t\n",''));

// Empty subject string with the default trim set.
echo "== Empty string ==\n";
var_dump(mb_trim(""));
var_dump(mb_ltrim(""));
var_dump(mb_rtrim(""));

// Trim sets holding at most one code point.
echo "== Single string ==\n";
var_dump(mb_ltrim(' test ', ''));
var_dump(mb_trim(" あいうえおあお ", " ", "UTF-8"));
var_dump(mb_trim('foo BAR Spaß', 'ß', "UTF-8"));
var_dump(mb_trim('foo BAR Spaß', 'f', "UTF-8"));

// Trim sets holding several code points, in either order. The subject ends in
// "<ASCII space><U+3000><last char>": the ASCII space is never in the trim set,
// so it marks where right-trimming has to stop.
echo "== Multi strings ==\n";
var_dump(mb_trim('foo BAR Spaß', 'ßf', "UTF-8"));
var_dump(mb_trim('foo BAR Spaß', 'fß', "UTF-8"));
var_dump(mb_trim("\u{3000}あいうおえお \u{3000}あ", "\u{3000}あ", "UTF-8"));
var_dump(mb_trim("\u{3000}あいうおえお \u{3000}あ", "あ\u{3000}", "UTF-8"));
var_dump(mb_trim("\u{3000}あいうおえお \u{3000}a", "あa", "UTF-8"));
// "\xe3" is only the lead byte of a UTF-8 sequence; it must not match anything.
var_dump(mb_trim("\u{3000}あいうおえお \u{3000}a", "\xe3", "UTF-8"));

// 129 consecutive trimmable characters (3 bytes each) with the default set.
echo "== Many strings ==\n";
var_dump(mb_trim(str_repeat("\u{3000}", 129)));
var_dump(mb_trim(str_repeat("\u{3000}", 129) . "a"));
var_dump(mb_rtrim(str_repeat("\u{3000}", 129) . "a"));

// A trim set of 1024 distinct code points (U+0400..U+07FF, 2 bytes each).
echo "== Very long trim characters ==\n";
$trim_chars = "";
for ($i = 1024; $i < 2048; $i++) {
    $trim_chars .= mb_chr($i);
}
var_dump(mb_trim($trim_chars . "hello" . $trim_chars, $trim_chars));
var_dump(strlen(mb_ltrim($trim_chars . "hello" . $trim_chars, $trim_chars)));
var_dump(strlen(mb_rtrim($trim_chars . "hello" . $trim_chars, $trim_chars)));

// One-sided trimming of a long run: 33 x "あ", then "い", then 4 x "あ".
$long_run = str_repeat("あ", 33) . "いああああ";

echo "== mb_ltrim ==\n";
var_dump(mb_ltrim($long_run, "あ"));
echo "== mb_rtrim ==\n";
var_dump(mb_rtrim($long_run, "あ"));

// A subject made only of characters the default trim set must remove.
echo "== default params ==\n";
var_dump(mb_trim(" \f\n\r\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}"));

// U+FFFE / U+FEFF as trim characters, in UTF-8 and in both UTF-16 byte orders.
echo "== Byte Order Mark ==\n";
var_dump(mb_ltrim("\u{FFFE}漢字", "\u{FFFE}\u{FEFF}"));
var_dump(bin2hex(mb_ltrim(mb_convert_encoding("\u{FFFE}漢字", "UTF-16LE", "UTF-8"), mb_convert_encoding("\u{FFFE}\u{FEFF}", "UTF-16LE", "UTF-8"), "UTF-16LE")));
var_dump(bin2hex(mb_ltrim(mb_convert_encoding("\u{FEFF}漢字", "UTF-16BE", "UTF-8"), mb_convert_encoding("\u{FFFE}\u{FEFF}", "UTF-16BE", "UTF-8"), "UTF-16BE")));

// Empty trim set on a padded subject. The header text repeats an earlier
// section on purpose: it is part of the expected output.
echo "== Empty string ==\n";
var_dump(mb_trim(" abcd ", ""));
var_dump(mb_ltrim(" abcd ", ""));
var_dump(mb_rtrim(" abcd ", ""));

// Non-UTF-8 encoding: "\x81\x40" is trimmed from both ends of the SJIS subject.
echo "== SJIS ==\n";
var_dump(mb_convert_encoding(mb_trim("\x81\x40\x82\xa0\x81\x40", "\x81\x40", "SJIS"), "UTF-8", "SJIS"));

// The same code point listed twice in the trim set.
echo "== Same strings ==\n";
var_dump(mb_trim("foo", "oo"));

// An unknown encoding name must raise ValueError rather than return a value.
echo "== \$encoding throws ValueError ==\n";
try {
    var_dump(mb_trim( "\u{180F}", "", "NULL"));
} catch (ValueError $e) {
    var_dump($e->getMessage());
}

?>
|
||
--EXPECT--
|
||
== Copy from trim ==
|
||
bool(true)
|
||
bool(true)
|
||
bool(true)
|
||
bool(true)
|
||
bool(true)
|
||
bool(true)
|
||
bool(true)
|
||
bool(true)
|
||
bool(true)
|
||
== Empty string ==
|
||
string(0) ""
|
||
string(0) ""
|
||
string(0) ""
|
||
== Single string ==
|
||
string(6) " test "
|
||
string(21) "あいうえおあお"
|
||
string(11) "foo BAR Spa"
|
||
string(12) "oo BAR Spaß"
|
||
== Multi strings ==
|
||
string(10) "oo BAR Spa"
|
||
string(10) "oo BAR Spa"
|
||
string(16) "いうおえお "
|
||
string(16) "いうおえお "
|
||
string(25) "　あいうおえお 　"
|
||
string(26) "　あいうおえお 　a"
|
||
== Many strings ==
|
||
string(0) ""
|
||
string(1) "a"
|
||
string(388) "　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　　a"
|
||
== Very long trim characters ==
|
||
string(5) "hello"
|
||
int(2053)
|
||
int(2053)
|
||
== mb_ltrim ==
|
||
string(15) "いああああ"
|
||
== mb_rtrim ==
|
||
string(102) "あああああああああああああああああああああああああああああああああい"
|
||
== default params ==
|
||
string(0) ""
|
||
== Byte Order Mark ==
|
||
string(6) "漢字"
|
||
string(8) "226f575b"
|
||
string(8) "6f225b57"
|
||
== Empty string ==
|
||
string(6) " abcd "
|
||
string(6) " abcd "
|
||
string(6) " abcd "
|
||
== SJIS ==
|
||
string(3) "あ"
|
||
== Same strings ==
|
||
string(1) "f"
|
||
== $encoding throws ValueError ==
|
||
string(73) "mb_trim(): Argument #3 ($encoding) must be a valid encoding, "NULL" given"
|