mirror of
https://github.com/php/php-src.git
synced 2026-03-26 09:12:14 +01:00
This boosts the performance of mb_strpos, mb_stripos, mb_strrpos, mb_strripos, mb_strstr, mb_stristr, mb_strrchr, and mb_strrichr when used on non-UTF-8 strings. mb_substr is also faster. With UTF-8 input, there is no appreciable difference in performance for mb_strpos, mb_stripos, mb_strrpos, etc. This is expected, since the only real difference here (aside from shorter and simpler code) is that the new text conversion code is used when converting non-UTF-8 input strings to UTF-8. (This is done because internally, mb_strpos, etc. work only on UTF-8 text.) For ASCII, speed is boosted by 30-65%. For other legacy text encodings, the degree of performance improvement will depend on how slow the legacy conversion code was. One other minor, but notable difference is that strings encoded using UTF-8 variants from Japanese mobile vendors (SoftBank, KDDI, Docomo) will not undergo encoding conversion but will be processed "as is". It is expected that this will result in a large performance boost for such input strings; but realistically, the number of users who work with such strings is probably minute. I was not originally planning to include mb_substr in this commit, but fuzzing of the reimplemented mb_strstr revealed that mb_substr needed to be reimplemented, too; using the old mbfl_substr, which was based on the old text conversion filters, in combination with functions which use the new text conversion filters caused bugs. The performance boost for mb_substr varies from 10%-500%, depending on the encoding and input string used.
59 lines
1.8 KiB
PHP
59 lines
1.8 KiB
PHP
--TEST--
|
|
Test mb_strrchr() function : basic functionality
|
|
--EXTENSIONS--
|
|
mbstring
|
|
--FILE--
|
|
<?php
/*
 * Exercises mb_strrchr() with an ASCII haystack and a UTF-8 multibyte
 * haystack, covering both "needle found" and "needle not found", plus one
 * regression case. Results that are strings are printed as hex via bin2hex().
 */
echo "*** Testing mb_strrchr() : basic functionality ***\n";

mb_internal_encoding('UTF-8');

// Dumps the hexadecimal representation of a mb_strrchr() result.
$dump_hex = function ($result) {
    var_dump(bin2hex($result));
};

$ascii_haystack = 'abc def';
// Japanese string in UTF-8 (kept base64-encoded in this file)
$mb_haystack = base64_decode('5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII=');

// Needle taken from the start of the multibyte haystack
$present_needle = base64_decode('5pel5pys6Kqe');
// Needle which does not occur in the multibyte haystack
$absent_needle = base64_decode('44GT44KT44Gr44Gh44Gv44CB5LiW55WM');

echo "\n-- ASCII string: needle exists --\n";
// Explicit encoding, default arguments, then the part before the needle
$dump_hex(mb_strrchr($ascii_haystack, 'd', false, 'ISO-8859-1'));
$dump_hex(mb_strrchr($ascii_haystack, 'd'));
$dump_hex(mb_strrchr($ascii_haystack, 'd', true));

echo "\n-- ASCII string: needle doesn't exist --\n";
$not_found_ascii = mb_strrchr($ascii_haystack, '123');
var_dump($not_found_ascii);

echo "\n-- Multibyte string: needle exists --\n";
// Default arguments, explicit encoding, then the part before the needle
$dump_hex(mb_strrchr($mb_haystack, $present_needle));
$dump_hex(mb_strrchr($mb_haystack, $present_needle, false, 'utf-8'));
$dump_hex(mb_strrchr($mb_haystack, $present_needle, true));

echo "\n-- Multibyte string: needle doesn't exist --\n";
$not_found_mb = mb_strrchr($mb_haystack, $absent_needle);
var_dump($not_found_mb);

echo "\n-- Regression tests --\n";
// Regression test from when mb_strrchr was being reimplemented:
// empty needle against a 3-byte haystack declared as UTF32
$regression_result = mb_strrchr("\x00t\x00", "", false, "UTF32");
var_dump($regression_result);

?>
|
|
--EXPECT--
|
|
*** Testing mb_strrchr() : basic functionality ***
|
|
|
|
-- ASCII string: needle exists --
|
|
string(6) "646566"
|
|
string(6) "646566"
|
|
string(8) "61626320"
|
|
|
|
-- ASCII string: needle doesn't exist --
|
|
bool(false)
|
|
|
|
-- Multibyte string: needle exists --
|
|
string(106) "e697a5e69cace8aa9ee38386e382ade382b9e38388e381a7e38199e380823031323334efbc95efbc96efbc97efbc98efbc99e38082"
|
|
string(106) "e697a5e69cace8aa9ee38386e382ade382b9e38388e381a7e38199e380823031323334efbc95efbc96efbc97efbc98efbc99e38082"
|
|
string(0) ""
|
|
|
|
-- Multibyte string: needle doesn't exist --
|
|
bool(false)
|
|
|
|
-- Regression tests --
|
|
string(0) ""
|