mirror of
https://github.com/php/php-src.git
synced 2026-04-19 05:51:02 +02:00
grapheme_extract() converts the UTF-8 string argument to UTF-16 in order to iterate through graphemes, and counts each UTF-16 code unit as one Unicode character, which is incorrect for characters encoded as UTF-16 surrogate pairs. The patch removes the conversion and counts UTF-8 characters directly when needed.
29 lines
876 B
PHP
29 lines
876 B
PHP
--TEST--
|
|
Bug #68447: grapheme_extract take an extra trailing character
|
|
--SKIPIF--
|
|
<?php
// Skip this test when the intl extension is not loaded.
if (!extension_loaded('intl')) {
    echo 'skip';
}
?>
|
|
--FILE--
|
|
<?php
// Regression script for bug #68447. In both strings the first visible
// ideograph is followed by an invisible code point (presumably an
// ideographic variation selector -- confirm against the raw bytes), so the
// first grapheme cluster spans more than one code point.

// Default extract type: ask for a single grapheme cluster.
$katsushikaku = "葛󠄁飾区";
echo grapheme_extract($katsushikaku, 1), "\n";

$haiyore = "這󠄀いよれ";

// Each row is [size, extract type]; rows run in order, one output line each.
$cases = [
    [1, GRAPHEME_EXTR_COUNT],
    [2, GRAPHEME_EXTR_COUNT],
    [6, GRAPHEME_EXTR_MAXBYTES],
    [9, GRAPHEME_EXTR_MAXBYTES],
    [12, GRAPHEME_EXTR_MAXBYTES],
    [1, GRAPHEME_EXTR_MAXCHARS],
    [2, GRAPHEME_EXTR_MAXCHARS],
    [3, GRAPHEME_EXTR_MAXCHARS],
];

foreach ($cases as $case) {
    echo grapheme_extract($haiyore, $case[0], $case[1]), "\n";
}
|
|
--EXPECT--
|
|
葛󠄁
|
|
這󠄀
|
|
這󠄀い
|
|
|
|
這󠄀
|
|
這󠄀い
|
|
|
|
這󠄀
|
|
這󠄀い
|