mirror of
https://github.com/php/php-src.git
synced 2026-03-24 00:02:20 +01:00
2
NEWS
2
NEWS
@@ -2,6 +2,8 @@ PHP NEWS
|
||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||
?? ??? ????, PHP 8.3.0alpha3
|
||||
|
||||
- MBString:
|
||||
. Implement mb_str_pad() RFC. (nielsdos)
|
||||
|
||||
22 Jun 2023, PHP 8.3.0alpha2
|
||||
|
||||
|
||||
@@ -208,6 +208,10 @@ PHP 8.3 UPGRADE NOTES
|
||||
the given $depth and $options.
|
||||
RFC: https://wiki.php.net/rfc/json_validate
|
||||
|
||||
- MBString:
|
||||
. Added mb_str_pad(), which is the mbstring equivalent of str_pad().
|
||||
RFC: https://wiki.php.net/rfc/mb_str_pad
|
||||
|
||||
- Posix:
|
||||
. Added posix_sysconf call to get runtime information.
|
||||
. Added posix_pathconf call to get configuration value from a directory/file.
|
||||
|
||||
@@ -5522,6 +5522,132 @@ PHP_FUNCTION(mb_chr)
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
PHP_FUNCTION(mb_str_pad)
|
||||
{
|
||||
zend_string *input, *encoding_str = NULL, *pad = NULL;
|
||||
zend_long pad_to_length;
|
||||
zend_long pad_type_val = PHP_STR_PAD_RIGHT;
|
||||
|
||||
ZEND_PARSE_PARAMETERS_START(2, 5)
|
||||
Z_PARAM_STR(input)
|
||||
Z_PARAM_LONG(pad_to_length)
|
||||
Z_PARAM_OPTIONAL
|
||||
Z_PARAM_STR(pad)
|
||||
Z_PARAM_LONG(pad_type_val)
|
||||
Z_PARAM_STR_OR_NULL(encoding_str)
|
||||
ZEND_PARSE_PARAMETERS_END();
|
||||
|
||||
const mbfl_encoding *encoding = php_mb_get_encoding(encoding_str, 5);
|
||||
if (!encoding) {
|
||||
RETURN_THROWS();
|
||||
}
|
||||
|
||||
size_t input_length = mb_get_strlen(input, encoding);
|
||||
|
||||
/* If resulting string turns out to be shorter than input string,
|
||||
we simply copy the input and return. */
|
||||
if (pad_to_length < 0 || (size_t)pad_to_length <= input_length) {
|
||||
RETURN_STR_COPY(input);
|
||||
}
|
||||
|
||||
if (ZSTR_LEN(pad) == 0) {
|
||||
zend_argument_value_error(3, "must be a non-empty string");
|
||||
RETURN_THROWS();
|
||||
}
|
||||
|
||||
if (pad_type_val < PHP_STR_PAD_LEFT || pad_type_val > PHP_STR_PAD_BOTH) {
|
||||
zend_argument_value_error(4, "must be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH");
|
||||
RETURN_THROWS();
|
||||
}
|
||||
|
||||
size_t pad_length = mb_get_strlen(pad, encoding);
|
||||
|
||||
size_t num_mb_pad_chars = pad_to_length - input_length;
|
||||
|
||||
/* We need to figure out the left/right padding lengths. */
|
||||
size_t left_pad = 0, right_pad = 0; /* Initialize here to silence compiler warnings. */
|
||||
switch (pad_type_val) {
|
||||
case PHP_STR_PAD_RIGHT:
|
||||
right_pad = num_mb_pad_chars;
|
||||
break;
|
||||
|
||||
case PHP_STR_PAD_LEFT:
|
||||
left_pad = num_mb_pad_chars;
|
||||
break;
|
||||
|
||||
case PHP_STR_PAD_BOTH:
|
||||
left_pad = num_mb_pad_chars / 2;
|
||||
right_pad = num_mb_pad_chars - left_pad;
|
||||
break;
|
||||
}
|
||||
|
||||
/* How many full block copies need to happen, and how many characters are then left over? */
|
||||
size_t full_left_pad_copies = left_pad / pad_length;
|
||||
size_t full_right_pad_copies = right_pad / pad_length;
|
||||
size_t remaining_left_pad_chars = left_pad % pad_length;
|
||||
size_t remaining_right_pad_chars = right_pad % pad_length;
|
||||
|
||||
if (UNEXPECTED(full_left_pad_copies > SIZE_MAX / ZSTR_LEN(pad) || full_right_pad_copies > SIZE_MAX / ZSTR_LEN(pad))) {
|
||||
goto overflow_no_release;
|
||||
}
|
||||
|
||||
/* Compute the number of bytes required for the padding */
|
||||
size_t full_left_pad_bytes = full_left_pad_copies * ZSTR_LEN(pad);
|
||||
size_t full_right_pad_bytes = full_right_pad_copies * ZSTR_LEN(pad);
|
||||
|
||||
/* No special fast-path handling necessary for zero-length pads because these functions will not
|
||||
* allocate memory in case a zero-length pad is required. */
|
||||
zend_string *remaining_left_pad_str = mb_get_substr(pad, 0, remaining_left_pad_chars, encoding);
|
||||
zend_string *remaining_right_pad_str = mb_get_substr(pad, 0, remaining_right_pad_chars, encoding);
|
||||
|
||||
if (UNEXPECTED(full_left_pad_bytes > ZSTR_MAX_LEN - ZSTR_LEN(remaining_left_pad_str)
|
||||
|| full_right_pad_bytes > ZSTR_MAX_LEN - ZSTR_LEN(remaining_right_pad_str))) {
|
||||
goto overflow;
|
||||
}
|
||||
|
||||
size_t left_pad_bytes = full_left_pad_bytes + ZSTR_LEN(remaining_left_pad_str);
|
||||
size_t right_pad_bytes = full_right_pad_bytes + ZSTR_LEN(remaining_right_pad_str);
|
||||
|
||||
if (UNEXPECTED(left_pad_bytes > ZSTR_MAX_LEN - right_pad_bytes
|
||||
|| ZSTR_LEN(input) > ZSTR_MAX_LEN - left_pad_bytes - right_pad_bytes)) {
|
||||
goto overflow;
|
||||
}
|
||||
|
||||
zend_string *result = zend_string_alloc(ZSTR_LEN(input) + left_pad_bytes + right_pad_bytes, false);
|
||||
char *buffer = ZSTR_VAL(result);
|
||||
|
||||
/* First we pad the left. */
|
||||
for (size_t i = 0; i < full_left_pad_copies; i++, buffer += ZSTR_LEN(pad)) {
|
||||
memcpy(buffer, ZSTR_VAL(pad), ZSTR_LEN(pad));
|
||||
}
|
||||
memcpy(buffer, ZSTR_VAL(remaining_left_pad_str), ZSTR_LEN(remaining_left_pad_str));
|
||||
buffer += ZSTR_LEN(remaining_left_pad_str);
|
||||
|
||||
/* Then we copy the input string. */
|
||||
memcpy(buffer, ZSTR_VAL(input), ZSTR_LEN(input));
|
||||
buffer += ZSTR_LEN(input);
|
||||
|
||||
/* Finally, we pad on the right. */
|
||||
for (size_t i = 0; i < full_right_pad_copies; i++, buffer += ZSTR_LEN(pad)) {
|
||||
memcpy(buffer, ZSTR_VAL(pad), ZSTR_LEN(pad));
|
||||
}
|
||||
memcpy(buffer, ZSTR_VAL(remaining_right_pad_str), ZSTR_LEN(remaining_right_pad_str));
|
||||
|
||||
ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
|
||||
|
||||
zend_string_release_ex(remaining_left_pad_str, false);
|
||||
zend_string_release_ex(remaining_right_pad_str, false);
|
||||
|
||||
RETURN_NEW_STR(result);
|
||||
|
||||
overflow:
|
||||
zend_string_release_ex(remaining_left_pad_str, false);
|
||||
zend_string_release_ex(remaining_right_pad_str, false);
|
||||
overflow_no_release:
|
||||
zend_throw_error(NULL, "String size overflow");
|
||||
RETURN_THROWS();
|
||||
}
|
||||
|
||||
/* {{{ */
|
||||
PHP_FUNCTION(mb_scrub)
|
||||
{
|
||||
|
||||
@@ -183,6 +183,8 @@ function mb_ord(string $string, ?string $encoding = null): int|false {}
|
||||
|
||||
function mb_chr(int $codepoint, ?string $encoding = null): string|false {}
|
||||
|
||||
function mb_str_pad(string $string, int $length, string $pad_string = " ", int $pad_type = STR_PAD_RIGHT, ?string $encoding = null): string {}
|
||||
|
||||
#ifdef HAVE_MBREGEX
|
||||
/** @refcount 1 */
|
||||
function mb_regex_encoding(?string $encoding = null): string|bool {}
|
||||
|
||||
12
ext/mbstring/mbstring_arginfo.h
generated
12
ext/mbstring/mbstring_arginfo.h
generated
@@ -1,5 +1,5 @@
|
||||
/* This is a generated file, edit the .stub.php file instead.
|
||||
* Stub hash: 26a027093075613056921c4d1a7eee65d52ec5eb */
|
||||
* Stub hash: 141073d610f862b525406fb7f48ac58b6691080e */
|
||||
|
||||
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_language, 0, 0, MAY_BE_STRING|MAY_BE_BOOL)
|
||||
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, language, IS_STRING, 1, "null")
|
||||
@@ -198,6 +198,14 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_chr, 0, 1, MAY_BE_STRING|MAY_
|
||||
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
|
||||
ZEND_END_ARG_INFO()
|
||||
|
||||
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_mb_str_pad, 0, 2, IS_STRING, 0)
|
||||
ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0)
|
||||
ZEND_ARG_TYPE_INFO(0, length, IS_LONG, 0)
|
||||
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, pad_string, IS_STRING, 0, "\" \"")
|
||||
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, pad_type, IS_LONG, 0, "STR_PAD_RIGHT")
|
||||
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
|
||||
ZEND_END_ARG_INFO()
|
||||
|
||||
#if defined(HAVE_MBREGEX)
|
||||
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_regex_encoding, 0, 0, MAY_BE_STRING|MAY_BE_BOOL)
|
||||
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
|
||||
@@ -346,6 +354,7 @@ ZEND_FUNCTION(mb_check_encoding);
|
||||
ZEND_FUNCTION(mb_scrub);
|
||||
ZEND_FUNCTION(mb_ord);
|
||||
ZEND_FUNCTION(mb_chr);
|
||||
ZEND_FUNCTION(mb_str_pad);
|
||||
#if defined(HAVE_MBREGEX)
|
||||
ZEND_FUNCTION(mb_regex_encoding);
|
||||
#endif
|
||||
@@ -440,6 +449,7 @@ static const zend_function_entry ext_functions[] = {
|
||||
ZEND_FE(mb_scrub, arginfo_mb_scrub)
|
||||
ZEND_FE(mb_ord, arginfo_mb_ord)
|
||||
ZEND_FE(mb_chr, arginfo_mb_chr)
|
||||
ZEND_FE(mb_str_pad, arginfo_mb_str_pad)
|
||||
#if defined(HAVE_MBREGEX)
|
||||
ZEND_FE(mb_regex_encoding, arginfo_mb_regex_encoding)
|
||||
#endif
|
||||
|
||||
138
ext/mbstring/tests/mb_str_pad.phpt
Normal file
138
ext/mbstring/tests/mb_str_pad.phpt
Normal file
@@ -0,0 +1,138 @@
|
||||
--TEST--
|
||||
mb_str_pad()
|
||||
--EXTENSIONS--
|
||||
mbstring
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
echo "--- Error conditions ---\n";
|
||||
try {
|
||||
var_dump(mb_str_pad('▶▶', 6, '', STR_PAD_RIGHT));
|
||||
} catch (ValueError $e) {
|
||||
var_dump($e->getMessage());
|
||||
}
|
||||
try {
|
||||
var_dump(mb_str_pad('▶▶', 6, '', STR_PAD_LEFT));
|
||||
} catch (ValueError $e) {
|
||||
var_dump($e->getMessage());
|
||||
}
|
||||
try {
|
||||
var_dump(mb_str_pad('▶▶', 6, '', STR_PAD_BOTH));
|
||||
} catch (ValueError $e) {
|
||||
var_dump($e->getMessage());
|
||||
}
|
||||
try {
|
||||
var_dump(mb_str_pad('▶▶', 6, ' ', 123456));
|
||||
} catch (ValueError $e) {
|
||||
var_dump($e->getMessage());
|
||||
}
|
||||
try {
|
||||
var_dump(mb_str_pad('▶▶', 6, ' ', STR_PAD_BOTH, 'unexisting'));
|
||||
} catch (ValueError $e) {
|
||||
var_dump($e->getMessage());
|
||||
}
|
||||
|
||||
echo "--- Simple ASCII strings ---\n";
|
||||
var_dump(mb_str_pad('Hello', 7, '+-', STR_PAD_BOTH));
|
||||
var_dump(mb_str_pad('World', 10, '+-', STR_PAD_BOTH));
|
||||
var_dump(mb_str_pad('Hello', 7, '+-', STR_PAD_LEFT));
|
||||
var_dump(mb_str_pad('World', 10, '+-', STR_PAD_LEFT));
|
||||
var_dump(mb_str_pad('Hello', 7, '+-', STR_PAD_RIGHT));
|
||||
var_dump(mb_str_pad('World', 10, '+-', STR_PAD_RIGHT));
|
||||
|
||||
echo "--- Edge cases pad length ---\n";
|
||||
var_dump(mb_str_pad('▶▶', 2, ' ', STR_PAD_BOTH));
|
||||
var_dump(mb_str_pad('▶▶', 1, ' ', STR_PAD_BOTH));
|
||||
var_dump(mb_str_pad('▶▶', 0, ' ', STR_PAD_BOTH));
|
||||
var_dump(mb_str_pad('▶▶', -1, ' ', STR_PAD_BOTH));
|
||||
|
||||
echo "--- Empty input string ---\n";
|
||||
var_dump(mb_str_pad('', 2, ' ', STR_PAD_BOTH));
|
||||
var_dump(mb_str_pad('', 1, ' ', STR_PAD_BOTH));
|
||||
var_dump(mb_str_pad('', 0, ' ', STR_PAD_BOTH));
|
||||
var_dump(mb_str_pad('', -1, ' ', STR_PAD_BOTH));
|
||||
|
||||
echo "--- No default argument ---\n";
|
||||
var_dump(mb_str_pad('▶▶', 6, pad_type: STR_PAD_RIGHT));
|
||||
var_dump(mb_str_pad('▶▶', 6, pad_type: STR_PAD_LEFT));
|
||||
var_dump(mb_str_pad('▶▶', 6, pad_type: STR_PAD_BOTH));
|
||||
|
||||
echo "--- UTF-8 emojis ---\n";
|
||||
for ($i = 6; $i > 0; $i--) {
|
||||
var_dump(mb_str_pad('▶▶', $i, '❤❓❇', STR_PAD_RIGHT));
|
||||
var_dump(mb_str_pad('▶▶', $i, '❤❓❇', STR_PAD_LEFT));
|
||||
var_dump(mb_str_pad('▶▶', $i, '❤❓❇', STR_PAD_BOTH));
|
||||
}
|
||||
|
||||
echo "--- UTF-8, 32, 7 test ---\n";
|
||||
|
||||
// Taken from mb_substr.phpt
|
||||
$utf8 = "Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь";
|
||||
$utf32 = mb_convert_encoding($utf8, 'UTF-32', 'UTF-8');
|
||||
$utf7 = mb_convert_encoding($utf8, 'UTF-7', 'UTF-8');
|
||||
$tests = ["UTF-8" => $utf8, "UTF-32" => $utf32, "UTF-7" => $utf7];
|
||||
|
||||
foreach ($tests as $encoding => $test) {
|
||||
$pad_str = mb_convert_encoding('▶▶', $encoding, 'UTF-8');
|
||||
var_dump(mb_convert_encoding(mb_str_pad($test, 44, $pad_str, STR_PAD_RIGHT, $encoding), 'UTF-8', $encoding));
|
||||
var_dump(mb_convert_encoding(mb_str_pad($test, 44, $pad_str, STR_PAD_LEFT, $encoding), 'UTF-8', $encoding));
|
||||
var_dump(mb_convert_encoding(mb_str_pad($test, 44, $pad_str, STR_PAD_BOTH, $encoding), 'UTF-8', $encoding));
|
||||
}
|
||||
?>
|
||||
--EXPECT--
|
||||
--- Error conditions ---
|
||||
string(66) "mb_str_pad(): Argument #3 ($pad_string) must be a non-empty string"
|
||||
string(66) "mb_str_pad(): Argument #3 ($pad_string) must be a non-empty string"
|
||||
string(66) "mb_str_pad(): Argument #3 ($pad_string) must be a non-empty string"
|
||||
string(90) "mb_str_pad(): Argument #4 ($pad_type) must be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH"
|
||||
string(82) "mb_str_pad(): Argument #5 ($encoding) must be a valid encoding, "unexisting" given"
|
||||
--- Simple ASCII strings ---
|
||||
string(7) "+Hello+"
|
||||
string(10) "+-World+-+"
|
||||
string(7) "+-Hello"
|
||||
string(10) "+-+-+World"
|
||||
string(7) "Hello+-"
|
||||
string(10) "World+-+-+"
|
||||
--- Edge cases pad length ---
|
||||
string(6) "▶▶"
|
||||
string(6) "▶▶"
|
||||
string(6) "▶▶"
|
||||
string(6) "▶▶"
|
||||
--- Empty input string ---
|
||||
string(2) " "
|
||||
string(1) " "
|
||||
string(0) ""
|
||||
string(0) ""
|
||||
--- No default argument ---
|
||||
string(10) "▶▶ "
|
||||
string(10) " ▶▶"
|
||||
string(10) " ▶▶ "
|
||||
--- UTF-8 emojis ---
|
||||
string(18) "▶▶❤❓❇❤"
|
||||
string(18) "❤❓❇❤▶▶"
|
||||
string(18) "❤❓▶▶❤❓"
|
||||
string(15) "▶▶❤❓❇"
|
||||
string(15) "❤❓❇▶▶"
|
||||
string(15) "❤▶▶❤❓"
|
||||
string(12) "▶▶❤❓"
|
||||
string(12) "❤❓▶▶"
|
||||
string(12) "❤▶▶❤"
|
||||
string(9) "▶▶❤"
|
||||
string(9) "❤▶▶"
|
||||
string(9) "▶▶❤"
|
||||
string(6) "▶▶"
|
||||
string(6) "▶▶"
|
||||
string(6) "▶▶"
|
||||
string(6) "▶▶"
|
||||
string(6) "▶▶"
|
||||
string(6) "▶▶"
|
||||
--- UTF-8, 32, 7 test ---
|
||||
string(92) "Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶▶"
|
||||
string(92) "▶▶▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь"
|
||||
string(92) "▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶"
|
||||
string(92) "Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶▶"
|
||||
string(92) "▶▶▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь"
|
||||
string(92) "▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶"
|
||||
string(92) "Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶▶"
|
||||
string(92) "▶▶▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь"
|
||||
string(92) "▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶"
|
||||
Reference in New Issue
Block a user