1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

[RFC] Implement mb_str_pad() (#11284)

Closes GH-10203.
This commit is contained in:
Niels Dossche
2023-06-20 21:22:04 +02:00
committed by GitHub
parent d9e2da342a
commit 68591632b2
6 changed files with 283 additions and 1 deletions

2
NEWS
View File

@@ -2,6 +2,8 @@ PHP NEWS
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
?? ??? ????, PHP 8.3.0alpha3
- MBString:
. Implement mb_str_pad() RFC. (nielsdos)
22 Jun 2023, PHP 8.3.0alpha2

View File

@@ -208,6 +208,10 @@ PHP 8.3 UPGRADE NOTES
the given $depth and $options.
RFC: https://wiki.php.net/rfc/json_validate
- MBString:
. Added mb_str_pad(), which is the mbstring equivalent of str_pad().
RFC: https://wiki.php.net/rfc/mb_str_pad
- Posix:
. Added posix_sysconf call to get runtime information.
. Added posix_pathconf call to get configuration value from a directory/file.

View File

@@ -5522,6 +5522,132 @@ PHP_FUNCTION(mb_chr)
}
/* }}} */
/* Multibyte-aware counterpart of str_pad().
 *
 * PHP signature:
 *   mb_str_pad(string $string, int $length, string $pad_string = " ",
 *              int $pad_type = STR_PAD_RIGHT, ?string $encoding = null): string
 *
 * Pads $string with repetitions of $pad_string (the last repetition may be cut
 * short on a character boundary) until it is $length characters long, where
 * characters are counted according to $encoding.
 *
 * Returns:
 *   - a copy of the input when $length is negative or not larger than the
 *     input's character count (no other argument is validated in that case);
 *   - otherwise a newly allocated padded string.
 *
 * Throws:
 *   - ValueError when the encoding is unknown, when $pad_string is empty or
 *     holds no complete character, or when $pad_type is not one of
 *     STR_PAD_LEFT / STR_PAD_RIGHT / STR_PAD_BOTH;
 *   - Error ("String size overflow") when the resulting byte length would
 *     exceed ZSTR_MAX_LEN.
 */
PHP_FUNCTION(mb_str_pad)
{
	/* An optional Z_PARAM_STR() leaves its target untouched when the argument
	 * is omitted, so `pad` has to start out as the documented default " ".
	 * Starting from NULL made a plain two-argument call dereference NULL in
	 * the ZSTR_LEN(pad) check below. */
	zend_string *input, *encoding_str = NULL, *pad = ZSTR_CHAR(' ');
	zend_long pad_to_length;
	zend_long pad_type_val = PHP_STR_PAD_RIGHT;

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(input)
		Z_PARAM_LONG(pad_to_length)
		Z_PARAM_OPTIONAL
		Z_PARAM_STR(pad)
		Z_PARAM_LONG(pad_type_val)
		Z_PARAM_STR_OR_NULL(encoding_str)
	ZEND_PARSE_PARAMETERS_END();

	const mbfl_encoding *encoding = php_mb_get_encoding(encoding_str, 5);
	if (!encoding) {
		RETURN_THROWS();
	}

	size_t input_length = mb_get_strlen(input, encoding);

	/* If resulting string turns out to be shorter than input string,
	   we simply copy the input and return. */
	if (pad_to_length < 0 || (size_t)pad_to_length <= input_length) {
		RETURN_STR_COPY(input);
	}

	if (ZSTR_LEN(pad) == 0) {
		zend_argument_value_error(3, "must be a non-empty string");
		RETURN_THROWS();
	}

	if (pad_type_val < PHP_STR_PAD_LEFT || pad_type_val > PHP_STR_PAD_BOTH) {
		zend_argument_value_error(4, "must be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH");
		RETURN_THROWS();
	}

	size_t pad_length = mb_get_strlen(pad, encoding);
	/* pad_length is a character count and is used as a divisor below, while
	 * the check above only rules out a zero *byte* length.
	 * NOTE(review): presumably a non-empty pad can still count as zero
	 * characters (e.g. a truncated fixed-width unit or a bare escape sequence
	 * in a stateful encoding) -- confirm against mb_get_strlen(). Rejecting it
	 * here keeps the divisions and modulo operations well defined. */
	if (UNEXPECTED(pad_length == 0)) {
		zend_argument_value_error(3, "must contain at least one character in the given encoding");
		RETURN_THROWS();
	}

	/* Cannot underflow: pad_to_length > input_length was established above. */
	size_t num_mb_pad_chars = pad_to_length - input_length;

	/* We need to figure out the left/right padding lengths. */
	size_t left_pad = 0, right_pad = 0; /* Initialize here to silence compiler warnings. */
	switch (pad_type_val) {
		case PHP_STR_PAD_RIGHT:
			right_pad = num_mb_pad_chars;
			break;

		case PHP_STR_PAD_LEFT:
			left_pad = num_mb_pad_chars;
			break;

		case PHP_STR_PAD_BOTH:
			/* With an odd amount of padding the extra character goes on the right. */
			left_pad = num_mb_pad_chars / 2;
			right_pad = num_mb_pad_chars - left_pad;
			break;
	}

	/* How many full block copies need to happen, and how many characters are then left over? */
	size_t full_left_pad_copies = left_pad / pad_length;
	size_t full_right_pad_copies = right_pad / pad_length;
	size_t remaining_left_pad_chars = left_pad % pad_length;
	size_t remaining_right_pad_chars = right_pad % pad_length;

	/* Guard the copies * byte-length multiplications below against wrap-around.
	 * Nothing has been allocated yet, hence the label that skips the releases. */
	if (UNEXPECTED(full_left_pad_copies > SIZE_MAX / ZSTR_LEN(pad) || full_right_pad_copies > SIZE_MAX / ZSTR_LEN(pad))) {
		goto overflow_no_release;
	}

	/* Compute the number of bytes required for the padding */
	size_t full_left_pad_bytes = full_left_pad_copies * ZSTR_LEN(pad);
	size_t full_right_pad_bytes = full_right_pad_copies * ZSTR_LEN(pad);

	/* No special fast-path handling necessary for zero-length pads because these functions will not
	 * allocate memory in case a zero-length pad is required. */
	zend_string *remaining_left_pad_str = mb_get_substr(pad, 0, remaining_left_pad_chars, encoding);
	zend_string *remaining_right_pad_str = mb_get_substr(pad, 0, remaining_right_pad_chars, encoding);

	if (UNEXPECTED(full_left_pad_bytes > ZSTR_MAX_LEN - ZSTR_LEN(remaining_left_pad_str)
		|| full_right_pad_bytes > ZSTR_MAX_LEN - ZSTR_LEN(remaining_right_pad_str))) {
		goto overflow;
	}

	size_t left_pad_bytes = full_left_pad_bytes + ZSTR_LEN(remaining_left_pad_str);
	size_t right_pad_bytes = full_right_pad_bytes + ZSTR_LEN(remaining_right_pad_str);

	/* The first comparison guarantees left + right fits, which makes the
	 * subtraction chain in the second comparison safe. */
	if (UNEXPECTED(left_pad_bytes > ZSTR_MAX_LEN - right_pad_bytes
		|| ZSTR_LEN(input) > ZSTR_MAX_LEN - left_pad_bytes - right_pad_bytes)) {
		goto overflow;
	}

	zend_string *result = zend_string_alloc(ZSTR_LEN(input) + left_pad_bytes + right_pad_bytes, false);
	char *buffer = ZSTR_VAL(result);

	/* First we pad the left. */
	for (size_t i = 0; i < full_left_pad_copies; i++, buffer += ZSTR_LEN(pad)) {
		memcpy(buffer, ZSTR_VAL(pad), ZSTR_LEN(pad));
	}
	memcpy(buffer, ZSTR_VAL(remaining_left_pad_str), ZSTR_LEN(remaining_left_pad_str));
	buffer += ZSTR_LEN(remaining_left_pad_str);

	/* Then we copy the input string. */
	memcpy(buffer, ZSTR_VAL(input), ZSTR_LEN(input));
	buffer += ZSTR_LEN(input);

	/* Finally, we pad on the right. */
	for (size_t i = 0; i < full_right_pad_copies; i++, buffer += ZSTR_LEN(pad)) {
		memcpy(buffer, ZSTR_VAL(pad), ZSTR_LEN(pad));
	}
	memcpy(buffer, ZSTR_VAL(remaining_right_pad_str), ZSTR_LEN(remaining_right_pad_str));

	ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';

	zend_string_release_ex(remaining_left_pad_str, false);
	zend_string_release_ex(remaining_right_pad_str, false);

	RETURN_NEW_STR(result);

overflow:
	zend_string_release_ex(remaining_left_pad_str, false);
	zend_string_release_ex(remaining_right_pad_str, false);
overflow_no_release:
	zend_throw_error(NULL, "String size overflow");
	RETURN_THROWS();
}
/* {{{ */
PHP_FUNCTION(mb_scrub)
{

View File

@@ -183,6 +183,8 @@ function mb_ord(string $string, ?string $encoding = null): int|false {}
function mb_chr(int $codepoint, ?string $encoding = null): string|false {}
// mbstring equivalent of str_pad(): pads $string with $pad_string until it is
// $length characters long. $pad_type must be STR_PAD_LEFT, STR_PAD_RIGHT or
// STR_PAD_BOTH; an empty $pad_string or an unknown $encoding raises a ValueError.
function mb_str_pad(string $string, int $length, string $pad_string = " ", int $pad_type = STR_PAD_RIGHT, ?string $encoding = null): string {}
#ifdef HAVE_MBREGEX
/** @refcount 1 */
function mb_regex_encoding(?string $encoding = null): string|bool {}

View File

@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
* Stub hash: 26a027093075613056921c4d1a7eee65d52ec5eb */
* Stub hash: 141073d610f862b525406fb7f48ac58b6691080e */
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_language, 0, 0, MAY_BE_STRING|MAY_BE_BOOL)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, language, IS_STRING, 1, "null")
@@ -198,6 +198,14 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_chr, 0, 1, MAY_BE_STRING|MAY_
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
ZEND_END_ARG_INFO()
/* Argument info for mb_str_pad(), mirroring its stub declaration: a string
 * return type, two required arguments (string, length) and three arguments
 * that carry default values (pad_string, pad_type, nullable encoding). */
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_mb_str_pad, 0, 2, IS_STRING, 0)
ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0)
ZEND_ARG_TYPE_INFO(0, length, IS_LONG, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, pad_string, IS_STRING, 0, "\" \"")
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, pad_type, IS_LONG, 0, "STR_PAD_RIGHT")
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
ZEND_END_ARG_INFO()
#if defined(HAVE_MBREGEX)
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_regex_encoding, 0, 0, MAY_BE_STRING|MAY_BE_BOOL)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null")
@@ -346,6 +354,7 @@ ZEND_FUNCTION(mb_check_encoding);
ZEND_FUNCTION(mb_scrub);
ZEND_FUNCTION(mb_ord);
ZEND_FUNCTION(mb_chr);
ZEND_FUNCTION(mb_str_pad);
#if defined(HAVE_MBREGEX)
ZEND_FUNCTION(mb_regex_encoding);
#endif
@@ -440,6 +449,7 @@ static const zend_function_entry ext_functions[] = {
ZEND_FE(mb_scrub, arginfo_mb_scrub)
ZEND_FE(mb_ord, arginfo_mb_ord)
ZEND_FE(mb_chr, arginfo_mb_chr)
ZEND_FE(mb_str_pad, arginfo_mb_str_pad)
#if defined(HAVE_MBREGEX)
ZEND_FE(mb_regex_encoding, arginfo_mb_regex_encoding)
#endif

View File

@@ -0,0 +1,138 @@
--TEST--
mb_str_pad()
--EXTENSIONS--
mbstring
--FILE--
<?php
// Exercises mb_str_pad(): argument validation, ASCII and multibyte padding,
// boundary lengths, the default pad string, and several encodings.
echo "--- Error conditions ---\n";
// An empty pad string is rejected regardless of the pad type.
try {
var_dump(mb_str_pad('▶▶', 6, '', STR_PAD_RIGHT));
} catch (ValueError $e) {
var_dump($e->getMessage());
}
try {
var_dump(mb_str_pad('▶▶', 6, '', STR_PAD_LEFT));
} catch (ValueError $e) {
var_dump($e->getMessage());
}
try {
var_dump(mb_str_pad('▶▶', 6, '', STR_PAD_BOTH));
} catch (ValueError $e) {
var_dump($e->getMessage());
}
// A pad type that is none of the STR_PAD_* constants.
try {
var_dump(mb_str_pad('▶▶', 6, ' ', 123456));
} catch (ValueError $e) {
var_dump($e->getMessage());
}
// An encoding name that does not exist.
try {
var_dump(mb_str_pad('▶▶', 6, ' ', STR_PAD_BOTH, 'unexisting'));
} catch (ValueError $e) {
var_dump($e->getMessage());
}
echo "--- Simple ASCII strings ---\n";
// Two-character pad with an even and an odd amount of padding per pad type,
// so both whole and partial repetitions of the pad are covered.
var_dump(mb_str_pad('Hello', 7, '+-', STR_PAD_BOTH));
var_dump(mb_str_pad('World', 10, '+-', STR_PAD_BOTH));
var_dump(mb_str_pad('Hello', 7, '+-', STR_PAD_LEFT));
var_dump(mb_str_pad('World', 10, '+-', STR_PAD_LEFT));
var_dump(mb_str_pad('Hello', 7, '+-', STR_PAD_RIGHT));
var_dump(mb_str_pad('World', 10, '+-', STR_PAD_RIGHT));
echo "--- Edge cases pad length ---\n";
// Target lengths equal to, below, zero and negative relative to the
// two-character input: the input must come back unchanged.
var_dump(mb_str_pad('▶▶', 2, ' ', STR_PAD_BOTH));
var_dump(mb_str_pad('▶▶', 1, ' ', STR_PAD_BOTH));
var_dump(mb_str_pad('▶▶', 0, ' ', STR_PAD_BOTH));
var_dump(mb_str_pad('▶▶', -1, ' ', STR_PAD_BOTH));
echo "--- Empty input string ---\n";
// With an empty input the result consists of padding only (or stays empty).
var_dump(mb_str_pad('', 2, ' ', STR_PAD_BOTH));
var_dump(mb_str_pad('', 1, ' ', STR_PAD_BOTH));
var_dump(mb_str_pad('', 0, ' ', STR_PAD_BOTH));
var_dump(mb_str_pad('', -1, ' ', STR_PAD_BOTH));
echo "--- No default argument ---\n";
// The named argument skips $pad_string, so its default (a single space) is used.
var_dump(mb_str_pad('▶▶', 6, pad_type: STR_PAD_RIGHT));
var_dump(mb_str_pad('▶▶', 6, pad_type: STR_PAD_LEFT));
var_dump(mb_str_pad('▶▶', 6, pad_type: STR_PAD_BOTH));
echo "--- UTF-8 emojis ---\n";
// Shrink the target length from 6 to 1 so the three-character multibyte pad
// is repeated and cut at different character positions.
for ($i = 6; $i > 0; $i--) {
var_dump(mb_str_pad('▶▶', $i, '❤❓❇', STR_PAD_RIGHT));
var_dump(mb_str_pad('▶▶', $i, '❤❓❇', STR_PAD_LEFT));
var_dump(mb_str_pad('▶▶', $i, '❤❓❇', STR_PAD_BOTH));
}
echo "--- UTF-8, 32, 7 test ---\n";
// Taken from mb_substr.phpt
$utf8 = "Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь";
$utf32 = mb_convert_encoding($utf8, 'UTF-32', 'UTF-8');
$utf7 = mb_convert_encoding($utf8, 'UTF-7', 'UTF-8');
$tests = ["UTF-8" => $utf8, "UTF-32" => $utf32, "UTF-7" => $utf7];
// Pad the same text in each encoding and convert the result back to UTF-8,
// so all three encodings are expected to print identical output.
foreach ($tests as $encoding => $test) {
$pad_str = mb_convert_encoding('▶▶', $encoding, 'UTF-8');
var_dump(mb_convert_encoding(mb_str_pad($test, 44, $pad_str, STR_PAD_RIGHT, $encoding), 'UTF-8', $encoding));
var_dump(mb_convert_encoding(mb_str_pad($test, 44, $pad_str, STR_PAD_LEFT, $encoding), 'UTF-8', $encoding));
var_dump(mb_convert_encoding(mb_str_pad($test, 44, $pad_str, STR_PAD_BOTH, $encoding), 'UTF-8', $encoding));
}
?>
--EXPECT--
--- Error conditions ---
string(66) "mb_str_pad(): Argument #3 ($pad_string) must be a non-empty string"
string(66) "mb_str_pad(): Argument #3 ($pad_string) must be a non-empty string"
string(66) "mb_str_pad(): Argument #3 ($pad_string) must be a non-empty string"
string(90) "mb_str_pad(): Argument #4 ($pad_type) must be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH"
string(82) "mb_str_pad(): Argument #5 ($encoding) must be a valid encoding, "unexisting" given"
--- Simple ASCII strings ---
string(7) "+Hello+"
string(10) "+-World+-+"
string(7) "+-Hello"
string(10) "+-+-+World"
string(7) "Hello+-"
string(10) "World+-+-+"
--- Edge cases pad length ---
string(6) "▶▶"
string(6) "▶▶"
string(6) "▶▶"
string(6) "▶▶"
--- Empty input string ---
string(2) "  "
string(1) " "
string(0) ""
string(0) ""
--- No default argument ---
string(10) "▶▶    "
string(10) "    ▶▶"
string(10) "  ▶▶  "
--- UTF-8 emojis ---
string(18) "▶▶❤❓❇❤"
string(18) "❤❓❇❤▶▶"
string(18) "❤❓▶▶❤❓"
string(15) "▶▶❤❓❇"
string(15) "❤❓❇▶▶"
string(15) "❤▶▶❤❓"
string(12) "▶▶❤❓"
string(12) "❤❓▶▶"
string(12) "❤▶▶❤"
string(9) "▶▶❤"
string(9) "❤▶▶"
string(9) "▶▶❤"
string(6) "▶▶"
string(6) "▶▶"
string(6) "▶▶"
string(6) "▶▶"
string(6) "▶▶"
string(6) "▶▶"
--- UTF-8, 32, 7 test ---
string(92) "Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶▶"
string(92) "▶▶▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь"
string(92) "▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶"
string(92) "Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶▶"
string(92) "▶▶▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь"
string(92) "▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶"
string(92) "Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶▶"
string(92) "▶▶▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь"
string(92) "▶Σὲ γνωρίζω ἀπὸ τὴν κόψη Зарегистрируйтесь▶▶"