mirror of
https://github.com/php/php-src.git
synced 2026-03-24 00:02:20 +01:00
zend_compile: Optimize sprintf() into a rope (#14546)
* zend_compile: Add `zend_compile_rope_finalize()` This just extracts the implementation as-is into a dedicated function to make it reusable in preparation of a future commit. * zend_compile: Use clearer parameter names for `zend_compile_rope_finalize()` * zend_compile: Fix `zend_compile_rope_finalize()` for ropes containing a single constant string Without this Opcache will trigger a use-after-free in `zend_optimizer_compact_literals()`. Co-authored-by: Ilija Tovilo <ilija.tovilo@me.com> * zend_compile: Optimize `sprintf()` into a rope This optimization will compile `sprintf()` using only `%s` placeholders into a rope at compile time, effectively making those calls equivalent to the use of string interpolation, with the added benefit of supporting arbitrary expressions instead of just expressions starting with a `$`. For a synthetic test using: <?php $a = 'foo'; $b = 'bar'; for ($i = 0; $i < 100_000_000; $i++) { sprintf("%s-%s", $a, $b); } This optimization yields a 2.1× performance improvement: $ hyperfine 'sapi/cli/php -d zend_extension=php-src/modules/opcache.so -d opcache.enable_cli=1 test.php' \ '/tmp/unoptimized -d zend_extension=php-src/modules/opcache.so -d opcache.enable_cli=1 test.php' Benchmark 1: sapi/cli/php -d zend_extension=php-src/modules/opcache.so -d opcache.enable_cli=1 test.php Time (mean ± σ): 1.869 s ± 0.033 s [User: 1.865 s, System: 0.003 s] Range (min … max): 1.840 s … 1.945 s 10 runs Benchmark 2: /tmp/unoptimized -d zend_extension=php-src/modules/opcache.so -d opcache.enable_cli=1 test.php Time (mean ± σ): 4.011 s ± 0.034 s [User: 4.006 s, System: 0.005 s] Range (min … max): 3.964 s … 4.079 s 10 runs Summary sapi/cli/php -d zend_extension=php-src/modules/opcache.so -d opcache.enable_cli=1 test.php ran 2.15 ± 0.04 times faster than /tmp/unoptimized -d zend_extension=php-src/modules/opcache.so -d opcache.enable_cli=1 test.php This optimization comes with a small and probably insignificant behavioral change: If one of the values cannot be 
(cleanly) converted to a string, for example when attempting to insert an object that is not `Stringable`, the resulting Exception will naturally not show the `sprintf()` call in the resulting stack trace, because there is no call to `sprintf()`. Nevertheless it will correctly point out the line of the `sprintf()` call as the source of the Exception, pointing the user towards the correct location. * zend_compile: Eagerly handle empty format strings in `sprintf()` optimization * zend_compile: Add additional explanatory comments to zend_compile_func_sprintf() * Add zero-argument test to sprintf_rope_optimization_001.phpt --------- Co-authored-by: Ilija Tovilo <ilija.tovilo@me.com>
This commit is contained in:
@@ -4712,6 +4712,171 @@ static void zend_compile_ns_call(znode *result, znode *name_node, zend_ast *args
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
static zend_op *zend_compile_rope_add(znode *result, uint32_t num, znode *elem_node);
|
||||
static zend_op *zend_compile_rope_add_ex(zend_op *opline, znode *result, uint32_t num, znode *elem_node);
|
||||
static void zend_compile_rope_finalize(znode *result, uint32_t j, zend_op *init_opline, zend_op *opline);
|
||||
|
||||
/* Try to compile a sprintf() call into a rope instead of a function call.
 *
 * The call qualifies only if the format is a compile-time constant string
 * shorter than 256 bytes that consists solely of literal text, "%s" and "%%",
 * and the number of "%s" placeholders equals the number of value arguments.
 *
 * Returns FAILURE when the call does not qualify; every FAILURE return happens
 * before any value expression is compiled. Returns SUCCESS after `result` has
 * been filled in (a constant empty string for an empty format, otherwise
 * whatever zend_compile_rope_finalize() produces).
 */
static zend_result zend_compile_func_sprintf(znode *result, zend_ast_list *args) /* {{{ */
{
	/* Without a format string there is nothing to optimize. */
	if (args->children < 1) {
		return FAILURE;
	}

	/* The format must fold to a constant ... */
	zend_eval_const_expr(&args->child[0]);
	if (args->child[0]->kind != ZEND_AST_ZVAL) {
		return FAILURE;
	}

	/* ... and that constant must be a string of fewer than 256 bytes. */
	zval *format = zend_ast_get_zval(args->child[0]);
	if (Z_TYPE_P(format) != IS_STRING || Z_STRLEN_P(format) >= 256) {
		return FAILURE;
	}

	char *const format_begin = Z_STRVAL_P(format);
	char *const format_end = format_begin + Z_STRLEN_P(format);

	/* First pass: verify that only "%s" and "%%" occur, counting the "%s". */
	uint32_t string_placeholder_count = 0;
	char *scan = format_begin;
	while ((scan = memchr(scan, '%', format_end - scan)) != NULL) {
		char *spec = scan + 1;

		/* A '%' as the very last byte has no conversion character. */
		if (spec == format_end) {
			return FAILURE;
		}

		if (*spec == 's') {
			string_placeholder_count++;
		} else if (*spec != '%') {
			/* Any other conversion is left to the real sprintf(). */
			return FAILURE;
		}

		/* Continue behind the two-byte specification. */
		scan = spec + 1;
	}

	/* Placeholders and values must pair up exactly. */
	if (string_placeholder_count != (args->children - 1)) {
		return FAILURE;
	}

	/* An empty format yields the empty string without emitting any opcode. */
	if (Z_STRLEN_P(format) == 0) {
		result->op_type = IS_CONST;
		ZVAL_EMPTY_STRING(&result->u.constant);

		return SUCCESS;
	}

	znode *elements = string_placeholder_count > 0
		? safe_emalloc(sizeof(*elements), string_placeholder_count, 0)
		: NULL;

	/* All value expressions are compiled up front, before any rope opcode,
	 * for error handling that is consistent with a function call: values
	 * that fail to convert to a string may emit errors.
	 */
	for (uint32_t i = 0; i < string_placeholder_count; i++) {
		znode *value = &elements[i];

		zend_compile_expr(value, args->child[i + 1]);
		/* Constant arrays are deliberately not converted here; they get a
		 * cast opcode in the second pass below.
		 */
		if (value->op_type == IS_CONST && Z_TYPE(value->u.constant) != IS_ARRAY) {
			convert_to_string(&value->u.constant);
		}
	}

	/* Second pass: emit the rope, alternating literal parts and values. */
	uint32_t rope_elements = 0;
	uint32_t rope_init_lineno = (uint32_t)-1;
	uint32_t value_index = 0;
	zend_op *opline = NULL;
	char *literal_start = format_begin;
	char *percent;

	while ((percent = memchr(literal_start, '%', format_end - literal_start)) != NULL) {
		char *spec = percent + 1;
		ZEND_ASSERT(spec < format_end);
		ZEND_ASSERT(*spec == 's' || *spec == '%');

		/* Optimization to not create a dedicated rope element for the
		 * literal '%': for "%%" the first '%' is kept as the tail of the
		 * preceding literal part, and only the second one is skipped.
		 */
		char *literal_end = (*spec == '%') ? spec : percent;

		if (literal_end != literal_start) {
			znode literal_node;

			literal_node.op_type = IS_CONST;
			ZVAL_STRINGL(&literal_node.u.constant, literal_start, literal_end - literal_start);
			if (rope_elements == 0) {
				/* Remember where the rope starts for the finalize step. */
				rope_init_lineno = get_next_op_number();
			}
			opline = zend_compile_rope_add(result, rope_elements++, &literal_node);
		}

		if (*spec == 's') {
			znode *value = &elements[value_index];

			/* Perform the cast of constant arrays when actually evaluating
			 * the corresponding placeholder for correct error reporting.
			 */
			if (value->op_type == IS_CONST && Z_TYPE(value->u.constant) == IS_ARRAY) {
				zend_emit_op_tmp(value, ZEND_CAST, value, NULL)->extended_value = IS_STRING;
			}
			if (rope_elements == 0) {
				rope_init_lineno = get_next_op_number();
			}
			opline = zend_compile_rope_add(result, rope_elements++, value);

			value_index++;
		}

		/* The next literal part begins right behind the specification. */
		literal_start = spec + 1;
	}

	if (literal_start != format_end) {
		/* Add the constant part after the last placeholder. */
		znode literal_node;

		literal_node.op_type = IS_CONST;
		ZVAL_STRINGL(&literal_node.u.constant, literal_start, format_end - literal_start);
		if (rope_elements == 0) {
			rope_init_lineno = get_next_op_number();
		}
		opline = zend_compile_rope_add(result, rope_elements++, &literal_node);
	}
	ZEND_ASSERT(opline != NULL);

	zend_op *rope_start = CG(active_op_array)->opcodes + rope_init_lineno;
	zend_compile_rope_finalize(result, rope_elements, rope_start, opline);
	efree(elements);

	return SUCCESS;
}
|
||||
|
||||
static zend_result zend_try_compile_special_func_ex(znode *result, zend_string *lcname, zend_ast_list *args, zend_function *fbc, uint32_t type) /* {{{ */
|
||||
{
|
||||
if (zend_string_equals_literal(lcname, "strlen")) {
|
||||
@@ -4778,6 +4943,8 @@ static zend_result zend_try_compile_special_func_ex(znode *result, zend_string *
|
||||
return zend_compile_func_array_slice(result, args);
|
||||
} else if (zend_string_equals_literal(lcname, "array_key_exists")) {
|
||||
return zend_compile_func_array_key_exists(result, args);
|
||||
} else if (zend_string_equals_literal(lcname, "sprintf")) {
|
||||
return zend_compile_func_sprintf(result, args);
|
||||
} else {
|
||||
return FAILURE;
|
||||
}
|
||||
@@ -10188,6 +10355,59 @@ static zend_op *zend_compile_rope_add(znode *result, uint32_t num, znode *elem_n
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* Give a just-emitted rope its final shape, depending on its element count.
 *
 *   result        - receives the node holding the resulting value
 *   rope_elements - number of elements that were added to the rope
 *   init_opline   - the opline that started the rope
 *   opline        - the opline that added the last element
 *
 * One element becomes either the constant itself or a string cast, two
 * elements become a ZEND_FAST_CONCAT, and anything larger is closed with
 * ZEND_ROPE_END and has its temporaries assigned.
 */
static void zend_compile_rope_finalize(znode *result, uint32_t rope_elements, zend_op *init_opline, zend_op *opline)
{
	if (rope_elements == 1) {
		if (opline->op2_type != IS_CONST) {
			/* A single non-constant element: rewrite the opline into a
			 * string cast of that element.
			 */
			opline->opcode = ZEND_CAST;
			opline->extended_value = IS_STRING;
			opline->op1_type = opline->op2_type;
			opline->op1 = opline->op2;
			SET_UNUSED(opline->op2);
			zend_make_tmp_result(result, opline);
			return;
		}

		/* A single constant element: hand the constant to `result` and drop
		 * the opline. The literal slot is reset to UNDEF afterwards.
		 * NOTE(review): presumably this keeps Opcache's literal compaction
		 * from touching a value that `result` now owns - confirm.
		 */
		GET_NODE(result, opline->op2);
		ZVAL_UNDEF(CT_CONSTANT(opline->op2));
		SET_UNUSED(opline->op2);
		MAKE_NOP(opline);
		return;
	}

	if (rope_elements == 2) {
		/* Two elements: fold the initial element into the last opline as
		 * op1 and turn it into a concatenation; the init opline is dropped.
		 */
		opline->opcode = ZEND_FAST_CONCAT;
		opline->extended_value = 0;
		opline->op1_type = init_opline->op2_type;
		opline->op1 = init_opline->op2;
		zend_make_tmp_result(result, opline);
		MAKE_NOP(init_opline);
		return;
	}

	/* General case: the last opline closes the rope. */
	init_opline->extended_value = rope_elements;
	opline->opcode = ZEND_ROPE_END;
	zend_make_tmp_result(result, opline);

	uint32_t rope_var = get_temporary_variable();
	opline->op1.var = rope_var;

	/* Allocates the necessary number of zval slots to keep the rope; the
	 * first slot is `rope_var` itself, hence the loop stops at 1.
	 */
	for (uint32_t slots = ((rope_elements * sizeof(zend_string*)) + (sizeof(zval) - 1)) / sizeof(zval);
			slots > 1; slots--) {
		get_temporary_variable();
	}

	/* Walk back to the init opline and point every rope opline that still
	 * carries the placeholder variable (-1) at the rope's variable.
	 */
	zend_op *cur = opline;
	while (cur != init_opline) {
		cur--;

		if (cur->result.var != (uint32_t)-1) {
			continue;
		}

		switch (cur->opcode) {
			case ZEND_ROPE_ADD:
				cur->op1.var = rope_var;
				cur->result.var = rope_var;
				break;
			case ZEND_ROPE_INIT:
				cur->result.var = rope_var;
				break;
			default:
				/* Unrelated opline emitted between rope elements. */
				break;
		}
	}
}
|
||||
|
||||
static void zend_compile_encaps_list(znode *result, zend_ast *ast) /* {{{ */
|
||||
{
|
||||
uint32_t i, j;
|
||||
@@ -10263,53 +10483,7 @@ static void zend_compile_encaps_list(znode *result, zend_ast *ast) /* {{{ */
|
||||
opline = zend_compile_rope_add_ex(opline, result, j++, &last_const_node);
|
||||
}
|
||||
init_opline = CG(active_op_array)->opcodes + rope_init_lineno;
|
||||
if (j == 1) {
|
||||
if (opline->op2_type == IS_CONST) {
|
||||
GET_NODE(result, opline->op2);
|
||||
MAKE_NOP(opline);
|
||||
} else {
|
||||
opline->opcode = ZEND_CAST;
|
||||
opline->extended_value = IS_STRING;
|
||||
opline->op1_type = opline->op2_type;
|
||||
opline->op1 = opline->op2;
|
||||
SET_UNUSED(opline->op2);
|
||||
zend_make_tmp_result(result, opline);
|
||||
}
|
||||
} else if (j == 2) {
|
||||
opline->opcode = ZEND_FAST_CONCAT;
|
||||
opline->extended_value = 0;
|
||||
opline->op1_type = init_opline->op2_type;
|
||||
opline->op1 = init_opline->op2;
|
||||
zend_make_tmp_result(result, opline);
|
||||
MAKE_NOP(init_opline);
|
||||
} else {
|
||||
uint32_t var;
|
||||
|
||||
init_opline->extended_value = j;
|
||||
opline->opcode = ZEND_ROPE_END;
|
||||
zend_make_tmp_result(result, opline);
|
||||
var = opline->op1.var = get_temporary_variable();
|
||||
|
||||
/* Allocates the necessary number of zval slots to keep the rope */
|
||||
i = ((j * sizeof(zend_string*)) + (sizeof(zval) - 1)) / sizeof(zval);
|
||||
while (i > 1) {
|
||||
get_temporary_variable();
|
||||
i--;
|
||||
}
|
||||
|
||||
/* Update all the previous opcodes to use the same variable */
|
||||
while (opline != init_opline) {
|
||||
opline--;
|
||||
if (opline->opcode == ZEND_ROPE_ADD &&
|
||||
opline->result.var == (uint32_t)-1) {
|
||||
opline->op1.var = var;
|
||||
opline->result.var = var;
|
||||
} else if (opline->opcode == ZEND_ROPE_INIT &&
|
||||
opline->result.var == (uint32_t)-1) {
|
||||
opline->result.var = var;
|
||||
}
|
||||
}
|
||||
}
|
||||
zend_compile_rope_finalize(result, j, init_opline, opline);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
|
||||
176
ext/standard/tests/strings/sprintf_rope_optimization_001.phpt
Normal file
176
ext/standard/tests/strings/sprintf_rope_optimization_001.phpt
Normal file
@@ -0,0 +1,176 @@
|
||||
--TEST--
|
||||
Test sprintf() function : Rope Optimization
|
||||
--FILE--
|
||||
<?php
|
||||
function func($str) {
|
||||
return strtoupper($str);
|
||||
}
|
||||
function sideeffect() {
|
||||
echo "Called!\n";
|
||||
return "foo";
|
||||
}
|
||||
class Foo {
|
||||
public function __construct() {
|
||||
echo "Called\n";
|
||||
}
|
||||
}
|
||||
|
||||
$a = "foo";
|
||||
$b = "bar";
|
||||
$c = new stdClass();
|
||||
|
||||
try {
|
||||
var_dump(sprintf("const"));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf("%s", $a));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf("%s/%s", $a, $b));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf("%s/%s/%s", $a, $b));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf("%s/%s/%s", $a, $b, $c));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf("%s/", func("baz")));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf("/%s", func("baz")));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf("/%s/", func("baz")));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf("%s%s%s%s", $a, $b, func("baz"), $a));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf("%s/%s", sprintf("%s:%s", $a, $b), sprintf("%s-%s", func('baz'), func('baz'))));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf(sideeffect()));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf("%s-%s-%s", __FILE__, __LINE__, 1));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
$values = range('a', 'z');
|
||||
var_dump(sprintf("%s%s%s", "{$values[0]}{$values[1]}{$values[2]}", "{$values[3]}{$values[4]}{$values[5]}", "{$values[6]}{$values[7]}{$values[8]}"));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf("%s%s%s", new Foo(), new Foo(), new Foo(), ));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf(...));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf('%%s'));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf('%%s', 'test'));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf('%s-%s-%s', [], [], []));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf(""));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
try {
|
||||
var_dump(sprintf());
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
echo "Done";
|
||||
?>
|
||||
--EXPECTF--
|
||||
string(5) "const"
|
||||
|
||||
string(3) "foo"
|
||||
|
||||
string(7) "foo/bar"
|
||||
|
||||
ArgumentCountError: 4 arguments are required, 3 given in %s:32
|
||||
Stack trace:
|
||||
#0 %s(32): sprintf('%s/%s/%s', 'foo', 'bar')
|
||||
#1 {main}
|
||||
|
||||
Error: Object of class stdClass could not be converted to string in %s:36
|
||||
Stack trace:
|
||||
#0 {main}
|
||||
|
||||
string(4) "BAZ/"
|
||||
|
||||
string(4) "/BAZ"
|
||||
|
||||
string(5) "/BAZ/"
|
||||
|
||||
string(12) "foobarBAZfoo"
|
||||
|
||||
string(15) "foo:bar/BAZ-BAZ"
|
||||
|
||||
Called!
|
||||
string(3) "foo"
|
||||
|
||||
string(%d) "%ssprintf_rope_optimization_001.php-%d-1"
|
||||
|
||||
string(9) "abcdefghi"
|
||||
|
||||
Called
|
||||
Called
|
||||
Called
|
||||
Error: Object of class Foo could not be converted to string in %s:73
|
||||
Stack trace:
|
||||
#0 {main}
|
||||
|
||||
object(Closure)#3 (2) {
|
||||
["function"]=>
|
||||
string(7) "sprintf"
|
||||
["parameter"]=>
|
||||
array(2) {
|
||||
["$format"]=>
|
||||
string(10) "<required>"
|
||||
["$values"]=>
|
||||
string(10) "<optional>"
|
||||
}
|
||||
}
|
||||
|
||||
string(2) "%s"
|
||||
|
||||
string(2) "%s"
|
||||
|
||||
|
||||
Warning: Array to string conversion in %s on line 89
|
||||
|
||||
Warning: Array to string conversion in %s on line 89
|
||||
|
||||
Warning: Array to string conversion in %s on line 89
|
||||
string(17) "Array-Array-Array"
|
||||
|
||||
string(0) ""
|
||||
|
||||
ArgumentCountError: sprintf() expects at least 1 argument, 0 given in %s:97
|
||||
Stack trace:
|
||||
#0 %s(97): sprintf()
|
||||
#1 {main}
|
||||
|
||||
Done
|
||||
@@ -0,0 +1,26 @@
|
||||
--TEST--
|
||||
Test sprintf() function : Rope Optimization with a throwing error handler.
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
function exception_error_handler(int $errno, string $errstr, ?string $errfile, int $errline) {
|
||||
if (!(error_reporting() & $errno)) {
|
||||
// This error code is not included in error_reporting
|
||||
return;
|
||||
}
|
||||
throw new \ErrorException($errstr, 0, $errno, $errfile, $errline);
|
||||
}
|
||||
set_error_handler(exception_error_handler(...));
|
||||
|
||||
try {
|
||||
var_dump(sprintf("%s-%s", new stdClass(), []));
|
||||
} catch (\Throwable $e) {echo $e, PHP_EOL; } echo PHP_EOL;
|
||||
|
||||
echo "Done";
|
||||
?>
|
||||
--EXPECTF--
|
||||
Error: Object of class stdClass could not be converted to string in %s:13
|
||||
Stack trace:
|
||||
#0 {main}
|
||||
|
||||
Done
|
||||
Reference in New Issue
Block a user