mirror of
https://github.com/php/php-src.git
synced 2026-03-24 00:02:20 +01:00
Improve performance of urldecode() and rawurldecode()
There are two hot spots on my machines:
1. We copy the string because the internal PHP API works in-place.
2. The conversion of hex characters is slow due to going through the C
locale handling.
This patch resolves the first hot spot by introducing 2 new internal
APIs that avoid the redundant copy and allocate an empty string upfront.
The second hot spot is resolved by having a specialised htoi handler.
For the following benchmark:
```php
$encoded = "Hello%20World%21+This%20is%20a%20test%3A%20%40%23%24%25%5E%26*%28%29";
for ($i=0;$i<2000000;$i++) {
rawurldecode($encoded);
urldecode($encoded);
}
```
On an i7-4790:
```
Benchmark 1: ./sapi/cli/php x.php
Time (mean ± σ): 364.8 ms ± 3.7 ms [User: 359.9 ms, System: 3.3 ms]
Range (min … max): 359.9 ms … 372.0 ms 10 runs
Benchmark 2: ./sapi/cli/php_old x.php
Time (mean ± σ): 565.5 ms ± 4.9 ms [User: 561.8 ms, System: 2.5 ms]
Range (min … max): 560.7 ms … 578.2 ms 10 runs
Summary
./sapi/cli/php x.php ran
1.55 ± 0.02 times faster than ./sapi/cli/php_old x.php
```
On an i7-1185G7:
```
Benchmark 1: ./sapi/cli/php x.php
Time (mean ± σ): 708.8 ms ± 6.1 ms [User: 701.4 ms, System: 6.3 ms]
Range (min … max): 701.9 ms … 722.3 ms 10 runs
Benchmark 2: ./sapi/cli/php_old x.php
Time (mean ± σ): 1.311 s ± 0.019 s [User: 1.300 s, System: 0.008 s]
Range (min … max): 1.281 s … 1.348 s 10 runs
Summary
./sapi/cli/php x.php ran
1.85 ± 0.03 times faster than ./sapi/cli/php_old x.php
```
Closes GH-18378.
This commit is contained in:
@@ -476,6 +476,7 @@ PHP 8.5 UPGRADE NOTES
|
||||
- Standard:
|
||||
. Improved performance of array functions with callbacks
|
||||
(array_find, array_filter, array_map, usort, ...).
|
||||
. Improved performance of urldecode() and rawurldecode().
|
||||
|
||||
- XMLReader:
|
||||
. Improved property access performance.
|
||||
|
||||
@@ -61,6 +61,10 @@ PHP 8.5 INTERNALS UPGRADE NOTES
|
||||
is still valid. This is useful when a GC cycle is collected and the
|
||||
database object can be destroyed prior to destroying the statement.
|
||||
|
||||
- ext/standard
|
||||
. Added php_url_decode_ex() and php_raw_url_decode_ex() that, unlike their
|
||||
non-ex counterparts, do not work in-place.
|
||||
|
||||
========================
|
||||
4. OpCode changes
|
||||
========================
|
||||
|
||||
@@ -411,21 +411,24 @@ done:
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* https://stackoverflow.com/questions/34365746/whats-the-fastest-way-to-convert-hex-to-integer-in-c */
|
||||
static unsigned int php_htoi_single(unsigned char x)
|
||||
{
|
||||
ZEND_ASSERT((x >= 'a' && x <= 'f') || (x >= 'A' && x <= 'F') || (x >= '0' && x <= '9'));
|
||||
return 9 * (x >> 6) + (x & 0xf);
|
||||
}
|
||||
|
||||
/* {{{ php_htoi */
|
||||
static int php_htoi(char *s)
|
||||
static int php_htoi(const char *s)
|
||||
{
|
||||
int value;
|
||||
int c;
|
||||
unsigned char c;
|
||||
|
||||
c = ((unsigned char *)s)[0];
|
||||
if (isupper(c))
|
||||
c = tolower(c);
|
||||
value = (c >= '0' && c <= '9' ? c - '0' : c - 'a' + 10) * 16;
|
||||
value = php_htoi_single(c) * 16;
|
||||
|
||||
c = ((unsigned char *)s)[1];
|
||||
if (isupper(c))
|
||||
c = tolower(c);
|
||||
value += c >= '0' && c <= '9' ? c - '0' : c - 'a' + 10;
|
||||
value += php_htoi_single(c);
|
||||
|
||||
return (value);
|
||||
}
|
||||
@@ -572,28 +575,27 @@ PHP_FUNCTION(urldecode)
|
||||
Z_PARAM_STR(in_str)
|
||||
ZEND_PARSE_PARAMETERS_END();
|
||||
|
||||
out_str = zend_string_init(ZSTR_VAL(in_str), ZSTR_LEN(in_str), 0);
|
||||
ZSTR_LEN(out_str) = php_url_decode(ZSTR_VAL(out_str), ZSTR_LEN(out_str));
|
||||
out_str = zend_string_alloc(ZSTR_LEN(in_str), false);
|
||||
ZSTR_LEN(out_str) = php_url_decode_ex(ZSTR_VAL(out_str), ZSTR_VAL(in_str), ZSTR_LEN(in_str));
|
||||
|
||||
RETURN_NEW_STR(out_str);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ php_url_decode */
|
||||
PHPAPI size_t php_url_decode(char *str, size_t len)
|
||||
PHPAPI size_t php_url_decode_ex(char *dest, const char *src, size_t src_len)
|
||||
{
|
||||
char *dest = str;
|
||||
char *data = str;
|
||||
char *dest_start = dest;
|
||||
const char *data = src;
|
||||
|
||||
while (len--) {
|
||||
while (src_len--) {
|
||||
if (*data == '+') {
|
||||
*dest = ' ';
|
||||
}
|
||||
else if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
|
||||
else if (*data == '%' && src_len >= 2 && isxdigit((int) *(data + 1))
|
||||
&& isxdigit((int) *(data + 2))) {
|
||||
*dest = (char) php_htoi(data + 1);
|
||||
data += 2;
|
||||
len -= 2;
|
||||
src_len -= 2;
|
||||
} else {
|
||||
*dest = *data;
|
||||
}
|
||||
@@ -601,7 +603,13 @@ PHPAPI size_t php_url_decode(char *str, size_t len)
|
||||
dest++;
|
||||
}
|
||||
*dest = '\0';
|
||||
return dest - str;
|
||||
return dest - dest_start;
|
||||
}
|
||||
|
||||
/* {{{ php_url_decode */
|
||||
PHPAPI size_t php_url_decode(char *str, size_t len)
|
||||
{
|
||||
return php_url_decode_ex(str, str, len);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
@@ -634,25 +642,24 @@ PHP_FUNCTION(rawurldecode)
|
||||
Z_PARAM_STR(in_str)
|
||||
ZEND_PARSE_PARAMETERS_END();
|
||||
|
||||
out_str = zend_string_init(ZSTR_VAL(in_str), ZSTR_LEN(in_str), 0);
|
||||
ZSTR_LEN(out_str) = php_raw_url_decode(ZSTR_VAL(out_str), ZSTR_LEN(out_str));
|
||||
out_str = zend_string_alloc(ZSTR_LEN(in_str), false);
|
||||
ZSTR_LEN(out_str) = php_raw_url_decode_ex(ZSTR_VAL(out_str), ZSTR_VAL(in_str), ZSTR_LEN(in_str));
|
||||
|
||||
RETURN_NEW_STR(out_str);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ php_raw_url_decode */
|
||||
PHPAPI size_t php_raw_url_decode(char *str, size_t len)
|
||||
PHPAPI size_t php_raw_url_decode_ex(char *dest, const char *src, size_t src_len)
|
||||
{
|
||||
char *dest = str;
|
||||
char *data = str;
|
||||
char *dest_start = dest;
|
||||
const char *data = src;
|
||||
|
||||
while (len--) {
|
||||
if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
|
||||
while (src_len--) {
|
||||
if (*data == '%' && src_len >= 2 && isxdigit((int) *(data + 1))
|
||||
&& isxdigit((int) *(data + 2))) {
|
||||
*dest = (char) php_htoi(data + 1);
|
||||
data += 2;
|
||||
len -= 2;
|
||||
src_len -= 2;
|
||||
} else {
|
||||
*dest = *data;
|
||||
}
|
||||
@@ -660,7 +667,13 @@ PHPAPI size_t php_raw_url_decode(char *str, size_t len)
|
||||
dest++;
|
||||
}
|
||||
*dest = '\0';
|
||||
return dest - str;
|
||||
return dest - dest_start;
|
||||
}
|
||||
|
||||
/* {{{ php_raw_url_decode */
|
||||
PHPAPI size_t php_raw_url_decode(char *str, size_t len)
|
||||
{
|
||||
return php_raw_url_decode_ex(str, str, len);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
|
||||
@@ -33,7 +33,9 @@ PHPAPI php_url *php_url_parse(char const *str);
|
||||
PHPAPI php_url *php_url_parse_ex(char const *str, size_t length);
|
||||
PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port);
|
||||
PHPAPI size_t php_url_decode(char *str, size_t len); /* return value: length of decoded string */
|
||||
PHPAPI size_t php_url_decode_ex(char *dest, const char *src, size_t src_len);
|
||||
PHPAPI size_t php_raw_url_decode(char *str, size_t len); /* return value: length of decoded string */
|
||||
PHPAPI size_t php_raw_url_decode_ex(char *dest, const char *src, size_t src_len);
|
||||
PHPAPI zend_string *php_url_encode(char const *s, size_t len);
|
||||
PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user