mirror of
https://github.com/php/php-src.git
synced 2026-03-24 00:02:20 +01:00
Optimize strspn()
The current implementation uses a nested loop (for + goto), which has complexity O(|s1| * |s2|). If we instead use a lookup table, the complexity drops to O(|s1| + |s2|). This is conceptually the same strategy that common C library implementations such as glibc and musl use. The variation with a bitvector instead of a table also gives a speed-up, but the table variation was about 1.34x faster. On microbenchmarks this easily gave a 5x speedup. This can bring a 1.4-1.5% performance improvement in the Symfony benchmark. Closes GH-12431.
This commit is contained in:
@@ -145,3 +145,6 @@ PHP 8.4 UPGRADE NOTES
|
||||
* The performance of DOMNode::C14N() is greatly improved for the case without
|
||||
an xpath query. This can give a time improvement of easily two orders of
|
||||
magnitude for documents with tens of thousands of nodes.
|
||||
|
||||
* The performance of strspn() is greatly improved. It now runs in linear time
|
||||
instead of being bounded by quadratic time.
|
||||
|
||||
@@ -1597,19 +1597,40 @@ PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len)
|
||||
/* }}} */
|
||||
|
||||
/* {{{ php_strspn */
|
||||
PHPAPI size_t php_strspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end)
|
||||
PHPAPI size_t php_strspn(const char *haystack, const char *characters, const char *haystack_end, const char *characters_end)
|
||||
{
|
||||
const char *p = s1, *spanp;
|
||||
char c = *p;
|
||||
|
||||
cont:
|
||||
for (spanp = s2; p != s1_end && spanp != s2_end;) {
|
||||
if (*spanp++ == c) {
|
||||
c = *(++p);
|
||||
goto cont;
|
||||
/* Fast path for short strings.
|
||||
* The table lookup cannot be faster in this case because we not only have to compare, but also build the table.
|
||||
* We only compare in this case.
|
||||
* Empirically tested that the table lookup approach is only beneficial if characters is longer than 1 character. */
|
||||
if (characters_end - characters == 1) {
|
||||
const char *ptr = haystack;
|
||||
while (ptr < haystack_end && *ptr == *characters) {
|
||||
ptr++;
|
||||
}
|
||||
return ptr - haystack;
|
||||
}
|
||||
return (p - s1);
|
||||
|
||||
/* Every character in characters will set a boolean in this lookup table.
|
||||
* We'll use the lookup table as a fast lookup for the characters in characters while looping over haystack. */
|
||||
bool table[256];
|
||||
/* Use multiple small memsets to inline the memset with intrinsics, trick learned from glibc. */
|
||||
memset(table, 0, 64);
|
||||
memset(table + 64, 0, 64);
|
||||
memset(table + 128, 0, 64);
|
||||
memset(table + 192, 0, 64);
|
||||
|
||||
while (characters < characters_end) {
|
||||
table[(unsigned char) *characters] = true;
|
||||
characters++;
|
||||
}
|
||||
|
||||
const char *ptr = haystack;
|
||||
while (ptr < haystack_end && table[(unsigned char) *ptr]) {
|
||||
ptr++;
|
||||
}
|
||||
|
||||
return ptr - haystack;
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
|
||||
Reference in New Issue
Block a user