diff --git a/UPGRADING b/UPGRADING index a34c0c758c4..5adf37fea82 100644 --- a/UPGRADING +++ b/UPGRADING @@ -145,3 +145,6 @@ PHP 8.4 UPGRADE NOTES * The performance of DOMNode::C14N() is greatly improved for the case without an xpath query. This can give a time improvement of easily two order of magnitude for documents with tens of thousands of nodes. + +* The performance of strspn() is greatly improved. It now runs in linear time + instead of being bounded by quadratic time. diff --git a/ext/standard/string.c b/ext/standard/string.c index 266f8c93c15..d709ff8cb4f 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -1597,19 +1597,40 @@ PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len) /* }}} */ /* {{{ php_strspn */ -PHPAPI size_t php_strspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end) +PHPAPI size_t php_strspn(const char *haystack, const char *characters, const char *haystack_end, const char *characters_end) { - const char *p = s1, *spanp; - char c = *p; - -cont: - for (spanp = s2; p != s1_end && spanp != s2_end;) { - if (*spanp++ == c) { - c = *(++p); - goto cont; + /* Returns the length of the leading run of bytes in [haystack, haystack_end) that all occur in [characters, characters_end). + * Fast path for when characters is exactly one byte long: each haystack byte is compared against that single byte directly. + * The table lookup cannot be faster in this case because we not only have to compare, but also build the table. + * Empirically tested that the table lookup approach is only beneficial if characters is longer than 1 character. */ + if (characters_end - characters == 1) { + const char *ptr = haystack; + while (ptr < haystack_end && *ptr == *characters) { + ptr++; } + return ptr - haystack; } - return (p - s1); + + /* table[b] is set to true for every byte value b that occurs in characters. + * While looping over haystack, membership is then a single table lookup; bytes are cast to unsigned char so the index is never negative. */ + bool table[256]; + /* Zero the table with four 64-byte memsets instead of a single 256-byte one so that each call can be inlined with intrinsics, trick learned from glibc. NOTE(review): the byte counts and the table + 64 style offsets assume sizeof(bool) == 1 - confirm for all supported compilers. 
*/ + memset(table, 0, 64); + memset(table + 64, 0, 64); + memset(table + 128, 0, 64); + memset(table + 192, 0, 64); + + while (characters < characters_end) { + table[(unsigned char) *characters] = true; + characters++; + } + + const char *ptr = haystack; + while (ptr < haystack_end && table[(unsigned char) *ptr]) { + ptr++; + } + + return ptr - haystack; } /* }}} */