mirror of
https://github.com/php/php-src.git
synced 2026-03-26 17:22:15 +01:00
Faster strrpos implementation
This commit is contained in:
@@ -54,7 +54,8 @@ PHP X.Y UPGRADE NOTES
|
||||
. zend_function.common.num_args don't include the variadic argument anymore.
|
||||
. ob_start() no longer issues an E_ERROR, but instead an E_RECOVERABLE_ERROR in case an
|
||||
output buffer is created in an output buffer handler.
|
||||
. Add zend_memnstr_ex, which is based on string matching sunday algo.
|
||||
. Added zend_memnstr_ex, which is based on string matching sunday algo.
|
||||
. Added zend_memnrstr, zend_memnrstr_ex.
|
||||
|
||||
- DBA
|
||||
. dba_delete() now returns false if the key was not found for the inifile
|
||||
|
||||
@@ -2763,54 +2763,96 @@ process_double:
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
static zend_always_inline void zend_memstr_ex_pre(unsigned int td[], const char *needle, size_t needle_len) /* {{{ */ {
|
||||
/*
|
||||
* String matching - Sunday algorithm
|
||||
* http://www.iti.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
|
||||
*/
|
||||
static zend_always_inline void zend_memnstr_ex_pre(unsigned int td[], const char *needle, size_t needle_len, int reverse) /* {{{ */ {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 256; i++) {
|
||||
td[i] = needle_len + 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < needle_len; i++) {
|
||||
td[(unsigned char)needle[i]] = (int)needle_len - i;
|
||||
if (reverse) {
|
||||
for (i = needle_len - 1; i >= 0; i--) {
|
||||
td[(unsigned char)needle[i]] = i + 1;
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < needle_len; i++) {
|
||||
td[(unsigned char)needle[i]] = (int)needle_len - i;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/*
|
||||
* String matching - Sunday algorithm
|
||||
* http://www.iti.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
|
||||
*/
|
||||
ZEND_API const char* zend_memnstr_ex(const char *haystack, const char *needle, size_t needle_len, char *end) /* {{{ */
|
||||
{
|
||||
unsigned int td[256];
|
||||
register size_t i;
|
||||
const unsigned register char *p;
|
||||
register const char *p;
|
||||
|
||||
if (needle_len == 0 || (end - haystack) == 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
zend_memstr_ex_pre(td, needle, needle_len);
|
||||
zend_memnstr_ex_pre(td, needle, needle_len, 0);
|
||||
|
||||
p = (const unsigned char *)haystack;
|
||||
p = haystack;
|
||||
end -= needle_len;
|
||||
|
||||
while (p <= (unsigned char *)end) {
|
||||
while (p <= end) {
|
||||
for (i = 0; i < needle_len; i++) {
|
||||
if (needle[i] != p[i]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == needle_len) {
|
||||
return (const char *)p;
|
||||
return p;
|
||||
}
|
||||
p += td[p[needle_len]];
|
||||
p += td[(unsigned char)(p[needle_len])];
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
ZEND_API const char* zend_memnrstr_ex(const char *haystack, const char *needle, size_t needle_len, char *end) /* {{{ */
|
||||
{
|
||||
unsigned int td[256];
|
||||
register size_t i;
|
||||
register const char *p;
|
||||
|
||||
if (needle_len == 0 || (end - haystack) == 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
zend_memnstr_ex_pre(td, needle, needle_len, 1);
|
||||
|
||||
p = end;
|
||||
p -= needle_len;
|
||||
|
||||
while (p >= haystack) {
|
||||
for (i = 0; i < needle_len; i++) {
|
||||
if (needle[i] != p[i]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == needle_len) {
|
||||
return (const char *)p;
|
||||
}
|
||||
|
||||
if (p == haystack) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
p -= td[(unsigned char)(p[-1])];
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/*
|
||||
* Local variables:
|
||||
|
||||
@@ -88,6 +88,7 @@ ZEND_API zend_bool instanceof_function(const zend_class_entry *instance_ce, cons
|
||||
ZEND_API zend_uchar _is_numeric_string_ex(const char *str, size_t length, zend_long *lval, double *dval, int allow_errors, int *oflow_info);
|
||||
|
||||
ZEND_API const char* zend_memnstr_ex(const char *haystack, const char *needle, size_t needle_len, char *end);
|
||||
ZEND_API const char* zend_memnrstr_ex(const char *haystack, const char *needle, size_t needle_len, char *end);
|
||||
|
||||
END_EXTERN_C()
|
||||
|
||||
@@ -174,11 +175,12 @@ zend_memnstr(const char *haystack, const char *needle, size_t needle_len, char *
|
||||
size_t off_s;
|
||||
|
||||
if (needle_len == 1) {
|
||||
return (char *)memchr(p, *needle, (end-p));
|
||||
return (const char *)memchr(p, *needle, (end-p));
|
||||
}
|
||||
|
||||
off_p = end - haystack;
|
||||
off_s = (off_p > 0) ? (size_t)off_p : 0;
|
||||
|
||||
if (needle_len > off_s) {
|
||||
return NULL;
|
||||
}
|
||||
@@ -187,7 +189,7 @@ zend_memnstr(const char *haystack, const char *needle, size_t needle_len, char *
|
||||
end -= needle_len;
|
||||
|
||||
while (p <= end) {
|
||||
if ((p = (char *)memchr(p, *needle, (end-p+1))) && ne == p[needle_len-1]) {
|
||||
if ((p = (const char *)memchr(p, *needle, (end-p+1))) && ne == p[needle_len-1]) {
|
||||
if (!memcmp(needle, p, needle_len-1)) {
|
||||
return p;
|
||||
}
|
||||
@@ -209,7 +211,6 @@ zend_memnstr(const char *haystack, const char *needle, size_t needle_len, char *
|
||||
static zend_always_inline const void *zend_memrchr(const void *s, int c, size_t n)
|
||||
{
|
||||
register const unsigned char *e;
|
||||
|
||||
if (n <= 0) {
|
||||
return NULL;
|
||||
}
|
||||
@@ -219,10 +220,46 @@ static zend_always_inline const void *zend_memrchr(const void *s, int c, size_t
|
||||
return (const void *)e;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
static zend_always_inline const char *
|
||||
zend_memnrstr(const char *haystack, const char *needle, size_t needle_len, char *end)
|
||||
{
|
||||
const char *p = end;
|
||||
const char ne = needle[needle_len-1];
|
||||
ptrdiff_t off_p;
|
||||
size_t off_s;
|
||||
|
||||
if (needle_len == 1) {
|
||||
return (const char *)zend_memrchr(haystack, *needle, (p - haystack));
|
||||
}
|
||||
|
||||
off_p = end - haystack;
|
||||
off_s = (off_p > 0) ? (size_t)off_p : 0;
|
||||
|
||||
if (needle_len > off_s) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (EXPECTED(off_s < 1024 || needle_len < 3)) {
|
||||
p -= needle_len;
|
||||
|
||||
do {
|
||||
if ((p = (const char *)zend_memrchr(haystack, *needle, (p - haystack) + 1)) && ne == p[needle_len-1]) {
|
||||
if (!memcmp(needle, p, needle_len - 1)) {
|
||||
return p;
|
||||
}
|
||||
}
|
||||
} while (p-- >= haystack);
|
||||
|
||||
return NULL;
|
||||
} else {
|
||||
return zend_memnrstr_ex(haystack, needle, needle_len, end);
|
||||
}
|
||||
}
|
||||
|
||||
BEGIN_EXTERN_C()
|
||||
ZEND_API int increment_function(zval *op1);
|
||||
ZEND_API int decrement_function(zval *op2);
|
||||
|
||||
@@ -1929,9 +1929,9 @@ PHP_FUNCTION(stripos)
|
||||
char *found = NULL;
|
||||
zend_string *haystack;
|
||||
zend_long offset = 0;
|
||||
char *needle_dup = NULL, *haystack_dup;
|
||||
char needle_char[2];
|
||||
zval *needle;
|
||||
zend_string *needle_dup = NULL, *haystack_dup;
|
||||
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sz|l", &haystack, &needle, &offset) == FAILURE) {
|
||||
return;
|
||||
@@ -1946,40 +1946,38 @@ PHP_FUNCTION(stripos)
|
||||
RETURN_FALSE;
|
||||
}
|
||||
|
||||
haystack_dup = estrndup(haystack->val, haystack->len);
|
||||
php_strtolower(haystack_dup, haystack->len);
|
||||
|
||||
if (Z_TYPE_P(needle) == IS_STRING) {
|
||||
if (Z_STRLEN_P(needle) == 0 || Z_STRLEN_P(needle) > haystack->len) {
|
||||
efree(haystack_dup);
|
||||
RETURN_FALSE;
|
||||
}
|
||||
|
||||
needle_dup = estrndup(Z_STRVAL_P(needle), Z_STRLEN_P(needle));
|
||||
php_strtolower(needle_dup, Z_STRLEN_P(needle));
|
||||
found = (char*)php_memnstr(haystack_dup + offset, needle_dup, Z_STRLEN_P(needle), haystack_dup + haystack->len);
|
||||
haystack_dup = php_string_tolower(haystack);
|
||||
needle_dup = php_string_tolower(Z_STR_P(needle));
|
||||
found = (char*)php_memnstr(haystack_dup->val + offset,
|
||||
needle_dup->val, needle_dup->len, haystack_dup->val + haystack->len);
|
||||
} else {
|
||||
if (php_needle_char(needle, needle_char) != SUCCESS) {
|
||||
efree(haystack_dup);
|
||||
RETURN_FALSE;
|
||||
}
|
||||
haystack_dup = php_string_tolower(haystack);
|
||||
needle_char[0] = tolower(needle_char[0]);
|
||||
needle_char[1] = '\0';
|
||||
found = (char*)php_memnstr(haystack_dup + offset,
|
||||
found = (char*)php_memnstr(haystack_dup->val + offset,
|
||||
needle_char,
|
||||
sizeof(needle_char) - 1,
|
||||
haystack_dup + haystack->len);
|
||||
haystack_dup->val + haystack->len);
|
||||
}
|
||||
|
||||
efree(haystack_dup);
|
||||
if (needle_dup) {
|
||||
efree(needle_dup);
|
||||
}
|
||||
|
||||
if (found) {
|
||||
RETURN_LONG(found - haystack_dup);
|
||||
RETVAL_LONG(found - haystack_dup->val);
|
||||
} else {
|
||||
RETURN_FALSE;
|
||||
RETVAL_FALSE;
|
||||
}
|
||||
|
||||
zend_string_release(haystack_dup);
|
||||
if (needle_dup) {
|
||||
zend_string_release(needle_dup);
|
||||
}
|
||||
}
|
||||
/* }}} */
|
||||
@@ -1994,6 +1992,7 @@ PHP_FUNCTION(strrpos)
|
||||
size_t needle_len;
|
||||
zend_long offset = 0;
|
||||
char *p, *e, ord_needle[2];
|
||||
char *found;
|
||||
|
||||
#ifndef FAST_ZPP
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sz|l", &haystack, &zneedle, &offset) == FAILURE) {
|
||||
@@ -2030,37 +2029,22 @@ PHP_FUNCTION(strrpos)
|
||||
RETURN_FALSE;
|
||||
}
|
||||
p = haystack->val + (size_t)offset;
|
||||
e = haystack->val + haystack->len - needle_len;
|
||||
e = haystack->val + haystack->len;
|
||||
} else {
|
||||
if (offset < -INT_MAX || (size_t)(-offset) > haystack->len) {
|
||||
php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
|
||||
RETURN_FALSE;
|
||||
}
|
||||
|
||||
p = haystack->val;
|
||||
if (needle_len > (size_t)(-offset)) {
|
||||
e = haystack->val + haystack->len - needle_len;
|
||||
if (haystack->len + (size_t)offset >= needle_len) {
|
||||
e = haystack->val + haystack->len + (size_t)offset + needle_len;
|
||||
} else {
|
||||
e = haystack->val + haystack->len + offset;
|
||||
e = haystack->val + haystack->len;
|
||||
}
|
||||
}
|
||||
|
||||
if (needle_len == 1) {
|
||||
/* Single character search can shortcut memcmps */
|
||||
while (e >= p) {
|
||||
if (*e == *needle) {
|
||||
RETURN_LONG(e - p + (offset > 0 ? offset : 0));
|
||||
}
|
||||
e--;
|
||||
}
|
||||
RETURN_FALSE;
|
||||
}
|
||||
|
||||
while (e >= p) {
|
||||
if (memcmp(e, needle, needle_len) == 0) {
|
||||
RETURN_LONG(e - p + (offset > 0 ? offset : 0));
|
||||
}
|
||||
e--;
|
||||
if ((found = (char *)zend_memnrstr(p, needle, needle_len, e))) {
|
||||
RETURN_LONG(found - haystack->val);
|
||||
}
|
||||
|
||||
RETURN_FALSE;
|
||||
@@ -2072,103 +2056,105 @@ PHP_FUNCTION(strrpos)
|
||||
PHP_FUNCTION(strripos)
|
||||
{
|
||||
zval *zneedle;
|
||||
char *needle;
|
||||
zend_string *needle;
|
||||
zend_string *haystack;
|
||||
size_t needle_len;
|
||||
zend_long offset = 0;
|
||||
char *p, *e, ord_needle[2];
|
||||
char *needle_dup, *haystack_dup;
|
||||
char *p, *e;
|
||||
char *found;
|
||||
zend_string *needle_dup, *haystack_dup, *ord_needle = NULL;
|
||||
ALLOCA_FLAG(use_heap);
|
||||
|
||||
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS(), "Sz|l", &haystack, &zneedle, &offset) == FAILURE) {
|
||||
RETURN_FALSE;
|
||||
}
|
||||
|
||||
STR_ALLOCA_ALLOC(ord_needle, 1, use_heap);
|
||||
if (Z_TYPE_P(zneedle) == IS_STRING) {
|
||||
needle = Z_STRVAL_P(zneedle);
|
||||
needle_len = Z_STRLEN_P(zneedle);
|
||||
needle = Z_STR_P(zneedle);
|
||||
} else {
|
||||
if (php_needle_char(zneedle, ord_needle) != SUCCESS) {
|
||||
if (php_needle_char(zneedle, ord_needle->val) != SUCCESS) {
|
||||
RETURN_FALSE;
|
||||
}
|
||||
ord_needle[1] = '\0';
|
||||
ord_needle->val[1] = '\0';
|
||||
needle = ord_needle;
|
||||
needle_len = 1;
|
||||
}
|
||||
|
||||
if ((haystack->len == 0) || (needle_len == 0)) {
|
||||
if ((haystack->len == 0) || (needle->len == 0)) {
|
||||
RETURN_FALSE;
|
||||
}
|
||||
|
||||
if (needle_len == 1) {
|
||||
if (needle->len == 1) {
|
||||
/* Single character search can shortcut memcmps
|
||||
Can also avoid tolower emallocs */
|
||||
if (offset >= 0) {
|
||||
if ((size_t)offset > haystack->len) {
|
||||
STR_ALLOCA_FREE(ord_needle, use_heap);
|
||||
php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
|
||||
RETURN_FALSE;
|
||||
}
|
||||
p = haystack->val + offset;
|
||||
p = haystack->val + (size_t)offset;
|
||||
e = haystack->val + haystack->len - 1;
|
||||
} else {
|
||||
p = haystack->val;
|
||||
if (offset < -INT_MAX || (size_t)(-offset) > haystack->len) {
|
||||
STR_ALLOCA_FREE(ord_needle, use_heap);
|
||||
php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
|
||||
RETURN_FALSE;
|
||||
}
|
||||
e = haystack->val + haystack->len + offset;
|
||||
e = haystack->val + haystack->len + (size_t)offset;
|
||||
}
|
||||
/* Borrow that ord_needle buffer to avoid repeatedly tolower()ing needle */
|
||||
*ord_needle = tolower(*needle);
|
||||
*ord_needle->val = tolower(*needle->val);
|
||||
while (e >= p) {
|
||||
if (tolower(*e) == *ord_needle) {
|
||||
if (tolower(*e) == *ord_needle->val) {
|
||||
STR_ALLOCA_FREE(ord_needle, use_heap);
|
||||
RETURN_LONG(e - p + (offset > 0 ? offset : 0));
|
||||
}
|
||||
e--;
|
||||
}
|
||||
STR_ALLOCA_FREE(ord_needle, use_heap);
|
||||
RETURN_FALSE;
|
||||
}
|
||||
|
||||
needle_dup = estrndup(needle, needle_len);
|
||||
php_strtolower(needle_dup, needle_len);
|
||||
haystack_dup = estrndup(haystack->val, haystack->len);
|
||||
php_strtolower(haystack_dup, haystack->len);
|
||||
|
||||
haystack_dup = php_string_tolower(haystack);
|
||||
if (offset >= 0) {
|
||||
if ((size_t)offset > haystack->len) {
|
||||
efree(needle_dup);
|
||||
efree(haystack_dup);
|
||||
zend_string_release(haystack_dup);
|
||||
STR_ALLOCA_FREE(ord_needle, use_heap);
|
||||
php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
|
||||
RETURN_FALSE;
|
||||
}
|
||||
p = haystack_dup + offset;
|
||||
e = haystack_dup + haystack->len - needle_len;
|
||||
p = haystack_dup->val + offset;
|
||||
e = haystack_dup->val + haystack->len;
|
||||
} else {
|
||||
if (offset < -INT_MAX || (size_t)(-offset) > haystack->len) {
|
||||
efree(needle_dup);
|
||||
efree(haystack_dup);
|
||||
zend_string_release(haystack_dup);
|
||||
STR_ALLOCA_FREE(ord_needle, use_heap);
|
||||
php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
|
||||
RETURN_FALSE;
|
||||
}
|
||||
p = haystack_dup;
|
||||
if (needle_len > (size_t)(-offset)) {
|
||||
e = haystack_dup + haystack->len - needle_len;
|
||||
p = haystack_dup->val;
|
||||
if (haystack->len + (size_t)offset >= needle->len) {
|
||||
e = haystack_dup->val + haystack->len + (size_t)offset + needle->len;
|
||||
} else {
|
||||
e = haystack_dup + haystack->len + offset;
|
||||
e = haystack_dup->val + haystack->len;
|
||||
}
|
||||
}
|
||||
|
||||
while (e >= p) {
|
||||
if (memcmp(e, needle_dup, needle_len) == 0) {
|
||||
efree(haystack_dup);
|
||||
efree(needle_dup);
|
||||
RETURN_LONG(e - p + (offset > 0 ? offset : 0));
|
||||
}
|
||||
e--;
|
||||
needle_dup = php_string_tolower(needle);
|
||||
if ((found = (char *)zend_memnrstr(p, needle_dup->val, needle_dup->len, e))) {
|
||||
RETVAL_LONG(found - haystack_dup->val);
|
||||
zend_string_release(needle_dup);
|
||||
zend_string_release(haystack_dup);
|
||||
STR_ALLOCA_FREE(ord_needle, use_heap);
|
||||
} else {
|
||||
zend_string_release(needle_dup);
|
||||
zend_string_release(haystack_dup);
|
||||
STR_ALLOCA_FREE(ord_needle, use_heap);
|
||||
RETURN_FALSE;
|
||||
}
|
||||
|
||||
efree(haystack_dup);
|
||||
efree(needle_dup);
|
||||
RETURN_FALSE;
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
|
||||
Reference in New Issue
Block a user