1
0
mirror of https://github.com/php/php-src.git synced 2026-03-27 09:42:22 +01:00

Make stripos() work with Unicode strings.

This commit is contained in:
Andrei Zmievski
2006-10-02 16:52:22 +00:00
parent 255b215c9e
commit e673ae2eb9
2 changed files with 34 additions and 26 deletions

View File

@@ -2625,7 +2625,7 @@ PHP_FUNCTION(strpos)
}
/* }}} */
/* {{{ proto int stripos(string haystack, string needle [, int offset])
/* {{{ proto int stripos(string haystack, string needle [, int offset]) U
Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(stripos)
{
@@ -2633,12 +2633,12 @@ PHP_FUNCTION(stripos)
long offset = 0;
int haystack_len, needle_len = 0;
zend_uchar str_type;
void *haystack_dup, *needle_dup = NULL;
void *haystack_dup = NULL, *needle_dup = NULL;
char needle_char[2];
char c = 0;
UChar u_needle_char[3];
UChar32 ch = 0;
UChar u_needle_char[8];
void *found = NULL;
int cu_offset = 0;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZ|l", &haystack, &needle, &offset) == FAILURE) {
return;
@@ -2662,6 +2662,7 @@ PHP_FUNCTION(stripos)
if (!Z_UNILEN_PP(needle) || Z_UNILEN_PP(needle) > haystack_len) {
RETURN_FALSE;
}
/* convert both strings to the same type */
if (Z_TYPE_PP(haystack) != Z_TYPE_PP(needle)) {
str_type = zend_get_unified_string_type(2 TSRMLS_CC, Z_TYPE_PP(haystack), Z_TYPE_PP(needle));
convert_to_explicit_type_ex(haystack, str_type);
@@ -2669,11 +2670,9 @@ PHP_FUNCTION(stripos)
}
needle_len = Z_UNILEN_PP(needle);
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
haystack_dup = php_u_strtolower(Z_USTRVAL_PP(haystack), &haystack_len, UG(default_locale));
needle_dup = php_u_strtolower(Z_USTRVAL_PP(needle), &needle_len, UG(default_locale));
found = zend_u_memnstr((UChar *)haystack_dup + offset,
(UChar *)needle_dup, needle_len,
(UChar *)haystack_dup + haystack_len);
/* calculate codeunit offset */
U16_FWD_N(Z_USTRVAL_PP(haystack), cu_offset, haystack_len, offset);
found = php_u_stristr(Z_USTRVAL_PP(haystack) + cu_offset, Z_USTRVAL_PP(needle), haystack_len, needle_len TSRMLS_CC);
} else {
haystack_dup = estrndup(Z_STRVAL_PP(haystack), haystack_len);
php_strtolower((char *)haystack_dup, haystack_len);
@@ -2688,14 +2687,22 @@ PHP_FUNCTION(stripos)
case IS_LONG:
case IS_BOOL:
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
ch = u_tolower((UChar32)Z_LVAL_PP(needle));
if (Z_LVAL_PP(needle) < 0 || Z_LVAL_PP(needle) > 0x10FFFF) {
php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)");
RETURN_FALSE;
}
needle_len = zend_codepoint_to_uchar((UChar32)Z_LVAL_PP(needle), u_needle_char);
} else {
c = tolower((char)Z_LVAL_PP(needle));
}
break;
case IS_DOUBLE:
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
ch = u_tolower((UChar32)Z_DVAL_PP(needle));
if ((UChar32)Z_DVAL_PP(needle) < 0 || (UChar32)Z_DVAL_PP(needle) > 0x10FFFF) {
php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)");
RETURN_FALSE;
}
needle_len = zend_codepoint_to_uchar((UChar32)Z_DVAL_PP(needle), u_needle_char);
} else {
c = tolower((char)Z_DVAL_PP(needle));
}
@@ -2707,18 +2714,12 @@ PHP_FUNCTION(stripos)
}
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
if (U_IS_BMP(ch)) {
u_needle_char[needle_len++] = ch;
u_needle_char[needle_len] = 0;
} else {
u_needle_char[needle_len++] = U16_LEAD(ch);
u_needle_char[needle_len++] = U16_TRAIL(ch);
u_needle_char[needle_len] = 0;
}
haystack_dup = php_u_strtolower(Z_USTRVAL_PP(haystack), &haystack_len, UG(default_locale));
found = zend_u_memnstr((UChar *)haystack_dup + offset,
(UChar *)u_needle_char, needle_len,
(UChar *)haystack_dup + haystack_len);
/* calculate codeunit offset */
U16_FWD_N(Z_USTRVAL_PP(haystack), cu_offset, haystack_len, offset);
u_needle_char[needle_len] = 0;
found = php_u_stristr(Z_USTRVAL_PP(haystack) + cu_offset,
u_needle_char, haystack_len, needle_len TSRMLS_CC);
} else {
needle_char[0] = c;
needle_char[1] = '\0';
@@ -2731,14 +2732,21 @@ PHP_FUNCTION(stripos)
}
}
efree(haystack_dup);
if (haystack_dup) {
efree(haystack_dup);
}
if (needle_dup) {
efree(needle_dup);
}
if (found) {
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
RETURN_LONG((UChar *)found - (UChar *)haystack_dup);
/* Simple subtraction will not suffice, since there may be
supplementary codepoints. We count how many codepoints there are
between the starting offset and the found location and add them
to the starting codepoint offset. */
RETURN_LONG(offset + u_countChar32(Z_USTRVAL_PP(haystack) + cu_offset,
(UChar*)found - (Z_USTRVAL_PP(haystack) + cu_offset)));
} else {
RETURN_LONG((char *)found - (char *)haystack_dup);
}

View File

@@ -27,7 +27,6 @@ ext/standard
Params API. Rest - no idea yet.
stristr()
stripos()
strripos()
str_replace()
stri_replace()
@@ -158,6 +157,7 @@ ext/standard
strip_tags()
stripcslashes()
stripslashes()
stripos()
strpbrk()
strpos()
strrchr()