diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index e219a6bdb79..d0d724f640a 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -2878,7 +2878,7 @@ PHP_FUNCTION(mb_convert_encoding) static zend_string *mbstring_convert_case(php_case_mode case_mode, const char *str, size_t str_len, const mbfl_encoding *enc) { - return php_unicode_convert_case(case_mode, str, str_len, enc, MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar)); + return php_unicode_convert_case(case_mode, str, str_len, enc, enc, MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar)); } PHP_FUNCTION(mb_convert_case) @@ -4858,10 +4858,10 @@ MBSTRING_API size_t php_mb_stripos(bool mode, zend_string *haystack, zend_string { /* We're using simple case-folding here, because we'd have to deal with remapping of * offsets otherwise. */ - zend_string *haystack_conv = mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, ZSTR_VAL(haystack), ZSTR_LEN(haystack), enc); - zend_string *needle_conv = mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, ZSTR_VAL(needle), ZSTR_LEN(needle), enc); + zend_string *haystack_conv = php_unicode_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, ZSTR_VAL(haystack), ZSTR_LEN(haystack), enc, &mbfl_encoding_utf8, MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar)); + zend_string *needle_conv = php_unicode_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, ZSTR_VAL(needle), ZSTR_LEN(needle), enc, &mbfl_encoding_utf8, MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar)); - size_t n = mb_find_strpos(haystack_conv, needle_conv, enc, offset, mode); + size_t n = mb_find_strpos(haystack_conv, needle_conv, &mbfl_encoding_utf8, offset, mode); zend_string_free(haystack_conv); zend_string_free(needle_conv); diff --git a/ext/mbstring/php_unicode.c b/ext/mbstring/php_unicode.c index e07490c884e..bd1d5684163 100644 --- a/ext/mbstring/php_unicode.c +++ b/ext/mbstring/php_unicode.c @@ -238,7 +238,7 @@ static uint32_t *emit_special_casing_sequence(uint32_t w, uint32_t *out) return out; } -MBSTRING_API zend_string *php_unicode_convert_case(php_case_mode case_mode, const char *srcstr, size_t in_len, const mbfl_encoding *src_encoding, int illegal_mode, uint32_t illegal_substchar) +MBSTRING_API zend_string *php_unicode_convert_case(php_case_mode case_mode, const char *srcstr, size_t in_len, const mbfl_encoding *src_encoding, const mbfl_encoding *dst_encoding, int illegal_mode, uint32_t illegal_substchar) { /* A Unicode codepoint can expand out to up to 3 codepoints when uppercased, lowercased, or title cased * See http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt */ @@ -363,7 +363,7 @@ MBSTRING_API zend_string *php_unicode_convert_case(php_case_mode case_mode, cons } ZEND_ASSERT(p - converted_buf <= 192); - src_encoding->from_wchar(converted_buf, p - converted_buf, &buf, !in_len); + dst_encoding->from_wchar(converted_buf, p - converted_buf, &buf, !in_len); } return mb_convert_buf_result(&buf); diff --git a/ext/mbstring/php_unicode.h b/ext/mbstring/php_unicode.h index b65b347df1b..1326761943d 100644 --- a/ext/mbstring/php_unicode.h +++ b/ext/mbstring/php_unicode.h @@ -91,7 +91,7 @@ typedef enum { MBSTRING_API zend_string *php_unicode_convert_case( php_case_mode case_mode, const char *srcstr, size_t srclen, - const mbfl_encoding *src_encoding, int illegal_mode, uint32_t illegal_substchar); + const mbfl_encoding *src_encoding, const mbfl_encoding *dst_encoding, int illegal_mode, uint32_t illegal_substchar); /* Optimize the common ASCII case for lower/upper */ diff --git a/ext/mbstring/tests/mb_stripos.phpt b/ext/mbstring/tests/mb_stripos.phpt index 1383cd5ed2e..7f4898a0f51 100644 --- a/ext/mbstring/tests/mb_stripos.phpt +++ b/ext/mbstring/tests/mb_stripos.phpt @@ -9,9 +9,8 @@ mbstring ini_set('include_path','.'); include_once('common.inc'); - // Test string -$euc_jp = '0123この文字列は日本語です。EUC-JPを使っています。0123日本語は面倒臭い。'; +$euc_jp = "0123\xA4\xB3\xA4\xCE\xCA\xB8\xBB\xFA\xCE\xF3\xA4\xCF\xC6\xFC\xCB\xDC\xB8\xEC\xA4\xC7\xA4\xB9\xA1\xA3EUC-JP\xA4\xF2\xBB\xC8\xA4\xC3\xA4\xC6\xA4\xA4\xA4\xDE\xA4\xB9\xA1\xA30123\xC6\xFC\xCB\xDC\xB8\xEC\xA4\xCF\xCC\xCC\xC5\xDD\xBD\xAD\xA4\xA4\xA1\xA3"; $slen = mb_strlen($euc_jp, 'EUC-JP'); echo "String len: $slen\n"; @@ -21,11 +20,11 @@ mb_internal_encoding('UTF-8') or print("mb_internal_encoding() failed\n"); echo "== POSITIVE OFFSET ==\n"; -print mb_stripos($euc_jp, '日本語', 0, 'EUC-JP') . "\n"; +print mb_stripos($euc_jp, "\xC6\xFC\xCB\xDC\xB8\xEC", 0, 'EUC-JP') . "\n"; print mb_stripos($euc_jp, '0', 0, 'EUC-JP') . "\n"; print mb_stripos($euc_jp, 3, 0, 'EUC-JP') . "\n"; print mb_stripos($euc_jp, 0, 0, 'EUC-JP') . "\n"; -print mb_stripos($euc_jp, '日本語', 15, 'EUC-JP') . "\n"; +print mb_stripos($euc_jp, "\xC6\xFC\xCB\xDC\xB8\xEC", 15, 'EUC-JP') . "\n"; print mb_stripos($euc_jp, '0', 15, 'EUC-JP') . "\n"; print mb_stripos($euc_jp, 3, 15, 'EUC-JP') . "\n"; print mb_stripos($euc_jp, 0, 15, 'EUC-JP') . "\n"; @@ -34,7 +33,7 @@ print mb_stripos($euc_jp, 0, 15, 'EUC-JP') . "\n"; // Negative offset echo "== NEGATIVE OFFSET ==\n"; -print mb_stripos($euc_jp, '日本語', -15, 'EUC-JP') . "\n"; +print mb_stripos($euc_jp, "\xC6\xFC\xCB\xDC\xB8\xEC", -15, 'EUC-JP') . "\n"; print mb_stripos($euc_jp, '0', -15, 'EUC-JP') . "\n"; print mb_stripos($euc_jp, 3, -15, 'EUC-JP') . "\n"; print mb_stripos($euc_jp, 0, -15, 'EUC-JP') . "\n"; @@ -44,7 +43,7 @@ print mb_stripos($euc_jp, 0, -43, 'EUC-JP') . "\n"; // Out of range - should return false print ("== OUT OF RANGE ==\n"); -$r = mb_stripos($euc_jp, '日本語', 40, 'EUC-JP'); +$r = mb_stripos($euc_jp, "\xC6\xFC\xCB\xDC\xB8\xEC", 40, 'EUC-JP'); ($r === FALSE) ? print "OK_OUT_RANGE\n" : print "NG_OUT_RANGE\n"; $r = mb_stripos($euc_jp, '0', 40, 'EUC-JP'); ($r === FALSE) ? print "OK_OUT_RANGE\n" : print "NG_OUT_RANGE\n"; @@ -52,7 +51,7 @@ $r = mb_stripos($euc_jp, 3, 40, 'EUC-JP'); ($r === FALSE) ? print "OK_OUT_RANGE\n" : print "NG_OUT_RANGE\n"; $r = mb_stripos($euc_jp, 0, 40, 'EUC-JP'); ($r === FALSE) ? print "OK_OUT_RANGE\n" : print "NG_OUT_RANGE\n"; -$r = mb_stripos($euc_jp, '日本語', -3, 'EUC-JP'); +$r = mb_stripos($euc_jp, "\xC6\xFC\xCB\xDC\xB8\xEC", -3, 'EUC-JP'); ($r === FALSE) ? print "OK_OUT_RANGE\n" : print "NG_OUT_RANGE\n"; $r = mb_stripos($euc_jp, '0', -3, 'EUC-JP'); ($r === FALSE) ? print "OK_OUT_RANGE\n" : print "NG_OUT_RANGE\n"; @@ -65,7 +64,7 @@ $r = mb_stripos($euc_jp, 0, -3, 'EUC-JP'); // Non-existent echo "== NON-EXISTENT ==\n"; -$r = mb_stripos($euc_jp, '韓国語', 0, 'EUC-JP'); +$r = mb_stripos($euc_jp, "\xB4\xDA\xB9\xF1\xB8\xEC", 0, 'EUC-JP'); ($r === FALSE) ? print "OK_STR\n" : print "NG_STR\n"; $r = mb_stripos($euc_jp, "\n", 0, 'EUC-JP'); ($r === FALSE) ? print "OK_NEWLINE\n" : print "NG_NEWLINE\n"; @@ -76,12 +75,12 @@ echo "== NO ENCODING PARAMETER ==\n"; mb_internal_encoding('EUC-JP') or print("mb_internal_encoding() failed\n"); -print mb_stripos($euc_jp, '日本語', 0) . "\n"; +print mb_stripos($euc_jp, "\xC6\xFC\xCB\xDC\xB8\xEC", 0) . "\n"; print mb_stripos($euc_jp, '0', 0) . "\n"; print mb_stripos($euc_jp, 3, 0) . "\n"; print mb_stripos($euc_jp, 0, 0) . "\n"; -$r = mb_stripos($euc_jp, '韓国語', 0); +$r = mb_stripos($euc_jp, "\xB4\xDA\xB9\xF1\xB8\xEC", 0); ($r === FALSE) ? print "OK_STR\n" : print "NG_STR\n"; $r = mb_stripos($euc_jp, "\n", 0); ($r === FALSE) ? print "OK_NEWLINE\n" : print "NG_NEWLINE\n"; @@ -91,12 +90,12 @@ echo "== NO OFFSET AND ENCODING PARAMETER ==\n"; mb_internal_encoding('EUC-JP') or print("mb_internal_encoding() failed\n"); -print mb_stripos($euc_jp, '日本語') . "\n"; +print mb_stripos($euc_jp, "\xC6\xFC\xCB\xDC\xB8\xEC") . "\n"; print mb_stripos($euc_jp, '0') . "\n"; print mb_stripos($euc_jp, 3) . "\n"; print mb_stripos($euc_jp, 0) . "\n"; -$r = mb_stripos($euc_jp, '韓国語'); +$r = mb_stripos($euc_jp, "\xB4\xDA\xB9\xF1\xB8\xEC"); ($r === FALSE) ? print "OK_STR\n" : print "NG_STR\n"; $r = mb_stripos($euc_jp, "\n"); ($r === FALSE) ? print "OK_NEWLINE\n" : print "NG_NEWLINE\n";