From 4e51810f9bfeb7f153eb9e7cdffbef23ee415748 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Sun, 26 Jul 2020 21:17:27 +0200 Subject: [PATCH] Optimize mbstring upper/lowercasing: use fast path in more cases The 'fast path' in the uppercase/lowercase functions for Unicode text can be used for a slightly greater range of characters. This is not expected to have a big impact on performance, since the number of characters which will use the 'fast path' is only increased by about 50-60, and these are not very commonly used characters... but still, it doesn't cost anything. --- ext/mbstring/php_unicode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ext/mbstring/php_unicode.c b/ext/mbstring/php_unicode.c index 3ed6b4297a9..0da63fe1e6a 100644 --- a/ext/mbstring/php_unicode.c +++ b/ext/mbstring/php_unicode.c @@ -121,7 +121,9 @@ static inline unsigned mph_lookup( static unsigned php_unicode_toupper_raw(unsigned code, enum mbfl_no_encoding enc) { - if (code < 0x80) { + /* After the ASCII characters, the first codepoint with an uppercase version + * is 0xB5 (MICRO SIGN) */ + if (code < 0xB5) { /* Fast path for ASCII */ if (code >= 0x61 && code <= 0x7A) { if (UNEXPECTED(enc == mbfl_no_encoding_8859_9 && code == 0x69)) { @@ -141,7 +143,9 @@ static unsigned php_unicode_toupper_raw(unsigned code, enum mbfl_no_encoding enc static unsigned php_unicode_tolower_raw(unsigned code, enum mbfl_no_encoding enc) { - if (code < 0x80) { + /* After the ASCII characters, the first codepoint with a lowercase version + * is 0xC0 (LATIN CAPITAL LETTER A WITH GRAVE) */ + if (code < 0xC0) { /* Fast path for ASCII */ if (code >= 0x41 && code <= 0x5A) { if (UNEXPECTED(enc == mbfl_no_encoding_8859_9 && code == 0x0049L)) {