From da6766d778c55c1f07aa7d6cf012f5cfd00a3448 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 30 Dec 2023 02:32:31 +0100 Subject: [PATCH] Use more optimal perfect hash table --- ext/mbstring/libmbfl/mbfl/mbfl_encoding.c | 232 +++++++++------------- 1 file changed, 95 insertions(+), 137 deletions(-) diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c index 59c73767413..f79d2192250 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c @@ -146,179 +146,137 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = { /* The following perfect hashing table was amended from gperf, and hashing code was generated using gperf. * The table was amended to refer to the table above such that it is lighter for the data cache. - * Command used: gperf encodings.txt --readonly-tables --null-strings --ignore-case - * The encodings.txt contains all the contents of the name fields of the mbfl_encoding_ptr_list table. */ + * You can use the generate_name_perfect_hash_table.php script to help generate the necessary lookup tables. */ -static const int8_t mbfl_encoding_ptr_list_after_hashing[231] = { +static const int8_t mbfl_encoding_ptr_list_after_hashing[] = { + -1, -1, -1, -1, -1, -1, - 61, 66, - 23, + -1, 73, + -1, + 78, + 61, + 76, + -1, 59, - -1, - 1, - -1, -1, -1, - 11, - -1, - 5, - 9, - -1, - 10, - 38, - -1, + 46, 52, 54, - -1, - 2, - 40, - 46, - 27, - 76, - 26, - -1, 49, 57, - -1, - 75, - -1, - 47, - 55, - 78, - 36, - -1, + 69, + 21, 50, 58, - 8, - -1, -1, - 69, - 39, - 7, - -1, -1, + 75, + 35, + 9, 64, - 67, - -1, -1, - 30, 48, 56, - -1, -1, -1, - 35, 74, - -1, -1, - 24, - 53, - 62, - 43, - -1, -1, + 47, + 55, + 40, 45, - 22, - -1, -1, -1, - 6, - 3, - -1, -1, -1, + 53, 18, - 71, - -1, -1, -1, - 21, - -1, - 37, - -1, - 4, - 60, - 25, - -1, -1, + 39, 72, + 60, + 23, + 10, + 30, + 36, + 67, + 71, + 37, + 27, + 77, + 26, 51, - -1, - 44, + 12, + 6, + 11, + 7, 29, - -1, - 28, + 5, + 24, 0, + 2, + 13, + 43, + 31, + 33, + 38, + 63, + 8, + 1, + 15, + -1, + 16, -1, 14, - 31, - 63, - 12, + 3, + 44, -1, - 13, - 33, - -1, -1, - 68, - -1, -1, -1, -1, - -1, -1, 20, - -1, -1, -1, -1, + -1, + 32, + -1, + 68, + 25, + 17, + 28, -1, -1, -1, - 77, - -1, -1, -1, -1, - -1, -1, -1, -1, - 65, - -1, -1, -1, -1, - 70, - -1, -1, -1, -1, + 22, + -1, -1, + 4, + -1, -1, + 62, + -1, -1, + 34, -1, 41, - -1, -1, -1, -1, - -1, - 17, -1, -1, -1, 42, - 16, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - 15, - -1, -1, -1, -1, - 34, - -1, -1, -1, -1, + 70, + 19, -1, -1, -1, - 32, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, - 19 + 65 }; static unsigned int mbfl_name2encoding_perfect_hash(const char *str, size_t len) { static const unsigned char asso_values[] = { - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 5, 231, 231, 0, 50, - 5, 15, 35, 10, 20, 75, 0, 45, 231, 231, - 231, 231, 231, 231, 231, 80, 5, 0, 0, 0, - 75, 75, 0, 0, 15, 70, 0, 5, 0, 0, - 25, 55, 30, 0, 10, 0, 231, 25, 231, 231, - 0, 231, 231, 231, 231, 231, 231, 80, 5, 0, - 0, 0, 75, 75, 0, 0, 15, 70, 0, 5, - 0, 0, 25, 55, 30, 0, 10, 0, 231, 25, - 231, 231, 0, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, - 231, 231, 231, 231, 231, 231 + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 1, 109, 109, 1, 19, + 0, 16, 13, 3, 7, 35, 1, 20, 109, 109, + 109, 109, 109, 109, 109, 16, 1, 0, 44, 6, + 26, 53, 8, 0, 25, 32, 13, 12, 1, 0, + 25, 0, 32, 18, 51, 3, 109, 15, 109, 109, + 1, 109, 109, 109, 109, 109, 109, 16, 1, 0, + 44, 6, 26, 53, 8, 0, 25, 32, 13, 12, + 1, 0, 25, 0, 32, 18, 51, 3, 109, 15, + 109, 109, 1, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 109, 109, 109, 109, 109, 109 }; unsigned int hval = len; @@ -377,7 +335,7 @@ const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len) /* Use perfect hash lookup for name */ if (name_len <= NAME_HASH_MAX_NAME_LENGTH && name_len >= NAME_HASH_MIN_NAME_LENGTH) { unsigned int key = mbfl_name2encoding_perfect_hash(name, name_len); - if (key <= sizeof(mbfl_encoding_ptr_list_after_hashing)) { + if (key < sizeof(mbfl_encoding_ptr_list_after_hashing) / sizeof(mbfl_encoding_ptr_list_after_hashing[0])) { int8_t offset = mbfl_encoding_ptr_list_after_hashing[key]; if (offset >= 0) { encoding = mbfl_encoding_ptr_list + offset;