From a5827c2d351d5362c6c75930a06e993dc6fa40fc Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Mon, 9 Nov 2020 21:37:04 +0200 Subject: [PATCH] Fix broken binary search function in mbstring This faulty binary search would never reject values at the very high end of the range being searched, even if they were not actually in the table. Among other things, this meant that some Unicode codepoints which do not correspond to any character in JIS X 0213 would be converted to bogus Shift-JIS-2004 values rather than being rejected. --- .../libmbfl/filters/mbfilter_gb18030.c | 51 +++++++++---------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_gb18030.c b/ext/mbstring/libmbfl/filters/mbfilter_gb18030.c index ac59ec85504..dfab3d59dfa 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_gb18030.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_gb18030.c @@ -68,42 +68,37 @@ const struct mbfl_convert_vtbl vtbl_wchar_gb18030 = { #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int -mbfl_bisec_srch(int w, const unsigned short *tbl, int n) +/* `tbl` contains inclusive ranges, each represented by a pair of unsigned shorts */ +int mbfl_bisec_srch(int w, const unsigned short *tbl, int n) { - int k, k1 = 0, k2 = n-1; - - while (k1 < k2) { - k = (k1+k2) >> 1; - if (w <= tbl[2*k+1]) { - k2 = k; - } else if (w >= tbl[2*k+2]) { - k1 = k + 1; + int l = 0, r = n-1; + while (l <= r) { + int probe = (l + r) >> 1; + unsigned short lo = tbl[2 * probe], hi = tbl[(2 * probe) + 1]; + if (w < lo) { + r = probe - 1; + } else if (w > hi) { + l = probe + 1; } else { - return -1; + return probe; } } - return k1; + return -1; } -int -mbfl_bisec_srch2(int w, const unsigned short tbl[], int n) +/* `tbl` contains single values, not ranges */ +int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n) { - int k, k1 = 0, k2 = n; - - if (w == tbl[0]) { - return 0; - } - - while (k2 - k1 > 1) { - k = (k1 + k2) >> 1; - if (w < tbl[k]) { - k2 = k; - } else if (w > tbl[k]) { - k1 = k; + int l = 0, r = n-1; + while (l <= r) { + int probe = (l + r) >> 1; + unsigned short val = tbl[probe]; + if (w < val) { + r = probe - 1; + } else if (w > val) { + l = probe + 1; } else { - return k; + return probe; } } return -1;