1
0
mirror of https://github.com/php/php-src.git synced 2026-04-02 13:43:02 +02:00

Fix broken binary search function in mbstring

This faulty binary search would never reject values at the very high
end of the range being searched, even if they were not actually in
the table.

Among other things, this meant that some Unicode codepoints which do
not correspond to any character in JIS X 0213 would be converted to
bogus Shift-JIS-2004 values rather than being rejected.
This commit is contained in:
Alex Dowad
2020-11-09 21:37:04 +02:00
parent b05ad5112a
commit a5827c2d35

View File

@@ -68,42 +68,37 @@ const struct mbfl_convert_vtbl vtbl_wchar_gb18030 = {
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * Binary search over a table of inclusive ranges.
 *
 * `tbl` contains `n` inclusive ranges, each represented by a pair of
 * unsigned shorts: tbl[2*i] is the low bound and tbl[2*i + 1] the high bound
 * of range `i`. The ranges must be sorted in ascending order and must not
 * overlap, otherwise the bisection cannot find them reliably.
 *
 * Returns the index of the range which contains `w`, or -1 if `w` is not
 * inside any range (this includes the case of an empty table, n == 0).
 */
int mbfl_bisec_srch(int w, const unsigned short *tbl, int n)
{
	int l = 0, r = n - 1;
	while (l <= r) {
		/* l + (r - l) / 2 avoids the signed overflow that (l + r) could hit */
		int probe = l + ((r - l) >> 1);
		unsigned short lo = tbl[2 * probe], hi = tbl[(2 * probe) + 1];
		if (w < lo) {
			/* `w` lies below this range; continue in the lower half */
			r = probe - 1;
		} else if (w > hi) {
			/* `w` lies above this range; continue in the upper half */
			l = probe + 1;
		} else {
			/* lo <= w <= hi: found the enclosing range */
			return probe;
		}
	}
	/* Search interval became empty without any range containing `w` */
	return -1;
}
int
mbfl_bisec_srch2(int w, const unsigned short tbl[], int n)
/* `tbl` contains single values, not ranges */
int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n)
{
int k, k1 = 0, k2 = n;
if (w == tbl[0]) {
return 0;
}
while (k2 - k1 > 1) {
k = (k1 + k2) >> 1;
if (w < tbl[k]) {
k2 = k;
} else if (w > tbl[k]) {
k1 = k;
int l = 0, r = n-1;
while (l <= r) {
int probe = (l + r) >> 1;
unsigned short val = tbl[probe];
if (w < val) {
r = probe - 1;
} else if (w > val) {
l = probe + 1;
} else {
return k;
return probe;
}
}
return -1;