diff --git a/ext/bcmath/libbcmath/src/convert.c b/ext/bcmath/libbcmath/src/convert.c index 484df2aa508..bf3d9a9a415 100644 --- a/ext/bcmath/libbcmath/src/convert.c +++ b/ext/bcmath/libbcmath/src/convert.c @@ -61,3 +61,101 @@ char *bc_copy_and_toggle_bcd(char *restrict dest, const char *source, const char return dest; } + +/* This is based on the technique described in https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html. + * This function transforms the bytes AABBCCDD (one digit per byte, most significant digit first) into 1000 * AA + 100 * BB + 10 * CC + DD, + * with the caveat that all components must be in the interval [0, 25] to prevent overflow + * due to the multiplication by power of 10 (10 * 25 = 250 is the largest multiple of 10 that fits in a byte). + * The advantage of this method instead of using shifts + 3 multiplications is that this is cheaper + * due to its divide-and-conquer nature. The 8-byte variant applies one more combining step to produce an 8-digit value. + */ +#if SIZEOF_SIZE_T == 4 +BC_VECTOR bc_parse_chunk_chars(const char *str) +{ + BC_VECTOR tmp; + memcpy(&tmp, str, sizeof(tmp)); +#if !BC_LITTLE_ENDIAN + tmp = BC_BSWAP(tmp); +#endif + + BC_VECTOR lower_digits = (tmp & 0x0f000f00) >> 8; + BC_VECTOR upper_digits = (tmp & 0x000f000f) * 10; + + tmp = lower_digits + upper_digits; + + lower_digits = (tmp & 0x00ff0000) >> 16; + upper_digits = (tmp & 0x000000ff) * 100; + + return lower_digits + upper_digits; +} +#elif SIZEOF_SIZE_T == 8 +BC_VECTOR bc_parse_chunk_chars(const char *str) +{ + BC_VECTOR tmp; + memcpy(&tmp, str, sizeof(tmp)); +#if !BC_LITTLE_ENDIAN + tmp = BC_BSWAP(tmp); +#endif + + BC_VECTOR lower_digits = (tmp & 0x0f000f000f000f00) >> 8; + BC_VECTOR upper_digits = (tmp & 0x000f000f000f000f) * 10; + + tmp = lower_digits + upper_digits; + + lower_digits = (tmp & 0x00ff000000ff0000) >> 16; + upper_digits = (tmp & 0x000000ff000000ff) * 100; + + tmp = lower_digits + upper_digits; + + lower_digits = (tmp & 0x0000ffff00000000) >> 32; + upper_digits = (tmp & 0x000000000000ffff) * 10000; + + return lower_digits + upper_digits; +} +#endif + +#if BC_LITTLE_ENDIAN 
+# define BC_ENCODE_LUT(A, B) ((A) | (B) << 4) +#else +# define BC_ENCODE_LUT(A, B) ((B) | (A) << 4) +#endif + +#define LUT_ITERATE(_, A) \ + _(A, 0), _(A, 1), _(A, 2), _(A, 3), _(A, 4), _(A, 5), _(A, 6), _(A, 7), _(A, 8), _(A, 9) + +/* This LUT encodes the decimal representation of numbers 0-99 (one digit per nibble) + * such that we can avoid taking modulos and divisions which would be slow. */ +static const unsigned char LUT[100] = { + LUT_ITERATE(BC_ENCODE_LUT, 0), + LUT_ITERATE(BC_ENCODE_LUT, 1), + LUT_ITERATE(BC_ENCODE_LUT, 2), + LUT_ITERATE(BC_ENCODE_LUT, 3), + LUT_ITERATE(BC_ENCODE_LUT, 4), + LUT_ITERATE(BC_ENCODE_LUT, 5), + LUT_ITERATE(BC_ENCODE_LUT, 6), + LUT_ITERATE(BC_ENCODE_LUT, 7), + LUT_ITERATE(BC_ENCODE_LUT, 8), + LUT_ITERATE(BC_ENCODE_LUT, 9), +}; + +static inline unsigned short bc_expand_lut(unsigned char c) +{ + return (c & 0x0f) | (c & 0xf0) << 4; +} + +/* Writes the four decimal digits of value (which must be below 10000) to str, one digit value per byte (not ASCII). + * E.g. if value = 1234, then the bytes 1, 2, 3, 4 will be written to str. */ +void bc_write_bcd_representation(uint32_t value, char *str) +{ + uint32_t upper = value / 100; /* e.g. 12 */ + uint32_t lower = value % 100; /* e.g. 34 */ + +#if BC_LITTLE_ENDIAN + /* Note: little endian, so `lower` comes before `upper`! */ + uint32_t digits = bc_expand_lut(LUT[lower]) << 16 | bc_expand_lut(LUT[upper]); +#else + /* Note: big endian, so `upper` comes before `lower`! 
*/ + uint32_t digits = bc_expand_lut(LUT[upper]) << 16 | bc_expand_lut(LUT[lower]); +#endif + memcpy(str, &digits, sizeof(digits)); +} diff --git a/ext/bcmath/libbcmath/src/convert.h b/ext/bcmath/libbcmath/src/convert.h index 7705bdc6ea0..6ddd447c804 100644 --- a/ext/bcmath/libbcmath/src/convert.h +++ b/ext/bcmath/libbcmath/src/convert.h @@ -14,9 +14,47 @@ +----------------------------------------------------------------------+ */ +#include "private.h" + #ifndef BCMATH_CONVERT_H #define BCMATH_CONVERT_H char *bc_copy_and_toggle_bcd(char *restrict dest, const char *source, const char *source_end); +void bc_write_bcd_representation(uint32_t value, char *str); +BC_VECTOR bc_parse_chunk_chars(const char *str); + +/* + * Converts len digits of a bc_num to a single BC_VECTOR, going backwards from pointer n, + * which points at the least significant of those digits. + */ +static inline BC_VECTOR bc_partial_convert_to_vector(const char *n, size_t len) +{ + if (len == BC_VECTOR_SIZE) { + return bc_parse_chunk_chars(n - BC_VECTOR_SIZE + 1); + } + + BC_VECTOR num = 0; + BC_VECTOR base = 1; + + for (size_t i = 0; i < len; i++) { + num += *n * base; + base *= BASE; + n--; + } + + return num; +} + +static inline void bc_convert_to_vector(BC_VECTOR *n_vector, const char *nend, size_t nlen) +{ + size_t i = 0; + while (nlen > 0) { + size_t len = MIN(BC_VECTOR_SIZE, nlen); + n_vector[i] = bc_partial_convert_to_vector(nend, len); + nend -= len; + nlen -= len; + i++; + } +} #endif diff --git a/ext/bcmath/libbcmath/src/private.h b/ext/bcmath/libbcmath/src/private.h index 50a0dfd3a92..47f6981ca09 100644 --- a/ext/bcmath/libbcmath/src/private.h +++ b/ext/bcmath/libbcmath/src/private.h @@ -84,9 +84,15 @@ static inline uint64_t BC_BSWAP64(uint64_t u) #if SIZEOF_SIZE_T >= 8 # define BC_BSWAP(u) BC_BSWAP64(u) typedef uint64_t BC_VECTOR; +# define BC_VECTOR_SIZE 8 +/* The boundary number is computed from BASE ** BC_VECTOR_SIZE */ +# define BC_VECTOR_BOUNDARY_NUM (BC_VECTOR) 100000000 #else # define BC_BSWAP(u) BC_BSWAP32(u) 
typedef uint32_t BC_VECTOR; +# define BC_VECTOR_SIZE 4 +/* The boundary number is computed from BASE ** BC_VECTOR_SIZE */ +# define BC_VECTOR_BOUNDARY_NUM (BC_VECTOR) 10000 #endif #ifdef WORDS_BIGENDIAN @@ -95,6 +101,12 @@ static inline uint64_t BC_BSWAP64(uint64_t u) # define BC_LITTLE_ENDIAN 1 #endif +/* + * Adding more than this many products of two values below BC_VECTOR_BOUNDARY_NUM may cause uint32_t/uint64_t to overflow. + * Typically this is 1844 for 64bit and 42 for 32bit. + */ +#define BC_VECTOR_NO_OVERFLOW_ADD_COUNT (~((BC_VECTOR) 0) / (BC_VECTOR_BOUNDARY_NUM * BC_VECTOR_BOUNDARY_NUM)) + /* routines */ bcmath_compare_result _bc_do_compare (bc_num n1, bc_num n2, size_t scale, bool use_sign); diff --git a/ext/bcmath/libbcmath/src/recmul.c b/ext/bcmath/libbcmath/src/recmul.c index 0a5b652a6d9..3341396236e 100644 --- a/ext/bcmath/libbcmath/src/recmul.c +++ b/ext/bcmath/libbcmath/src/recmul.c @@ -34,26 +34,10 @@ #include #include #include "private.h" +#include "convert.h" #include "zend_alloc.h" -#if SIZEOF_SIZE_T >= 8 -# define BC_VECTOR_SIZE 8 -/* The boundary number is computed from BASE ** BC_VECTOR_SIZE */ -# define BC_VECTOR_BOUNDARY_NUM (BC_VECTOR) 100000000 -#else -# define BC_VECTOR_SIZE 4 -/* The boundary number is computed from BASE ** BC_VECTOR_SIZE */ -# define BC_VECTOR_BOUNDARY_NUM (BC_VECTOR) 10000 -#endif - -/* - * Adding more than this many times may cause uint32_t/uint64_t to overflow. - * Typically this is 1844 for 64bit and 42 for 32bit. - */ -#define BC_VECTOR_NO_OVERFLOW_ADD_COUNT (~((BC_VECTOR) 0) / (BC_VECTOR_BOUNDARY_NUM * BC_VECTOR_BOUNDARY_NUM)) - - /* Multiply utility routines */ static inline void bc_mul_carry_calc(BC_VECTOR *prod_vector, size_t prod_arr_size) @@ -64,92 +48,6 @@ static inline void bc_mul_carry_calc(BC_VECTOR *prod_vector, size_t prod_arr_siz } } -/* This is based on the technique described in https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html. 
- * This function transforms AABBCCDD into 1000 * AA + 100 * BB + 10 * CC + DD, - * with the caveat that all components must be in the interval [0, 25] to prevent overflow - * due to the multiplication by power of 10 (10 * 25 = 250 is the largest number that fits in a byte). - * The advantage of this method instead of using shifts + 3 multiplications is that this is cheaper - * due to its divide-and-conquer nature. - */ -#if SIZEOF_SIZE_T == 4 -static BC_VECTOR bc_parse_chunk_chars(const char *str) -{ - BC_VECTOR tmp; - memcpy(&tmp, str, sizeof(tmp)); -#if !BC_LITTLE_ENDIAN - tmp = BC_BSWAP(tmp); -#endif - - BC_VECTOR lower_digits = (tmp & 0x0f000f00) >> 8; - BC_VECTOR upper_digits = (tmp & 0x000f000f) * 10; - - tmp = lower_digits + upper_digits; - - lower_digits = (tmp & 0x00ff0000) >> 16; - upper_digits = (tmp & 0x000000ff) * 100; - - return lower_digits + upper_digits; -} -#elif SIZEOF_SIZE_T == 8 -static BC_VECTOR bc_parse_chunk_chars(const char *str) -{ - BC_VECTOR tmp; - memcpy(&tmp, str, sizeof(tmp)); -#if !BC_LITTLE_ENDIAN - tmp = BC_BSWAP(tmp); -#endif - - BC_VECTOR lower_digits = (tmp & 0x0f000f000f000f00) >> 8; - BC_VECTOR upper_digits = (tmp & 0x000f000f000f000f) * 10; - - tmp = lower_digits + upper_digits; - - lower_digits = (tmp & 0x00ff000000ff0000) >> 16; - upper_digits = (tmp & 0x000000ff000000ff) * 100; - - tmp = lower_digits + upper_digits; - - lower_digits = (tmp & 0x0000ffff00000000) >> 32; - upper_digits = (tmp & 0x000000000000ffff) * 10000; - - return lower_digits + upper_digits; -} -#endif - -/* - * Converts bc_num to BC_VECTOR, going backwards from pointer n by the number of - * characters specified by len. 
- */ -static inline BC_VECTOR bc_partial_convert_to_vector(const char *n, size_t len) -{ - if (len == BC_VECTOR_SIZE) { - return bc_parse_chunk_chars(n - BC_VECTOR_SIZE + 1); - } - - BC_VECTOR num = 0; - BC_VECTOR base = 1; - - for (size_t i = 0; i < len; i++) { - num += *n * base; - base *= BASE; - n--; - } - - return num; -} - -static inline void bc_convert_to_vector(BC_VECTOR *n_vector, const char *nend, size_t nlen) -{ - size_t i = 0; - while (nlen > 0) { - size_t len = MIN(BC_VECTOR_SIZE, nlen); - n_vector[i] = bc_partial_convert_to_vector(nend, len); - nend -= len; - nlen -= len; - i++; - } -} - /* * If the n_values of n1 and n2 are both 4 (32-bit) or 8 (64-bit) digits or less, * the calculation will be performed at high speed without using an array. @@ -174,52 +72,6 @@ static inline void bc_fast_mul(bc_num n1, size_t n1len, bc_num n2, size_t n2len, } } -#if BC_LITTLE_ENDIAN -# define BC_ENCODE_LUT(A, B) ((A) | (B) << 4) -#else -# define BC_ENCODE_LUT(A, B) ((B) | (A) << 4) -#endif - -#define LUT_ITERATE(_, A) \ - _(A, 0), _(A, 1), _(A, 2), _(A, 3), _(A, 4), _(A, 5), _(A, 6), _(A, 7), _(A, 8), _(A, 9) - -/* This LUT encodes the decimal representation of numbers 0-100 - * such that we can avoid taking modulos and divisions which would be slow. */ -static const unsigned char LUT[100] = { - LUT_ITERATE(BC_ENCODE_LUT, 0), - LUT_ITERATE(BC_ENCODE_LUT, 1), - LUT_ITERATE(BC_ENCODE_LUT, 2), - LUT_ITERATE(BC_ENCODE_LUT, 3), - LUT_ITERATE(BC_ENCODE_LUT, 4), - LUT_ITERATE(BC_ENCODE_LUT, 5), - LUT_ITERATE(BC_ENCODE_LUT, 6), - LUT_ITERATE(BC_ENCODE_LUT, 7), - LUT_ITERATE(BC_ENCODE_LUT, 8), - LUT_ITERATE(BC_ENCODE_LUT, 9), -}; - -static inline unsigned short bc_expand_lut(unsigned char c) -{ - return (c & 0x0f) | (c & 0xf0) << 4; -} - -/* Writes the character representation of the number encoded in value. - * E.g. if value = 1234, then the string "1234" will be written to str. 
*/ -static void bc_write_bcd_representation(uint32_t value, char *str) -{ - uint32_t upper = value / 100; /* e.g. 12 */ - uint32_t lower = value % 100; /* e.g. 34 */ - -#if BC_LITTLE_ENDIAN - /* Note: little endian, so `lower` comes before `upper`! */ - uint32_t digits = bc_expand_lut(LUT[lower]) << 16 | bc_expand_lut(LUT[upper]); -#else - /* Note: big endian, so `upper` comes before `lower`! */ - uint32_t digits = bc_expand_lut(LUT[upper]) << 16 | bc_expand_lut(LUT[lower]); -#endif - memcpy(str, &digits, sizeof(digits)); -} - /* * Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of BC_VECTOR. * The array is generated starting with the smaller digits. diff --git a/ext/bcmath/tests/bcdiv_by_pow_10.phpt b/ext/bcmath/tests/bcdiv_by_pow_10.phpt new file mode 100644 index 00000000000..e89ba27bf08 --- /dev/null +++ b/ext/bcmath/tests/bcdiv_by_pow_10.phpt @@ -0,0 +1,107 @@ +--TEST-- +bcdiv() function with numbers pow 10 +--EXTENSIONS-- +bcmath +--INI-- +bcmath.scale=0 +--FILE-- + +--EXPECT-- +scale: 0 +0.012345 / 0.01 = 1 + 0.12345 / 0.01 = 12 + 1.2345 / 0.01 = 123 + 12.345 / 0.01 = 1234 + 123.45 / 0.01 = 12345 +0.012345 / 0.1 = 0 + 0.12345 / 0.1 = 1 + 1.2345 / 0.1 = 12 + 12.345 / 0.1 = 123 + 123.45 / 0.1 = 1234 +0.012345 / 1 = 0 + 0.12345 / 1 = 0 + 1.2345 / 1 = 1 + 12.345 / 1 = 12 + 123.45 / 1 = 123 +0.012345 / 10 = 0 + 0.12345 / 10 = 0 + 1.2345 / 10 = 0 + 12.345 / 10 = 1 + 123.45 / 10 = 12 +0.012345 / 100 = 0 + 0.12345 / 100 = 0 + 1.2345 / 100 = 0 + 12.345 / 100 = 0 + 123.45 / 100 = 1 + +scale: 3 +0.012345 / 0.01 = 1.234 + 0.12345 / 0.01 = 12.345 + 1.2345 / 0.01 = 123.450 + 12.345 / 0.01 = 1234.500 + 123.45 / 0.01 = 12345.000 +0.012345 / 0.1 = 0.123 + 0.12345 / 0.1 = 1.234 + 1.2345 / 0.1 = 12.345 + 12.345 / 0.1 = 123.450 + 123.45 / 0.1 = 1234.500 +0.012345 / 1 = 0.012 + 0.12345 / 1 = 0.123 + 1.2345 / 1 = 1.234 + 12.345 / 1 = 12.345 + 123.45 / 1 = 123.450 +0.012345 / 10 = 0.001 + 0.12345 / 10 = 0.012 + 1.2345 / 10 = 0.123 + 12.345 / 10 = 1.234 
+ 123.45 / 10 = 12.345 +0.012345 / 100 = 0.000 + 0.12345 / 100 = 0.001 + 1.2345 / 100 = 0.012 + 12.345 / 100 = 0.123 + 123.45 / 100 = 1.234 + +scale: 5 +0.012345 / 0.01 = 1.23450 + 0.12345 / 0.01 = 12.34500 + 1.2345 / 0.01 = 123.45000 + 12.345 / 0.01 = 1234.50000 + 123.45 / 0.01 = 12345.00000 +0.012345 / 0.1 = 0.12345 + 0.12345 / 0.1 = 1.23450 + 1.2345 / 0.1 = 12.34500 + 12.345 / 0.1 = 123.45000 + 123.45 / 0.1 = 1234.50000 +0.012345 / 1 = 0.01234 + 0.12345 / 1 = 0.12345 + 1.2345 / 1 = 1.23450 + 12.345 / 1 = 12.34500 + 123.45 / 1 = 123.45000 +0.012345 / 10 = 0.00123 + 0.12345 / 10 = 0.01234 + 1.2345 / 10 = 0.12345 + 12.345 / 10 = 1.23450 + 123.45 / 10 = 12.34500 +0.012345 / 100 = 0.00012 + 0.12345 / 100 = 0.00123 + 1.2345 / 100 = 0.01234 + 12.345 / 100 = 0.12345 + 123.45 / 100 = 1.23450