1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

ext/bcmath: Made the same changes to _bc_do_add as _bc_do_sub (#14196)

_bc_do_add and _bc_do_sub perform similar processing but were written slightly
differently (and add was slower than sub), so I rewrote _bc_do_add to follow
the same structure as _bc_do_sub.

Also, _bc_do_add now uses SIMD-style (SWAR) arithmetic, adding several digits
per machine word, to perform faster calculations when possible.
This commit is contained in:
Saki Takamachi
2024-05-12 15:07:37 +09:00
committed by GitHub
parent 525cbe0a6b
commit 7203ca8286

View File

@@ -41,75 +41,123 @@
bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min)
{
bc_num sum;
size_t sum_scale, sum_digits;
size_t sum_len = MAX(n1->n_len, n2->n_len) + 1;
size_t sum_scale = MAX(n1->n_scale, n2->n_scale);
size_t min_len = MIN (n1->n_len, n2->n_len);
size_t min_scale = MIN(n1->n_scale, n2->n_scale);
size_t min_bytes = min_len + min_scale;
char *n1ptr, *n2ptr, *sumptr;
size_t n1bytes, n2bytes;
bool carry;
bool carry = 0;
size_t count;
/* Prepare sum. */
sum_scale = MAX (n1->n_scale, n2->n_scale);
sum_digits = MAX (n1->n_len, n2->n_len) + 1;
sum = bc_new_num (sum_digits, MAX(sum_scale, scale_min));
sum = bc_new_num (sum_len, MAX(sum_scale, scale_min));
/* Start with the fraction part. Initialize the pointers. */
n1bytes = n1->n_scale;
n2bytes = n2->n_scale;
n1ptr = (char *) (n1->n_value + n1->n_len + n1bytes - 1);
n2ptr = (char *) (n2->n_value + n2->n_len + n2bytes - 1);
sumptr = (char *) (sum->n_value + sum_scale + sum_digits - 1);
n1ptr = (char *) (n1->n_value + n1->n_len + n1->n_scale - 1);
n2ptr = (char *) (n2->n_value + n2->n_len + n2->n_scale - 1);
sumptr = (char *) (sum->n_value + sum_scale + sum_len - 1);
/* Add the fraction part. First copy the longer fraction.*/
if (n1bytes != n2bytes) {
if (n1bytes > n2bytes) {
while (n1bytes > n2bytes) {
*sumptr-- = *n1ptr--;
n1bytes--;
}
} else {
while (n2bytes > n1bytes) {
*sumptr-- = *n2ptr--;
n2bytes--;
}
if (n1->n_scale != min_scale) {
/* n1 has the longer scale */
for (count = n1->n_scale - min_scale; count > 0; count--) {
*sumptr-- = *n1ptr--;
}
} else {
/* n2 has the longer scale */
for (count = n2->n_scale - min_scale; count > 0; count--) {
*sumptr-- = *n2ptr--;
}
}
/* Now add the remaining fraction part and equal size integer parts. */
n1bytes += n1->n_len;
n2bytes += n2->n_len;
carry = 0;
while ((n1bytes > 0) && (n2bytes > 0)) {
count = 0;
/* Uses SIMD to perform calculations at high speed. */
if (min_bytes >= sizeof(BC_UINT_T)) {
sumptr++;
n1ptr++;
n2ptr++;
while (count + sizeof(BC_UINT_T) <= min_bytes) {
sumptr -= sizeof(BC_UINT_T);
n1ptr -= sizeof(BC_UINT_T);
n2ptr -= sizeof(BC_UINT_T);
BC_UINT_T n1bytes;
BC_UINT_T n2bytes;
memcpy(&n1bytes, n1ptr, sizeof(n1bytes));
memcpy(&n2bytes, n2ptr, sizeof(n2bytes));
#if BC_LITTLE_ENDIAN
/* Little endian requires changing the order of bytes. */
n1bytes = BC_BSWAP(n1bytes);
n2bytes = BC_BSWAP(n2bytes);
#endif
/*
* In order to add 1 to the "next digit" when a carry occurs, adjust it so that it
* overflows when add 10.
* e.g.
* 00001001(9) + 00000001(1) = 00001010(10) to
* 11111111 + 00000001 = 00000000(0) and carry 1
*/
n1bytes += SWAR_REPEAT(0xF6) + n2bytes + carry;
/* If the most significant bit is 0, a carry has occurred. */
carry = !(n1bytes & ((BC_UINT_T) 1 << (8 * sizeof(BC_UINT_T) - 1)));
/*
* The calculation result is a mixture of bytes that have been carried and bytes that have not.
* The most significant bit of each byte is 0 if it is carried forward, and 1 if it is not.
* Using this, subtract the 0xF6 added for adjustment from the byte that has not been carried
* over to return it to the correct value as a decimal number.
*/
BC_UINT_T sum_mask = ((n1bytes & SWAR_REPEAT(0x80)) >> 7) * 0xF6;
n1bytes -= sum_mask;
#if BC_LITTLE_ENDIAN
/* Little endian requires changing the order of bytes back. */
n1bytes = BC_BSWAP(n1bytes);
#endif
memcpy(sumptr, &n1bytes, sizeof(n1bytes));
count += sizeof(BC_UINT_T);
}
sumptr--;
n1ptr--;
n2ptr--;
}
for (; count < min_bytes; count++) {
*sumptr = *n1ptr-- + *n2ptr-- + carry;
if (*sumptr > (BASE - 1)) {
carry = 1;
if (*sumptr >= BASE) {
*sumptr -= BASE;
carry = 1;
} else {
carry = 0;
}
sumptr--;
n1bytes--;
n2bytes--;
}
/* Now add carry the longer integer part. */
if (n1bytes == 0) {
n1bytes = n2bytes;
n1ptr = n2ptr;
}
while (n1bytes-- > 0) {
*sumptr = *n1ptr-- + carry;
if (*sumptr > (BASE - 1)) {
carry = true;
*sumptr -= BASE;
} else {
carry = false;
if (n1->n_len != n2->n_len) {
if (n2->n_len > n1->n_len) {
n1ptr = n2ptr;
}
for (count = sum_len - min_len; count > 1; count--) {
*sumptr = *n1ptr-- + carry;
if (*sumptr >= BASE) {
*sumptr -= BASE;
carry = 1;
} else {
carry = 0;
}
sumptr--;
}
sumptr--;
}
/* Set final carry. */
if (carry) {
*sumptr += 1;
}
*sumptr += carry;
/* Adjust sum and return. */
_bc_rm_leading_zeros(sum);