kolibrios-gitea/contrib/toolchain/gcc/5x/libgcc/config/libbid/bid128_add.c
Sergey Semyonov (Serge) c7fc8e91d0 libgcc-5.4.0 initial commit
git-svn-id: svn://kolibrios.org@6515 a494cfbc-eb01-0410-851d-a64ba20cac60
2016-09-08 17:51:39 +00:00

2942 lines
100 KiB
C

/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "bid_internal.h"
/*
 * bid64dq_add -- add a 64-bit operand x and a 128-bit operand y, yielding
 * a 64-bit result.
 *
 * x is first widened to 128 bits with bid64_to_bid128; the addition itself
 * is delegated to bid64qq_add.
 *
 * Two complete definitions are provided, selected by
 * DECIMAL_CALL_BY_REFERENCE:
 *   - by reference: operands come in through px/py and the function
 *     returns void (BID_RETURN presumably stores the result through pres
 *     -- confirm against its definition);
 *   - by value: operands and result are passed and returned directly.
 * The rounding-mode and exception parameters are supplied by the
 * _RND_MODE_* and _EXC_* macros and forwarded unchanged to the callees.
 */
#if DECIMAL_CALL_BY_REFERENCE
void
bid64dq_add (UINT64 * pres, UINT64 * px, UINT128 * py
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  /* Local copy of the caller's rounding mode; the name is kept as-is
     because the _RND_MODE_ARG macro may refer to it.  */
  unsigned int rnd_mode = *prnd_mode;
#endif
  /* Placeholder bit pattern, overwritten by bid64qq_add below.  */
  UINT64 sum = 0xbaddbaddbaddbaddull;
  UINT128 x_wide;

  /* Widen the narrow operand, then add at 128-bit operand width.  */
  bid64_to_bid128 (&x_wide, &x
                   _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qq_add (&sum, &x_wide, py
               _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
               _EXC_INFO_ARG);
  BID_RETURN (sum);
}
#else
UINT64
bid64dq_add (UINT64 x, UINT128 y
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  /* Placeholder bit pattern, overwritten by bid64qq_add below.  */
  UINT64 sum = 0xbaddbaddbaddbaddull;
  /* Widen the narrow operand to 128 bits.  */
  UINT128 x_wide =
    bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);

  sum = bid64qq_add (x_wide, y
                     _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                     _EXC_INFO_ARG);
  BID_RETURN (sum);
}
#endif
/*
 * bid64qd_add -- add a 128-bit operand x and a 64-bit operand y, yielding
 * a 64-bit result.
 *
 * y is first widened to 128 bits with bid64_to_bid128; the addition itself
 * is delegated to bid64qq_add.
 *
 * Two complete definitions are provided, selected by
 * DECIMAL_CALL_BY_REFERENCE:
 *   - by reference: operands come in through px/py and the function
 *     returns void (BID_RETURN presumably stores the result through pres
 *     -- confirm against its definition);
 *   - by value: operands and result are passed and returned directly.
 * The rounding-mode and exception parameters are supplied by the
 * _RND_MODE_* and _EXC_* macros and forwarded unchanged to the callees.
 */
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qd_add (UINT64 * pres, UINT128 * px, UINT64 * py
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* Local copy of the caller's rounding mode; the name is kept as-is
     because the _RND_MODE_ARG macro may refer to it.  */
  unsigned int rnd_mode = *prnd_mode;
#endif
  /* Placeholder bit pattern, overwritten by bid64qq_add below.  */
  UINT64 sum = 0xbaddbaddbaddbaddull;
  UINT128 y_wide;

  /* Widen the narrow operand, then add at 128-bit operand width.  */
  bid64_to_bid128 (&y_wide, &y
                   _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qq_add (&sum, px, &y_wide
               _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
               _EXC_INFO_ARG);
  BID_RETURN (sum);
}
#else
UINT64
bid64qd_add (UINT128 x, UINT64 y
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  /* Placeholder bit pattern, overwritten by bid64qq_add below.  */
  UINT64 sum = 0xbaddbaddbaddbaddull;
  /* Widen the narrow operand to 128 bits.  */
  UINT128 y_wide =
    bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);

  sum = bid64qq_add (x, y_wide
                     _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                     _EXC_INFO_ARG);
  BID_RETURN (sum);
}
#endif
/*
 * bid64qq_add -- add two 128-bit operands x and y, yielding a 64-bit
 * result.
 *
 * The work is delegated to bid64qqq_fma, called with the constant `one`
 * followed by x and y.  Presumably that routine evaluates one * x + y and
 * rounds once to 64 bits -- confirm against bid64qqq_fma itself.
 *
 * Two complete definitions are provided, selected by
 * DECIMAL_CALL_BY_REFERENCE:
 *   - by reference: operands come in through px/py and the function
 *     returns void (BID_RETURN presumably stores the result through pres
 *     -- confirm against its definition);
 *   - by value: operands and result are passed and returned directly.
 * The rounding-mode and exception parameters are supplied by the
 * _RND_MODE_* and _EXC_* macros and forwarded unchanged to the callee.
 */
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qq_add (UINT64 * pres, UINT128 * px, UINT128 * py
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  UINT128 x = *px;
  UINT128 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* Local copy of the caller's rounding mode; the name is kept as-is
     because the _RND_MODE_ARG macro may refer to it.  */
  unsigned int rnd_mode = *prnd_mode;
#endif
  /* Word 0 holds 1 and word 1 holds 0x3040000000000000; presumably this
     is the 128-bit encoding of the value 1 -- TODO confirm.  */
  UINT128 one = { {0x0000000000000001ull, 0x3040000000000000ull} };
  /* Placeholder bit pattern, overwritten by bid64qqq_fma below.  */
  UINT64 sum = 0xbaddbaddbaddbaddull;

  /* NOTE(review): BID_SWAP128 presumably reorders the two words to match
     the configured word order -- confirm against its definition.  */
  BID_SWAP128 (one);
  bid64qqq_fma (&sum, &one, &x, &y
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
  BID_RETURN (sum);
}
#else
UINT64
bid64qq_add (UINT128 x, UINT128 y
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  /* Word 0 holds 1 and word 1 holds 0x3040000000000000; presumably this
     is the 128-bit encoding of the value 1 -- TODO confirm.  */
  UINT128 one = { {0x0000000000000001ull, 0x3040000000000000ull} };
  /* Placeholder bit pattern, overwritten by bid64qqq_fma below.  */
  UINT64 sum = 0xbaddbaddbaddbaddull;

  /* NOTE(review): BID_SWAP128 presumably reorders the two words to match
     the configured word order -- confirm against its definition.  */
  BID_SWAP128 (one);
  sum = bid64qqq_fma (one, x, y
                      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                      _EXC_INFO_ARG);
  BID_RETURN (sum);
}
#endif
/*
 * bid128dd_add -- add two 64-bit operands x and y, yielding a 128-bit
 * result.
 *
 * Both operands are widened to 128 bits with bid64_to_bid128 (x first,
 * then y); the addition itself is delegated to bid128_add.
 *
 * Two complete definitions are provided, selected by
 * DECIMAL_CALL_BY_REFERENCE:
 *   - by reference: operands come in through px/py and the function
 *     returns void (BID_RETURN presumably stores the result through pres
 *     -- confirm against its definition);
 *   - by value: operands and result are passed and returned directly.
 * The rounding-mode and exception parameters are supplied by the
 * _RND_MODE_* and _EXC_* macros and forwarded unchanged to the callees.
 */
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dd_add (UINT128 * pres, UINT64 * px, UINT64 * py
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 x = *px;
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* Local copy of the caller's rounding mode; the name is kept as-is
     because the _RND_MODE_ARG macro may refer to it.  */
  unsigned int rnd_mode = *prnd_mode;
#endif
  /* Placeholder bit pattern, overwritten by bid128_add below.  */
  UINT128 sum = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 x_wide;
  UINT128 y_wide;

  /* Widen both narrow operands, then add at 128-bit width.  */
  bid64_to_bid128 (&x_wide, &x
                   _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&y_wide, &y
                   _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_add (&sum, &x_wide, &y_wide
              _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
              _EXC_INFO_ARG);
  BID_RETURN (sum);
}
#else
UINT128
bid128dd_add (UINT64 x, UINT64 y
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  /* Placeholder bit pattern, overwritten by bid128_add below.  */
  UINT128 sum = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  /* Widen both narrow operands to 128 bits, x before y.  */
  UINT128 x_wide =
    bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  UINT128 y_wide =
    bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);

  sum = bid128_add (x_wide, y_wide
                    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                    _EXC_INFO_ARG);
  BID_RETURN (sum);
}
#endif
/*
 * bid128dq_add -- add a 64-bit operand x and a 128-bit operand y, yielding
 * a 128-bit result.
 *
 * x is first widened to 128 bits with bid64_to_bid128; the addition itself
 * is delegated to bid128_add.
 *
 * Two complete definitions are provided, selected by
 * DECIMAL_CALL_BY_REFERENCE:
 *   - by reference: operands come in through px/py and the function
 *     returns void (BID_RETURN presumably stores the result through pres
 *     -- confirm against its definition);
 *   - by value: operands and result are passed and returned directly.
 * The rounding-mode and exception parameters are supplied by the
 * _RND_MODE_* and _EXC_* macros and forwarded unchanged to the callees.
 */
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dq_add (UINT128 * pres, UINT64 * px, UINT128 * py
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  /* Local copy of the caller's rounding mode; the name is kept as-is
     because the _RND_MODE_ARG macro may refer to it.  */
  unsigned int rnd_mode = *prnd_mode;
#endif
  /* Placeholder bit pattern, overwritten by bid128_add below.  */
  UINT128 sum = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 x_wide;

  /* Widen the narrow operand, then add at 128-bit width.  */
  bid64_to_bid128 (&x_wide, &x
                   _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_add (&sum, &x_wide, py
              _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
              _EXC_INFO_ARG);
  BID_RETURN (sum);
}
#else
UINT128
bid128dq_add (UINT64 x, UINT128 y
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  /* Placeholder bit pattern, overwritten by bid128_add below.  */
  UINT128 sum = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  /* Widen the narrow operand to 128 bits.  */
  UINT128 x_wide =
    bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);

  sum = bid128_add (x_wide, y
                    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                    _EXC_INFO_ARG);
  BID_RETURN (sum);
}
#endif
/*
 * bid128qd_add -- add a 128-bit operand x and a 64-bit operand y, yielding
 * a 128-bit result.
 *
 * y is first widened to 128 bits with bid64_to_bid128; the addition itself
 * is delegated to bid128_add.
 *
 * Two complete definitions are provided, selected by
 * DECIMAL_CALL_BY_REFERENCE:
 *   - by reference: operands come in through px/py and the function
 *     returns void (BID_RETURN presumably stores the result through pres
 *     -- confirm against its definition);
 *   - by value: operands and result are passed and returned directly.
 * The rounding-mode and exception parameters are supplied by the
 * _RND_MODE_* and _EXC_* macros and forwarded unchanged to the callees.
 */
#if DECIMAL_CALL_BY_REFERENCE
void
bid128qd_add (UINT128 * pres, UINT128 * px, UINT64 * py
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* Local copy of the caller's rounding mode; the name is kept as-is
     because the _RND_MODE_ARG macro may refer to it.  */
  unsigned int rnd_mode = *prnd_mode;
#endif
  /* Placeholder bit pattern, overwritten by bid128_add below.  */
  UINT128 sum = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 y_wide;

  /* Widen the narrow operand, then add at 128-bit width.  */
  bid64_to_bid128 (&y_wide, &y
                   _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_add (&sum, px, &y_wide
              _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
              _EXC_INFO_ARG);
  BID_RETURN (sum);
}
#else
UINT128
bid128qd_add (UINT128 x, UINT64 y
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  /* Placeholder bit pattern, overwritten by bid128_add below.  */
  UINT128 sum = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  /* Widen the narrow operand to 128 bits.  */
  UINT128 y_wide =
    bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);

  sum = bid128_add (x, y_wide
                    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                    _EXC_INFO_ARG);
  BID_RETURN (sum);
}
#endif
// bid128_add stands for bid128qq_add
/*****************************************************************************
* BID64/BID128 sub
****************************************************************************/
/*
 * bid64dq_sub -- subtraction wrapper taking a 64-bit operand x and a
 * 128-bit operand y, yielding a 64-bit result.
 *
 * x is first widened to 128 bits with bid64_to_bid128; the subtraction
 * itself is delegated to bid64qq_sub, with the widened x as its first
 * operand and y as its second.
 *
 * Two complete definitions are provided, selected by
 * DECIMAL_CALL_BY_REFERENCE:
 *   - by reference: operands come in through px/py and the function
 *     returns void (BID_RETURN presumably stores the result through pres
 *     -- confirm against its definition);
 *   - by value: operands and result are passed and returned directly.
 * The rounding-mode and exception parameters are supplied by the
 * _RND_MODE_* and _EXC_* macros and forwarded unchanged to the callees.
 */
#if DECIMAL_CALL_BY_REFERENCE
void
bid64dq_sub (UINT64 * pres, UINT64 * px, UINT128 * py
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  /* Local copy of the caller's rounding mode; the name is kept as-is
     because the _RND_MODE_ARG macro may refer to it.  */
  unsigned int rnd_mode = *prnd_mode;
#endif
  /* Placeholder bit pattern, overwritten by bid64qq_sub below.  */
  UINT64 diff = 0xbaddbaddbaddbaddull;
  UINT128 x_wide;

  /* Widen the narrow operand, then subtract at 128-bit operand width.  */
  bid64_to_bid128 (&x_wide, &x
                   _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qq_sub (&diff, &x_wide, py
               _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
               _EXC_INFO_ARG);
  BID_RETURN (diff);
}
#else
UINT64
bid64dq_sub (UINT64 x, UINT128 y
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  /* Placeholder bit pattern, overwritten by bid64qq_sub below.  */
  UINT64 diff = 0xbaddbaddbaddbaddull;
  /* Widen the narrow operand to 128 bits.  */
  UINT128 x_wide =
    bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);

  diff = bid64qq_sub (x_wide, y
                      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                      _EXC_INFO_ARG);
  BID_RETURN (diff);
}
#endif
/*
 * bid64qd_sub -- subtraction wrapper taking a 128-bit operand x and a
 * 64-bit operand y, yielding a 64-bit result.
 *
 * y is first widened to 128 bits with bid64_to_bid128; the subtraction
 * itself is delegated to bid64qq_sub, with x as its first operand and the
 * widened y as its second.
 *
 * Two complete definitions are provided, selected by
 * DECIMAL_CALL_BY_REFERENCE:
 *   - by reference: operands come in through px/py and the function
 *     returns void (BID_RETURN presumably stores the result through pres
 *     -- confirm against its definition);
 *   - by value: operands and result are passed and returned directly.
 * The rounding-mode and exception parameters are supplied by the
 * _RND_MODE_* and _EXC_* macros and forwarded unchanged to the callees.
 */
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qd_sub (UINT64 * pres, UINT128 * px, UINT64 * py
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* Local copy of the caller's rounding mode; the name is kept as-is
     because the _RND_MODE_ARG macro may refer to it.  */
  unsigned int rnd_mode = *prnd_mode;
#endif
  /* Placeholder bit pattern, overwritten by bid64qq_sub below.  */
  UINT64 diff = 0xbaddbaddbaddbaddull;
  UINT128 y_wide;

  /* Widen the narrow operand, then subtract at 128-bit operand width.  */
  bid64_to_bid128 (&y_wide, &y
                   _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qq_sub (&diff, px, &y_wide
               _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
               _EXC_INFO_ARG);
  BID_RETURN (diff);
}
#else
UINT64
bid64qd_sub (UINT128 x, UINT64 y
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  /* Placeholder bit pattern, overwritten by bid64qq_sub below.  */
  UINT64 diff = 0xbaddbaddbaddbaddull;
  /* Widen the narrow operand to 128 bits.  */
  UINT128 y_wide =
    bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);

  diff = bid64qq_sub (x, y_wide
                      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                      _EXC_INFO_ARG);
  BID_RETURN (diff);
}
#endif
/*
 * bid64qq_sub -- subtraction wrapper taking two 128-bit operands x and y
 * and yielding a 64-bit result.
 *
 * Unless y is a NaN (all MASK_NAN bits set in its high word), the sign bit
 * of a local copy of y is inverted.  The work is then delegated to
 * bid64qqq_fma, called with the constant `one` followed by x and the
 * modified y.  Presumably that routine evaluates one * x + y, which with
 * the sign of y inverted amounts to x - y -- confirm against bid64qqq_fma.
 * A NaN y is handed over unchanged.
 *
 * Two complete definitions are provided, selected by
 * DECIMAL_CALL_BY_REFERENCE:
 *   - by reference: operands come in through px/py and the function
 *     returns void (BID_RETURN presumably stores the result through pres
 *     -- confirm against its definition); the caller's *py is never
 *     modified, only the local copy;
 *   - by value: operands and result are passed and returned directly.
 * The rounding-mode and exception parameters are supplied by the
 * _RND_MODE_* and _EXC_* macros and forwarded unchanged to the callee.
 */
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qq_sub (UINT64 * pres, UINT128 * px, UINT128 * py
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  UINT128 x = *px;
  UINT128 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* Local copy of the caller's rounding mode; the name is kept as-is
     because the _RND_MODE_ARG macro may refer to it.  */
  unsigned int rnd_mode = *prnd_mode;
#endif
  /* Word 0 holds 1 and word 1 holds 0x3040000000000000; presumably this
     is the 128-bit encoding of the value 1 -- TODO confirm.  */
  UINT128 one = { {0x0000000000000001ull, 0x3040000000000000ull} };
  /* Placeholder bit pattern, overwritten by bid64qqq_fma below.  */
  UINT64 diff = 0xbaddbaddbaddbaddull;

  /* NOTE(review): BID_SWAP128 presumably reorders the two words to match
     the configured word order -- confirm against its definition.  */
  BID_SWAP128 (one);
  if ((y.w[HIGH_128W] & MASK_NAN) != MASK_NAN) {
    /* y is not a NaN: clear bit 63 of the high word when the sign is
       set, set it otherwise.  */
    y.w[HIGH_128W] = (y.w[HIGH_128W] & MASK_SIGN)
      ? (y.w[HIGH_128W] & 0x7fffffffffffffffull)
      : (y.w[HIGH_128W] | 0x8000000000000000ull);
  }
  bid64qqq_fma (&diff, &one, &x, &y
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
  BID_RETURN (diff);
}
#else
UINT64
bid64qq_sub (UINT128 x, UINT128 y
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  /* Word 0 holds 1 and word 1 holds 0x3040000000000000; presumably this
     is the 128-bit encoding of the value 1 -- TODO confirm.  */
  UINT128 one = { {0x0000000000000001ull, 0x3040000000000000ull} };
  /* Placeholder bit pattern, overwritten by bid64qqq_fma below.  */
  UINT64 diff = 0xbaddbaddbaddbaddull;

  /* NOTE(review): BID_SWAP128 presumably reorders the two words to match
     the configured word order -- confirm against its definition.  */
  BID_SWAP128 (one);
  if ((y.w[HIGH_128W] & MASK_NAN) != MASK_NAN) {
    /* y is not a NaN: clear bit 63 of the high word when the sign is
       set, set it otherwise.  */
    y.w[HIGH_128W] = (y.w[HIGH_128W] & MASK_SIGN)
      ? (y.w[HIGH_128W] & 0x7fffffffffffffffull)
      : (y.w[HIGH_128W] | 0x8000000000000000ull);
  }
  diff = bid64qqq_fma (one, x, y
                       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                       _EXC_INFO_ARG);
  BID_RETURN (diff);
}
#endif
/*
 * bid128dd_sub -- subtraction wrapper taking two 64-bit operands x and y
 * and yielding a 128-bit result.
 *
 * Both operands are widened to 128 bits with bid64_to_bid128 (x first,
 * then y); the subtraction itself is delegated to bid128_sub, with the
 * widened x as its first operand and the widened y as its second.
 *
 * Two complete definitions are provided, selected by
 * DECIMAL_CALL_BY_REFERENCE:
 *   - by reference: operands come in through px/py and the function
 *     returns void (BID_RETURN presumably stores the result through pres
 *     -- confirm against its definition);
 *   - by value: operands and result are passed and returned directly.
 * The rounding-mode and exception parameters are supplied by the
 * _RND_MODE_* and _EXC_* macros and forwarded unchanged to the callees.
 */
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dd_sub (UINT128 * pres, UINT64 * px, UINT64 * py
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 x = *px;
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* Local copy of the caller's rounding mode; the name is kept as-is
     because the _RND_MODE_ARG macro may refer to it.  */
  unsigned int rnd_mode = *prnd_mode;
#endif
  /* Placeholder bit pattern, overwritten by bid128_sub below.  */
  UINT128 diff = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 x_wide;
  UINT128 y_wide;

  /* Widen both narrow operands, then subtract at 128-bit width.  */
  bid64_to_bid128 (&x_wide, &x
                   _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&y_wide, &y
                   _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_sub (&diff, &x_wide, &y_wide
              _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
              _EXC_INFO_ARG);
  BID_RETURN (diff);
}
#else
UINT128
bid128dd_sub (UINT64 x, UINT64 y
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  /* Placeholder bit pattern, overwritten by bid128_sub below.  */
  UINT128 diff = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  /* Widen both narrow operands to 128 bits, x before y.  */
  UINT128 x_wide =
    bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  UINT128 y_wide =
    bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);

  diff = bid128_sub (x_wide, y_wide
                     _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                     _EXC_INFO_ARG);
  BID_RETURN (diff);
}
#endif
/*
 * bid128dq_sub -- subtraction wrapper taking a 64-bit operand x and a
 * 128-bit operand y, yielding a 128-bit result.
 *
 * x is first widened to 128 bits with bid64_to_bid128; the subtraction
 * itself is delegated to bid128_sub, with the widened x as its first
 * operand and y as its second.
 *
 * Two complete definitions are provided, selected by
 * DECIMAL_CALL_BY_REFERENCE:
 *   - by reference: operands come in through px/py and the function
 *     returns void (BID_RETURN presumably stores the result through pres
 *     -- confirm against its definition);
 *   - by value: operands and result are passed and returned directly.
 * The rounding-mode and exception parameters are supplied by the
 * _RND_MODE_* and _EXC_* macros and forwarded unchanged to the callees.
 */
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dq_sub (UINT128 * pres, UINT64 * px, UINT128 * py
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  /* Local copy of the caller's rounding mode; the name is kept as-is
     because the _RND_MODE_ARG macro may refer to it.  */
  unsigned int rnd_mode = *prnd_mode;
#endif
  /* Placeholder bit pattern, overwritten by bid128_sub below.  */
  UINT128 diff = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 x_wide;

  /* Widen the narrow operand, then subtract at 128-bit width.  */
  bid64_to_bid128 (&x_wide, &x
                   _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_sub (&diff, &x_wide, py
              _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
              _EXC_INFO_ARG);
  BID_RETURN (diff);
}
#else
UINT128
bid128dq_sub (UINT64 x, UINT128 y
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  /* Placeholder bit pattern, overwritten by bid128_sub below.  */
  UINT128 diff = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  /* Widen the narrow operand to 128 bits.  */
  UINT128 x_wide =
    bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);

  diff = bid128_sub (x_wide, y
                     _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                     _EXC_INFO_ARG);
  BID_RETURN (diff);
}
#endif
/*
 * bid128qd_sub -- subtraction wrapper taking a 128-bit operand x and a
 * 64-bit operand y, yielding a 128-bit result.
 *
 * y is first widened to 128 bits with bid64_to_bid128; the subtraction
 * itself is delegated to bid128_sub, with x as its first operand and the
 * widened y as its second.
 *
 * Two complete definitions are provided, selected by
 * DECIMAL_CALL_BY_REFERENCE:
 *   - by reference: operands come in through px/py and the function
 *     returns void (BID_RETURN presumably stores the result through pres
 *     -- confirm against its definition);
 *   - by value: operands and result are passed and returned directly.
 * The rounding-mode and exception parameters are supplied by the
 * _RND_MODE_* and _EXC_* macros and forwarded unchanged to the callees.
 */
#if DECIMAL_CALL_BY_REFERENCE
void
bid128qd_sub (UINT128 * pres, UINT128 * px, UINT64 * py
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* Local copy of the caller's rounding mode; the name is kept as-is
     because the _RND_MODE_ARG macro may refer to it.  */
  unsigned int rnd_mode = *prnd_mode;
#endif
  /* Placeholder bit pattern, overwritten by bid128_sub below.  */
  UINT128 diff = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  UINT128 y_wide;

  /* Widen the narrow operand, then subtract at 128-bit width.  */
  bid64_to_bid128 (&y_wide, &y
                   _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_sub (&diff, px, &y_wide
              _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
              _EXC_INFO_ARG);
  BID_RETURN (diff);
}
#else
UINT128
bid128qd_sub (UINT128 x, UINT64 y
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  /* Placeholder bit pattern, overwritten by bid128_sub below.  */
  UINT128 diff = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
  /* Widen the narrow operand to 128 bits.  */
  UINT128 y_wide =
    bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);

  diff = bid128_sub (x, y_wide
                     _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                     _EXC_INFO_ARG);
  BID_RETURN (diff);
}
#endif
#if DECIMAL_CALL_BY_REFERENCE
void
bid128_add (UINT128 * pres, UINT128 * px, UINT128 * py
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
_EXC_INFO_PARAM) {
UINT128 x = *px, y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128_add (UINT128 x, UINT128 y
_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
_EXC_INFO_PARAM) {
#endif
UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
};
UINT64 x_sign, y_sign, tmp_sign;
UINT64 x_exp, y_exp, tmp_exp; // e1 = x_exp, e2 = y_exp
UINT64 C1_hi, C2_hi, tmp_signif_hi;
UINT64 C1_lo, C2_lo, tmp_signif_lo;
// Note: C1.w[1], C1.w[0] represent C1_hi, C1_lo (all UINT64)
// Note: C2.w[1], C2.w[0] represent C2_hi, C2_lo (all UINT64)
UINT64 tmp64, tmp64A, tmp64B;
BID_UI64DOUBLE tmp1, tmp2;
int x_nr_bits, y_nr_bits;
int q1, q2, delta, scale, x1, ind, shift, tmp_inexact = 0;
UINT64 halfulp64;
UINT128 halfulp128;
UINT128 C1, C2;
UINT128 ten2m1;
UINT128 highf2star; // top 128 bits in f2*; low 128 bits in R256[1], R256[0]
UINT256 P256, Q256, R256;
int is_inexact = 0, is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
int second_pass = 0;
BID_SWAP128 (x);
BID_SWAP128 (y);
x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
// check for NaN or Infinity
if (((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL)
|| ((y.w[1] & MASK_SPECIAL) == MASK_SPECIAL)) {
// x is special or y is special
if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
// check first for non-canonical NaN payload
if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull)
&& (x.w[0] > 0x38c15b09ffffffffull))) {
x.w[1] = x.w[1] & 0xffffc00000000000ull;
x.w[0] = 0x0ull;
}
if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return quiet (x)
res.w[1] = x.w[1] & 0xfc003fffffffffffull;
// clear out also G[6]-G[16]
res.w[0] = x.w[0];
} else { // x is QNaN
// return x
res.w[1] = x.w[1] & 0xfc003fffffffffffull;
// clear out G[6]-G[16]
res.w[0] = x.w[0];
// if y = SNaN signal invalid exception
if ((y.w[1] & MASK_SNAN) == MASK_SNAN) {
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
}
}
BID_SWAP128 (res);
BID_RETURN (res);
} else if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
// check first for non-canonical NaN payload
if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
(((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull)
&& (y.w[0] > 0x38c15b09ffffffffull))) {
y.w[1] = y.w[1] & 0xffffc00000000000ull;
y.w[0] = 0x0ull;
}
if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return quiet (y)
res.w[1] = y.w[1] & 0xfc003fffffffffffull;
// clear out also G[6]-G[16]
res.w[0] = y.w[0];
} else { // y is QNaN
// return y
res.w[1] = y.w[1] & 0xfc003fffffffffffull;
// clear out G[6]-G[16]
res.w[0] = y.w[0];
}
BID_SWAP128 (res);
BID_RETURN (res);
} else { // neither x not y is NaN; at least one is infinity
if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x is infinity
if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y is infinity
// if same sign, return either of them
if ((x.w[1] & MASK_SIGN) == (y.w[1] & MASK_SIGN)) {
res.w[1] = x_sign | MASK_INF;
res.w[0] = 0x0ull;
} else { // x and y are infinities of opposite signs
// set invalid flag
*pfpsf |= INVALID_EXCEPTION;
// return QNaN Indefinite
res.w[1] = 0x7c00000000000000ull;
res.w[0] = 0x0000000000000000ull;
}
} else { // y is 0 or finite
// return x
res.w[1] = x_sign | MASK_INF;
res.w[0] = 0x0ull;
}
} else { // x is not NaN or infinity, so y must be infinity
res.w[1] = y_sign | MASK_INF;
res.w[0] = 0x0ull;
}
BID_SWAP128 (res);
BID_RETURN (res);
}
}
// unpack the arguments
// unpack x
C1_hi = x.w[1] & MASK_COEFF;
C1_lo = x.w[0];
// test for non-canonical values:
// - values whose encoding begins with x00, x01, or x10 and whose
// coefficient is larger than 10^34 -1, or
// - values whose encoding begins with x1100, x1101, x1110 (if NaNs
// and infinities were eliminated already this test is reduced to
// checking for x10x)
// x is not infinity; check for non-canonical values - treated as zero
if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {
// G0_G1=11; non-canonical
x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
C1_hi = 0; // significand high
C1_lo = 0; // significand low
} else { // G0_G1 != 11
x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
if (C1_hi > 0x0001ed09bead87c0ull ||
(C1_hi == 0x0001ed09bead87c0ull
&& C1_lo > 0x378d8e63ffffffffull)) {
// x is non-canonical if coefficient is larger than 10^34 -1
C1_hi = 0;
C1_lo = 0;
} else { // canonical
;
}
}
// unpack y
C2_hi = y.w[1] & MASK_COEFF;
C2_lo = y.w[0];
// y is not infinity; check for non-canonical values - treated as zero
if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {
// G0_G1=11; non-canonical
y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
C2_hi = 0; // significand high
C2_lo = 0; // significand low
} else { // G0_G1 != 11
y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
if (C2_hi > 0x0001ed09bead87c0ull ||
(C2_hi == 0x0001ed09bead87c0ull
&& C2_lo > 0x378d8e63ffffffffull)) {
// y is non-canonical if coefficient is larger than 10^34 -1
C2_hi = 0;
C2_lo = 0;
} else { // canonical
;
}
}
if ((C1_hi == 0x0ull) && (C1_lo == 0x0ull)) {
// x is 0 and y is not special
// if y is 0 return 0 with the smaller exponent
if ((C2_hi == 0x0ull) && (C2_lo == 0x0ull)) {
if (x_exp < y_exp)
res.w[1] = x_exp;
else
res.w[1] = y_exp;
if (x_sign && y_sign)
res.w[1] = res.w[1] | x_sign; // both negative
else if (rnd_mode == ROUNDING_DOWN && x_sign != y_sign)
res.w[1] = res.w[1] | 0x8000000000000000ull; // -0
// else; // res = +0
res.w[0] = 0;
} else {
// for 0 + y return y, with the preferred exponent
if (y_exp <= x_exp) {
res.w[1] = y.w[1];
res.w[0] = y.w[0];
} else { // if y_exp > x_exp
// return (C2 * 10^scale) * 10^(y_exp - scale)
// where scale = min (P34-q2, y_exp-x_exp)
// determine q2 = nr. of decimal digits in y
// determine first the nr. of bits in y (y_nr_bits)
if (C2_hi == 0) { // y_bits is the nr. of bits in C2_lo
if (C2_lo >= 0x0020000000000000ull) { // y >= 2^53
// split the 64-bit value in two 32-bit halves to avoid
// rounding errors
if (C2_lo >= 0x0000000100000000ull) { // y >= 2^32
tmp2.d = (double) (C2_lo >> 32); // exact conversion
y_nr_bits =
32 +
((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
} else { // y < 2^32
tmp2.d = (double) (C2_lo); // exact conversion
y_nr_bits =
((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // if y < 2^53
tmp2.d = (double) C2_lo; // exact conversion
y_nr_bits =
((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // C2_hi != 0 => nr. bits = 64 + nr_bits (C2_hi)
tmp2.d = (double) C2_hi; // exact conversion
y_nr_bits =
64 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
q2 = nr_digits[y_nr_bits].digits;
if (q2 == 0) {
q2 = nr_digits[y_nr_bits].digits1;
if (C2_hi > nr_digits[y_nr_bits].threshold_hi ||
(C2_hi == nr_digits[y_nr_bits].threshold_hi &&
C2_lo >= nr_digits[y_nr_bits].threshold_lo))
q2++;
}
// return (C2 * 10^scale) * 10^(y_exp - scale)
// where scale = min (P34-q2, y_exp-x_exp)
scale = P34 - q2;
ind = (y_exp - x_exp) >> 49;
if (ind < scale)
scale = ind;
if (scale == 0) {
res.w[1] = y.w[1];
res.w[0] = y.w[0];
} else if (q2 <= 19) { // y fits in 64 bits
if (scale <= 19) { // 10^scale fits in 64 bits
// 64 x 64 C2_lo * ten2k64[scale]
__mul_64x64_to_128MACH (res, C2_lo, ten2k64[scale]);
} else { // 10^scale fits in 128 bits
// 64 x 128 C2_lo * ten2k128[scale - 20]
__mul_128x64_to_128 (res, C2_lo, ten2k128[scale - 20]);
}
} else { // y fits in 128 bits, but 10^scale must fit in 64 bits
// 64 x 128 ten2k64[scale] * C2
C2.w[1] = C2_hi;
C2.w[0] = C2_lo;
__mul_128x64_to_128 (res, ten2k64[scale], C2);
}
// subtract scale from the exponent
y_exp = y_exp - ((UINT64) scale << 49);
res.w[1] = res.w[1] | y_sign | y_exp;
}
}
BID_SWAP128 (res);
BID_RETURN (res);
} else if ((C2_hi == 0x0ull) && (C2_lo == 0x0ull)) {
// y is 0 and x is not special, and not zero
// for x + 0 return x, with the preferred exponent
if (x_exp <= y_exp) {
res.w[1] = x.w[1];
res.w[0] = x.w[0];
} else { // if x_exp > y_exp
// return (C1 * 10^scale) * 10^(x_exp - scale)
// where scale = min (P34-q1, x_exp-y_exp)
// determine q1 = nr. of decimal digits in x
// determine first the nr. of bits in x
if (C1_hi == 0) { // x_bits is the nr. of bits in C1_lo
if (C1_lo >= 0x0020000000000000ull) { // x >= 2^53
// split the 64-bit value in two 32-bit halves to avoid
// rounding errors
if (C1_lo >= 0x0000000100000000ull) { // x >= 2^32
tmp1.d = (double) (C1_lo >> 32); // exact conversion
x_nr_bits =
32 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) -
0x3ff);
} else { // x < 2^32
tmp1.d = (double) (C1_lo); // exact conversion
x_nr_bits =
((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // if x < 2^53
tmp1.d = (double) C1_lo; // exact conversion
x_nr_bits =
((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // C1_hi != 0 => nr. bits = 64 + nr_bits (C1_hi)
tmp1.d = (double) C1_hi; // exact conversion
x_nr_bits =
64 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
q1 = nr_digits[x_nr_bits].digits;
if (q1 == 0) {
q1 = nr_digits[x_nr_bits].digits1;
if (C1_hi > nr_digits[x_nr_bits].threshold_hi ||
(C1_hi == nr_digits[x_nr_bits].threshold_hi &&
C1_lo >= nr_digits[x_nr_bits].threshold_lo))
q1++;
}
// return (C1 * 10^scale) * 10^(x_exp - scale)
// where scale = min (P34-q1, x_exp-y_exp)
scale = P34 - q1;
ind = (x_exp - y_exp) >> 49;
if (ind < scale)
scale = ind;
if (scale == 0) {
res.w[1] = x.w[1];
res.w[0] = x.w[0];
} else if (q1 <= 19) { // x fits in 64 bits
if (scale <= 19) { // 10^scale fits in 64 bits
// 64 x 64 C1_lo * ten2k64[scale]
__mul_64x64_to_128MACH (res, C1_lo, ten2k64[scale]);
} else { // 10^scale fits in 128 bits
// 64 x 128 C1_lo * ten2k128[scale - 20]
__mul_128x64_to_128 (res, C1_lo, ten2k128[scale - 20]);
}
} else { // x fits in 128 bits, but 10^scale must fit in 64 bits
// 64 x 128 ten2k64[scale] * C1
C1.w[1] = C1_hi;
C1.w[0] = C1_lo;
__mul_128x64_to_128 (res, ten2k64[scale], C1);
}
// subtract scale from the exponent
x_exp = x_exp - ((UINT64) scale << 49);
res.w[1] = res.w[1] | x_sign | x_exp;
}
BID_SWAP128 (res);
BID_RETURN (res);
} else { // x and y are not canonical, not special, and are not zero
// note that the result may still be zero, and then it has to have the
// preferred exponent
if (x_exp < y_exp) { // if exp_x < exp_y then swap x and y
tmp_sign = x_sign;
tmp_exp = x_exp;
tmp_signif_hi = C1_hi;
tmp_signif_lo = C1_lo;
x_sign = y_sign;
x_exp = y_exp;
C1_hi = C2_hi;
C1_lo = C2_lo;
y_sign = tmp_sign;
y_exp = tmp_exp;
C2_hi = tmp_signif_hi;
C2_lo = tmp_signif_lo;
}
// q1 = nr. of decimal digits in x
// determine first the nr. of bits in x
if (C1_hi == 0) { // x_bits is the nr. of bits in C1_lo
if (C1_lo >= 0x0020000000000000ull) { // x >= 2^53
//split the 64-bit value in two 32-bit halves to avoid rounding errors
if (C1_lo >= 0x0000000100000000ull) { // x >= 2^32
tmp1.d = (double) (C1_lo >> 32); // exact conversion
x_nr_bits =
32 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
} else { // x < 2^32
tmp1.d = (double) (C1_lo); // exact conversion
x_nr_bits =
((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // if x < 2^53
tmp1.d = (double) C1_lo; // exact conversion
x_nr_bits =
((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // C1_hi != 0 => nr. bits = 64 + nr_bits (C1_hi)
tmp1.d = (double) C1_hi; // exact conversion
x_nr_bits =
64 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
q1 = nr_digits[x_nr_bits].digits;
if (q1 == 0) {
q1 = nr_digits[x_nr_bits].digits1;
if (C1_hi > nr_digits[x_nr_bits].threshold_hi ||
(C1_hi == nr_digits[x_nr_bits].threshold_hi &&
C1_lo >= nr_digits[x_nr_bits].threshold_lo))
q1++;
}
// q2 = nr. of decimal digits in y
// determine first the nr. of bits in y (y_nr_bits)
if (C2_hi == 0) { // y_bits is the nr. of bits in C2_lo
if (C2_lo >= 0x0020000000000000ull) { // y >= 2^53
//split the 64-bit value in two 32-bit halves to avoid rounding errors
if (C2_lo >= 0x0000000100000000ull) { // y >= 2^32
tmp2.d = (double) (C2_lo >> 32); // exact conversion
y_nr_bits =
32 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
} else { // y < 2^32
tmp2.d = (double) (C2_lo); // exact conversion
y_nr_bits =
((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // if y < 2^53
tmp2.d = (double) C2_lo; // exact conversion
y_nr_bits =
((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
} else { // C2_hi != 0 => nr. bits = 64 + nr_bits (C2_hi)
tmp2.d = (double) C2_hi; // exact conversion
y_nr_bits =
64 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
}
q2 = nr_digits[y_nr_bits].digits;
if (q2 == 0) {
q2 = nr_digits[y_nr_bits].digits1;
if (C2_hi > nr_digits[y_nr_bits].threshold_hi ||
(C2_hi == nr_digits[y_nr_bits].threshold_hi &&
C2_lo >= nr_digits[y_nr_bits].threshold_lo))
q2++;
}
delta = q1 + (int) (x_exp >> 49) - q2 - (int) (y_exp >> 49);
if (delta >= P34) {
// round the result directly because 0 < C2 < ulp (C1 * 10^(x_exp-e2))
// n = C1 * 10^e1 or n = C1 +/- 10^(q1-P34)) * 10^e1
// the result is inexact; the preferred exponent is the least possible
if (delta >= P34 + 1) {
// for RN the result is the operand with the larger magnitude,
// possibly scaled up by 10^(P34-q1)
// an overflow cannot occur in this case (rounding to nearest)
if (q1 < P34) { // scale C1 up by 10^(P34-q1)
// Note: because delta >= P34+1 it is certain that
// x_exp - ((UINT64)scale << 49) will stay above e_min
scale = P34 - q1;
if (q1 <= 19) { // C1 fits in 64 bits
// 1 <= q1 <= 19 => 15 <= scale <= 33
if (scale <= 19) { // 10^scale fits in 64 bits
__mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
} else { // if 20 <= scale <= 33
// C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
// (C1 * 10^(scale-19)) fits in 64 bits
C1_lo = C1_lo * ten2k64[scale - 19];
__mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
}
} else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
// => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
C1.w[1] = C1_hi;
C1.w[0] = C1_lo;
// C1 = ten2k64[P34 - q1] * C1
__mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
}
x_exp = x_exp - ((UINT64) scale << 49);
C1_hi = C1.w[1];
C1_lo = C1.w[0];
}
// some special cases arise: if delta = P34 + 1 and C1 = 10^(P34-1)
// (after scaling) and x_sign != y_sign and C2 > 5*10^(q2-1) =>
// subtract 1 ulp
// Note: do this only for rounding to nearest; for other rounding
// modes the correction will be applied next
if ((rnd_mode == ROUNDING_TO_NEAREST
|| rnd_mode == ROUNDING_TIES_AWAY) && delta == (P34 + 1)
&& C1_hi == 0x0000314dc6448d93ull
&& C1_lo == 0x38c15b0a00000000ull && x_sign != y_sign
&& ((q2 <= 19 && C2_lo > midpoint64[q2 - 1]) || (q2 >= 20
&& (C2_hi >
midpoint128
[q2 -
20].
w[1]
||
(C2_hi
==
midpoint128
[q2 -
20].
w[1]
&&
C2_lo
>
midpoint128
[q2 -
20].
w
[0])))))
{
// C1 = 10^34 - 1 and decrement x_exp by 1 (no underflow possible)
C1_hi = 0x0001ed09bead87c0ull;
C1_lo = 0x378d8e63ffffffffull;
x_exp = x_exp - EXP_P1;
}
if (rnd_mode != ROUNDING_TO_NEAREST) {
if ((rnd_mode == ROUNDING_DOWN && x_sign && y_sign) ||
(rnd_mode == ROUNDING_UP && !x_sign && !y_sign)) {
// add 1 ulp and then check for overflow
C1_lo = C1_lo + 1;
if (C1_lo == 0) { // rounding overflow in the low 64 bits
C1_hi = C1_hi + 1;
}
if (C1_hi == 0x0001ed09bead87c0ull
&& C1_lo == 0x378d8e6400000000ull) {
// C1 = 10^34 => rounding overflow
C1_hi = 0x0000314dc6448d93ull;
C1_lo = 0x38c15b0a00000000ull; // 10^33
x_exp = x_exp + EXP_P1;
if (x_exp == EXP_MAX_P1) { // overflow
C1_hi = 0x7800000000000000ull; // +inf
C1_lo = 0x0ull;
x_exp = 0; // x_sign is preserved
// set overflow flag (the inexact flag was set too)
*pfpsf |= OVERFLOW_EXCEPTION;
}
}
} else if ((rnd_mode == ROUNDING_DOWN && !x_sign && y_sign) ||
(rnd_mode == ROUNDING_UP && x_sign && !y_sign) ||
(rnd_mode == ROUNDING_TO_ZERO
&& x_sign != y_sign)) {
// subtract 1 ulp from C1
// Note: because delta >= P34 + 1 the result cannot be zero
C1_lo = C1_lo - 1;
if (C1_lo == 0xffffffffffffffffull)
C1_hi = C1_hi - 1;
// if the coefficient is 10^33 - 1 then make it 10^34 - 1 and
// decrease the exponent by 1 (because delta >= P34 + 1 the
// exponent will not become less than e_min)
// 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
// 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
if (C1_hi == 0x0000314dc6448d93ull
&& C1_lo == 0x38c15b09ffffffffull) {
// make C1 = 10^34 - 1
C1_hi = 0x0001ed09bead87c0ull;
C1_lo = 0x378d8e63ffffffffull;
x_exp = x_exp - EXP_P1;
}
} else {
; // the result is already correct
}
}
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// assemble the result
res.w[1] = x_sign | x_exp | C1_hi;
res.w[0] = C1_lo;
} else { // delta = P34
// in most cases, the smaller operand may be < or = or > 1/2 ulp of the
// larger operand
// however, the case C1 = 10^(q1-1) and x_sign != y_sign is special due
// to accuracy loss after subtraction, and will be treated separately
if (x_sign == y_sign || (q1 <= 20
&& (C1_hi != 0
|| C1_lo != ten2k64[q1 - 1]))
|| (q1 >= 21 && (C1_hi != ten2k128[q1 - 21].w[1]
|| C1_lo != ten2k128[q1 - 21].w[0]))) {
// if x_sign == y_sign or C1 != 10^(q1-1)
// compare C2 with 1/2 ulp = 5 * 10^(q2-1), the latter read from table
// Note: cases q1<=19 and q1>=20 can be coalesced at some latency cost
if (q2 <= 19) { // C2 and 5*10^(q2-1) both fit in 64 bits
halfulp64 = midpoint64[q2 - 1]; // 5 * 10^(q2-1)
if (C2_lo < halfulp64) { // n2 < 1/2 ulp (n1)
// for RN the result is the operand with the larger magnitude,
// possibly scaled up by 10^(P34-q1)
// an overflow cannot occur in this case (rounding to nearest)
if (q1 < P34) { // scale C1 up by 10^(P34-q1)
// Note: because delta = P34 it is certain that
// x_exp - ((UINT64)scale << 49) will stay above e_min
scale = P34 - q1;
if (q1 <= 19) { // C1 fits in 64 bits
// 1 <= q1 <= 19 => 15 <= scale <= 33
if (scale <= 19) { // 10^scale fits in 64 bits
__mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
} else { // if 20 <= scale <= 33
// C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
// (C1 * 10^(scale-19)) fits in 64 bits
C1_lo = C1_lo * ten2k64[scale - 19];
__mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
}
} else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
// => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
C1.w[1] = C1_hi;
C1.w[0] = C1_lo;
// C1 = ten2k64[P34 - q1] * C1
__mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
}
x_exp = x_exp - ((UINT64) scale << 49);
C1_hi = C1.w[1];
C1_lo = C1.w[0];
}
if (rnd_mode != ROUNDING_TO_NEAREST) {
if ((rnd_mode == ROUNDING_DOWN && x_sign && y_sign) ||
(rnd_mode == ROUNDING_UP && !x_sign && !y_sign)) {
// add 1 ulp and then check for overflow
C1_lo = C1_lo + 1;
if (C1_lo == 0) { // rounding overflow in the low 64 bits
C1_hi = C1_hi + 1;
}
if (C1_hi == 0x0001ed09bead87c0ull
&& C1_lo == 0x378d8e6400000000ull) {
// C1 = 10^34 => rounding overflow
C1_hi = 0x0000314dc6448d93ull;
C1_lo = 0x38c15b0a00000000ull; // 10^33
x_exp = x_exp + EXP_P1;
if (x_exp == EXP_MAX_P1) { // overflow
C1_hi = 0x7800000000000000ull; // +inf
C1_lo = 0x0ull;
x_exp = 0; // x_sign is preserved
// set overflow flag (the inexact flag was set too)
*pfpsf |= OVERFLOW_EXCEPTION;
}
}
} else
if ((rnd_mode == ROUNDING_DOWN && !x_sign && y_sign)
|| (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
|| (rnd_mode == ROUNDING_TO_ZERO
&& x_sign != y_sign)) {
// subtract 1 ulp from C1
// Note: because delta >= P34 + 1 the result cannot be zero
C1_lo = C1_lo - 1;
if (C1_lo == 0xffffffffffffffffull)
C1_hi = C1_hi - 1;
// if the coefficient is 10^33-1 then make it 10^34-1 and
// decrease the exponent by 1 (because delta >= P34 + 1 the
// exponent will not become less than e_min)
// 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
// 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
if (C1_hi == 0x0000314dc6448d93ull
&& C1_lo == 0x38c15b09ffffffffull) {
// make C1 = 10^34 - 1
C1_hi = 0x0001ed09bead87c0ull;
C1_lo = 0x378d8e63ffffffffull;
x_exp = x_exp - EXP_P1;
}
} else {
; // the result is already correct
}
}
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// assemble the result
res.w[1] = x_sign | x_exp | C1_hi;
res.w[0] = C1_lo;
} else if ((C2_lo == halfulp64)
&& (q1 < P34 || ((C1_lo & 0x1) == 0))) {
// n2 = 1/2 ulp (n1) and C1 is even
// the result is the operand with the larger magnitude,
// possibly scaled up by 10^(P34-q1)
// an overflow cannot occur in this case (rounding to nearest)
if (q1 < P34) { // scale C1 up by 10^(P34-q1)
// Note: because delta = P34 it is certain that
// x_exp - ((UINT64)scale << 49) will stay above e_min
scale = P34 - q1;
if (q1 <= 19) { // C1 fits in 64 bits
// 1 <= q1 <= 19 => 15 <= scale <= 33
if (scale <= 19) { // 10^scale fits in 64 bits
__mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
} else { // if 20 <= scale <= 33
// C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
// (C1 * 10^(scale-19)) fits in 64 bits
C1_lo = C1_lo * ten2k64[scale - 19];
__mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
}
} else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
// => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
C1.w[1] = C1_hi;
C1.w[0] = C1_lo;
// C1 = ten2k64[P34 - q1] * C1
__mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
}
x_exp = x_exp - ((UINT64) scale << 49);
C1_hi = C1.w[1];
C1_lo = C1.w[0];
}
if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign == y_sign
&& (C1_lo & 0x01)) || (rnd_mode == ROUNDING_TIES_AWAY
&& x_sign == y_sign)
|| (rnd_mode == ROUNDING_UP && !x_sign && !y_sign)
|| (rnd_mode == ROUNDING_DOWN && x_sign && y_sign)) {
// add 1 ulp and then check for overflow
C1_lo = C1_lo + 1;
if (C1_lo == 0) { // rounding overflow in the low 64 bits
C1_hi = C1_hi + 1;
}
if (C1_hi == 0x0001ed09bead87c0ull
&& C1_lo == 0x378d8e6400000000ull) {
// C1 = 10^34 => rounding overflow
C1_hi = 0x0000314dc6448d93ull;
C1_lo = 0x38c15b0a00000000ull; // 10^33
x_exp = x_exp + EXP_P1;
if (x_exp == EXP_MAX_P1) { // overflow
C1_hi = 0x7800000000000000ull; // +inf
C1_lo = 0x0ull;
x_exp = 0; // x_sign is preserved
// set overflow flag (the inexact flag was set too)
*pfpsf |= OVERFLOW_EXCEPTION;
}
}
} else
if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign != y_sign
&& (C1_lo & 0x01)) || (rnd_mode == ROUNDING_DOWN
&& !x_sign && y_sign)
|| (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
|| (rnd_mode == ROUNDING_TO_ZERO
&& x_sign != y_sign)) {
// subtract 1 ulp from C1
// Note: because delta >= P34 + 1 the result cannot be zero
C1_lo = C1_lo - 1;
if (C1_lo == 0xffffffffffffffffull)
C1_hi = C1_hi - 1;
// if the coefficient is 10^33 - 1 then make it 10^34 - 1
// and decrease the exponent by 1 (because delta >= P34 + 1
// the exponent will not become less than e_min)
// 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
// 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
if (C1_hi == 0x0000314dc6448d93ull
&& C1_lo == 0x38c15b09ffffffffull) {
// make C1 = 10^34 - 1
C1_hi = 0x0001ed09bead87c0ull;
C1_lo = 0x378d8e63ffffffffull;
x_exp = x_exp - EXP_P1;
}
} else {
; // the result is already correct
}
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// assemble the result
res.w[1] = x_sign | x_exp | C1_hi;
res.w[0] = C1_lo;
} else { // if C2_lo > halfulp64 ||
// (C2_lo == halfulp64 && q1 == P34 && ((C1_lo & 0x1) == 1)), i.e.
// 1/2 ulp(n1) < n2 < 1 ulp(n1) or n2 = 1/2 ulp(n1) and C1 odd
// res = x+1 ulp if n1*n2 > 0 and res = x-1 ulp if n1*n2 < 0
if (q1 < P34) { // then 1 ulp = 10^(e1+q1-P34) < 10^e1
// Note: if (q1 == P34) then 1 ulp = 10^(e1+q1-P34) = 10^e1
// because q1 < P34 we must first replace C1 by
// C1 * 10^(P34-q1), and must decrease the exponent by
// (P34-q1) (it will still be at least e_min)
scale = P34 - q1;
if (q1 <= 19) { // C1 fits in 64 bits
// 1 <= q1 <= 19 => 15 <= scale <= 33
if (scale <= 19) { // 10^scale fits in 64 bits
__mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
} else { // if 20 <= scale <= 33
// C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
// (C1 * 10^(scale-19)) fits in 64 bits
C1_lo = C1_lo * ten2k64[scale - 19];
__mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
}
} else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
// => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
C1.w[1] = C1_hi;
C1.w[0] = C1_lo;
// C1 = ten2k64[P34 - q1] * C1
__mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
}
x_exp = x_exp - ((UINT64) scale << 49);
C1_hi = C1.w[1];
C1_lo = C1.w[0];
// check for rounding overflow
if (C1_hi == 0x0001ed09bead87c0ull
&& C1_lo == 0x378d8e6400000000ull) {
// C1 = 10^34 => rounding overflow
C1_hi = 0x0000314dc6448d93ull;
C1_lo = 0x38c15b0a00000000ull; // 10^33
x_exp = x_exp + EXP_P1;
}
}
if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign != y_sign)
|| (rnd_mode == ROUNDING_TIES_AWAY && x_sign != y_sign
&& C2_lo != halfulp64)
|| (rnd_mode == ROUNDING_DOWN && !x_sign && y_sign)
|| (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
|| (rnd_mode == ROUNDING_TO_ZERO
&& x_sign != y_sign)) {
// the result is x - 1
// for RN n1 * n2 < 0; underflow not possible
C1_lo = C1_lo - 1;
if (C1_lo == 0xffffffffffffffffull)
C1_hi--;
// check if we crossed into the lower decade
if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
C1_lo = 0x378d8e63ffffffffull;
x_exp = x_exp - EXP_P1; // no underflow, because n1 >> n2
}
} else
if ((rnd_mode == ROUNDING_TO_NEAREST
&& x_sign == y_sign)
|| (rnd_mode == ROUNDING_TIES_AWAY
&& x_sign == y_sign)
|| (rnd_mode == ROUNDING_DOWN && x_sign && y_sign)
|| (rnd_mode == ROUNDING_UP && !x_sign
&& !y_sign)) {
// the result is x + 1
// for RN x_sign = y_sign, i.e. n1*n2 > 0
C1_lo = C1_lo + 1;
if (C1_lo == 0) { // rounding overflow in the low 64 bits
C1_hi = C1_hi + 1;
}
if (C1_hi == 0x0001ed09bead87c0ull
&& C1_lo == 0x378d8e6400000000ull) {
// C1 = 10^34 => rounding overflow
C1_hi = 0x0000314dc6448d93ull;
C1_lo = 0x38c15b0a00000000ull; // 10^33
x_exp = x_exp + EXP_P1;
if (x_exp == EXP_MAX_P1) { // overflow
C1_hi = 0x7800000000000000ull; // +inf
C1_lo = 0x0ull;
x_exp = 0; // x_sign is preserved
// set the overflow flag
*pfpsf |= OVERFLOW_EXCEPTION;
}
}
} else {
; // the result is x
}
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// assemble the result
res.w[1] = x_sign | x_exp | C1_hi;
res.w[0] = C1_lo;
}
} else { // if q2 >= 20 then 5*10^(q2-1) and C2 (the latter in
// most cases) fit only in more than 64 bits
halfulp128 = midpoint128[q2 - 20]; // 5 * 10^(q2-1)
if ((C2_hi < halfulp128.w[1])
|| (C2_hi == halfulp128.w[1]
&& C2_lo < halfulp128.w[0])) {
// n2 < 1/2 ulp (n1)
// the result is the operand with the larger magnitude,
// possibly scaled up by 10^(P34-q1)
// an overflow cannot occur in this case (rounding to nearest)
if (q1 < P34) { // scale C1 up by 10^(P34-q1)
// Note: because delta = P34 it is certain that
// x_exp - ((UINT64)scale << 49) will stay above e_min
scale = P34 - q1;
if (q1 <= 19) { // C1 fits in 64 bits
// 1 <= q1 <= 19 => 15 <= scale <= 33
if (scale <= 19) { // 10^scale fits in 64 bits
__mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
} else { // if 20 <= scale <= 33
// C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
// (C1 * 10^(scale-19)) fits in 64 bits
C1_lo = C1_lo * ten2k64[scale - 19];
__mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
}
} else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
// => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
C1.w[1] = C1_hi;
C1.w[0] = C1_lo;
// C1 = ten2k64[P34 - q1] * C1
__mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
}
C1_hi = C1.w[1];
C1_lo = C1.w[0];
x_exp = x_exp - ((UINT64) scale << 49);
}
if (rnd_mode != ROUNDING_TO_NEAREST) {
if ((rnd_mode == ROUNDING_DOWN && x_sign && y_sign) ||
(rnd_mode == ROUNDING_UP && !x_sign && !y_sign)) {
// add 1 ulp and then check for overflow
C1_lo = C1_lo + 1;
if (C1_lo == 0) { // rounding overflow in the low 64 bits
C1_hi = C1_hi + 1;
}
if (C1_hi == 0x0001ed09bead87c0ull
&& C1_lo == 0x378d8e6400000000ull) {
// C1 = 10^34 => rounding overflow
C1_hi = 0x0000314dc6448d93ull;
C1_lo = 0x38c15b0a00000000ull; // 10^33
x_exp = x_exp + EXP_P1;
if (x_exp == EXP_MAX_P1) { // overflow
C1_hi = 0x7800000000000000ull; // +inf
C1_lo = 0x0ull;
x_exp = 0; // x_sign is preserved
// set overflow flag (the inexact flag was set too)
*pfpsf |= OVERFLOW_EXCEPTION;
}
}
} else
if ((rnd_mode == ROUNDING_DOWN && !x_sign && y_sign)
|| (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
|| (rnd_mode == ROUNDING_TO_ZERO
&& x_sign != y_sign)) {
// subtract 1 ulp from C1
// Note: because delta >= P34 + 1 the result cannot be zero
C1_lo = C1_lo - 1;
if (C1_lo == 0xffffffffffffffffull)
C1_hi = C1_hi - 1;
// if the coefficient is 10^33-1 then make it 10^34-1 and
// decrease the exponent by 1 (because delta >= P34 + 1 the
// exponent will not become less than e_min)
// 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
// 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
if (C1_hi == 0x0000314dc6448d93ull
&& C1_lo == 0x38c15b09ffffffffull) {
// make C1 = 10^34 - 1
C1_hi = 0x0001ed09bead87c0ull;
C1_lo = 0x378d8e63ffffffffull;
x_exp = x_exp - EXP_P1;
}
} else {
; // the result is already correct
}
}
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// assemble the result
res.w[1] = x_sign | x_exp | C1_hi;
res.w[0] = C1_lo;
} else if ((C2_hi == halfulp128.w[1]
&& C2_lo == halfulp128.w[0])
&& (q1 < P34 || ((C1_lo & 0x1) == 0))) {
// midpoint & lsb in C1 is 0
// n2 = 1/2 ulp (n1) and C1 is even
// the result is the operand with the larger magnitude,
// possibly scaled up by 10^(P34-q1)
// an overflow cannot occur in this case (rounding to nearest)
if (q1 < P34) { // scale C1 up by 10^(P34-q1)
// Note: because delta = P34 it is certain that
// x_exp - ((UINT64)scale << 49) will stay above e_min
scale = P34 - q1;
if (q1 <= 19) { // C1 fits in 64 bits
// 1 <= q1 <= 19 => 15 <= scale <= 33
if (scale <= 19) { // 10^scale fits in 64 bits
__mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
} else { // if 20 <= scale <= 33
// C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
// (C1 * 10^(scale-19)) fits in 64 bits
C1_lo = C1_lo * ten2k64[scale - 19];
__mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
}
} else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
// => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
C1.w[1] = C1_hi;
C1.w[0] = C1_lo;
// C1 = ten2k64[P34 - q1] * C1
__mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
}
x_exp = x_exp - ((UINT64) scale << 49);
C1_hi = C1.w[1];
C1_lo = C1.w[0];
}
if (rnd_mode != ROUNDING_TO_NEAREST) {
if ((rnd_mode == ROUNDING_TIES_AWAY && x_sign == y_sign)
|| (rnd_mode == ROUNDING_UP && !y_sign)) {
// add 1 ulp and then check for overflow
C1_lo = C1_lo + 1;
if (C1_lo == 0) { // rounding overflow in the low 64 bits
C1_hi = C1_hi + 1;
}
if (C1_hi == 0x0001ed09bead87c0ull
&& C1_lo == 0x378d8e6400000000ull) {
// C1 = 10^34 => rounding overflow
C1_hi = 0x0000314dc6448d93ull;
C1_lo = 0x38c15b0a00000000ull; // 10^33
x_exp = x_exp + EXP_P1;
if (x_exp == EXP_MAX_P1) { // overflow
C1_hi = 0x7800000000000000ull; // +inf
C1_lo = 0x0ull;
x_exp = 0; // x_sign is preserved
// set overflow flag (the inexact flag was set too)
*pfpsf |= OVERFLOW_EXCEPTION;
}
}
} else if ((rnd_mode == ROUNDING_DOWN && y_sign)
|| (rnd_mode == ROUNDING_TO_ZERO
&& x_sign != y_sign)) {
// subtract 1 ulp from C1
// Note: because delta >= P34 + 1 the result cannot be zero
C1_lo = C1_lo - 1;
if (C1_lo == 0xffffffffffffffffull)
C1_hi = C1_hi - 1;
// if the coefficient is 10^33 - 1 then make it 10^34 - 1
// and decrease the exponent by 1 (because delta >= P34 + 1
// the exponent will not become less than e_min)
// 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
// 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
if (C1_hi == 0x0000314dc6448d93ull
&& C1_lo == 0x38c15b09ffffffffull) {
// make C1 = 10^34 - 1
C1_hi = 0x0001ed09bead87c0ull;
C1_lo = 0x378d8e63ffffffffull;
x_exp = x_exp - EXP_P1;
}
} else {
; // the result is already correct
}
}
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// assemble the result
res.w[1] = x_sign | x_exp | C1_hi;
res.w[0] = C1_lo;
} else { // if C2 > halfulp128 ||
// (C2 == halfulp128 && q1 == P34 && ((C1 & 0x1) == 1)), i.e.
// 1/2 ulp(n1) < n2 < 1 ulp(n1) or n2 = 1/2 ulp(n1) and C1 odd
// res = x+1 ulp if n1*n2 > 0 and res = x-1 ulp if n1*n2 < 0
if (q1 < P34) { // then 1 ulp = 10^(e1+q1-P34) < 10^e1
// Note: if (q1 == P34) then 1 ulp = 10^(e1+q1-P34) = 10^e1
// because q1 < P34 we must first replace C1 by C1*10^(P34-q1),
// and must decrease the exponent by (P34-q1) (it will still be
// at least e_min)
scale = P34 - q1;
if (q1 <= 19) { // C1 fits in 64 bits
// 1 <= q1 <= 19 => 15 <= scale <= 33
if (scale <= 19) { // 10^scale fits in 64 bits
__mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
} else { // if 20 <= scale <= 33
// C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
// (C1 * 10^(scale-19)) fits in 64 bits
C1_lo = C1_lo * ten2k64[scale - 19];
__mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
}
} else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
// => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
C1.w[1] = C1_hi;
C1.w[0] = C1_lo;
// C1 = ten2k64[P34 - q1] * C1
__mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
}
C1_hi = C1.w[1];
C1_lo = C1.w[0];
x_exp = x_exp - ((UINT64) scale << 49);
}
if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign != y_sign)
|| (rnd_mode == ROUNDING_TIES_AWAY && x_sign != y_sign
&& (C2_hi != halfulp128.w[1]
|| C2_lo != halfulp128.w[0]))
|| (rnd_mode == ROUNDING_DOWN && !x_sign && y_sign)
|| (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
|| (rnd_mode == ROUNDING_TO_ZERO
&& x_sign != y_sign)) {
// the result is x - 1
// for RN n1 * n2 < 0; underflow not possible
C1_lo = C1_lo - 1;
if (C1_lo == 0xffffffffffffffffull)
C1_hi--;
// check if we crossed into the lower decade
if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
C1_lo = 0x378d8e63ffffffffull;
x_exp = x_exp - EXP_P1; // no underflow, because n1 >> n2
}
} else
if ((rnd_mode == ROUNDING_TO_NEAREST
&& x_sign == y_sign)
|| (rnd_mode == ROUNDING_TIES_AWAY
&& x_sign == y_sign)
|| (rnd_mode == ROUNDING_DOWN && x_sign && y_sign)
|| (rnd_mode == ROUNDING_UP && !x_sign
&& !y_sign)) {
// the result is x + 1
// for RN x_sign = y_sign, i.e. n1*n2 > 0
C1_lo = C1_lo + 1;
if (C1_lo == 0) { // rounding overflow in the low 64 bits
C1_hi = C1_hi + 1;
}
if (C1_hi == 0x0001ed09bead87c0ull
&& C1_lo == 0x378d8e6400000000ull) {
// C1 = 10^34 => rounding overflow
C1_hi = 0x0000314dc6448d93ull;
C1_lo = 0x38c15b0a00000000ull; // 10^33
x_exp = x_exp + EXP_P1;
if (x_exp == EXP_MAX_P1) { // overflow
C1_hi = 0x7800000000000000ull; // +inf
C1_lo = 0x0ull;
x_exp = 0; // x_sign is preserved
// set the overflow flag
*pfpsf |= OVERFLOW_EXCEPTION;
}
}
} else {
; // the result is x
}
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// assemble the result
res.w[1] = x_sign | x_exp | C1_hi;
res.w[0] = C1_lo;
}
} // end q2 >= 20
// end case where C1 != 10^(q1-1)
} else { // C1 = 10^(q1-1) and x_sign != y_sign
// instead of C' = (C1 * 10^(e1-e2) + C2)rnd,P34
// calculate C' = C1 * 10^(e1-e2-x1) + (C2 * 10^(-x1))rnd,P34
// where x1 = q2 - 1, 0 <= x1 <= P34 - 1
// Because C1 = 10^(q1-1) and x_sign != y_sign, C' will have P34
// digits and n = C' * 10^(e2+x1)
// If the result has P34+1 digits, redo the steps above with x1+1
// If the result has P34-1 digits or less, redo the steps above with
// x1-1 but only if initially x1 >= 1
// NOTE: these two steps can be improved, e.g. we could guess if
// P34+1 or P34-1 digits will be obtained by adding/subtracting
// just the top 64 bits of the two operands
// The result cannot be zero, and it cannot overflow
x1 = q2 - 1; // 0 <= x1 <= P34-1
// Calculate C1 * 10^(e1-e2-x1) where 1 <= e1-e2-x1 <= P34
// scale = (int)(e1 >> 49) - (int)(e2 >> 49) - x1; 0 <= scale <= P34-1
scale = P34 - q1 + 1; // scale=e1-e2-x1 = P34+1-q1; 1<=scale<=P34
// either C1 or 10^(e1-e2-x1) may not fit in 64 bits,
// but their product fits with certainty in 128 bits
if (scale >= 20) { //10^(e1-e2-x1) doesn't fit in 64 bits, but C1 does
__mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
} else { // if (scale >= 1
// if 1 <= scale <= 19 then 10^(e1-e2-x1) fits in 64 bits
if (q1 <= 19) { // C1 fits in 64 bits
__mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
} else { // q1 >= 20
C1.w[1] = C1_hi;
C1.w[0] = C1_lo;
__mul_128x64_to_128 (C1, ten2k64[scale], C1);
}
}
tmp64 = C1.w[0]; // C1.w[1], C1.w[0] contains C1 * 10^(e1-e2-x1)
// now round C2 to q2-x1 = 1 decimal digit
// C2' = C2 + 1/2 * 10^x1 = C2 + 5 * 10^(x1-1)
ind = x1 - 1; // -1 <= ind <= P34 - 2
if (ind >= 0) { // if (x1 >= 1)
C2.w[0] = C2_lo;
C2.w[1] = C2_hi;
if (ind <= 18) {
C2.w[0] = C2.w[0] + midpoint64[ind];
if (C2.w[0] < C2_lo)
C2.w[1]++;
} else { // 19 <= ind <= 32
C2.w[0] = C2.w[0] + midpoint128[ind - 19].w[0];
C2.w[1] = C2.w[1] + midpoint128[ind - 19].w[1];
if (C2.w[0] < C2_lo)
C2.w[1]++;
}
// the approximation of 10^(-x1) was rounded up to 118 bits
__mul_128x128_to_256 (R256, C2, ten2mk128[ind]); // R256 = C2*, f2*
// calculate C2* and f2*
// C2* is actually floor(C2*) in this case
// C2* and f2* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// the top Ex bits of 10^(-x1) are T* = ten2mk128trunc[ind], e.g.
// if x1=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
// if (0 < f2* < 10^(-x1)) then
// if floor(C1+C2*) is even then C2* = floor(C2*) - logical right
// shift; C2* has p decimal digits, correct by Prop. 1)
// else if floor(C1+C2*) is odd C2* = floor(C2*)-1 (logical right
// shift; C2* has p decimal digits, correct by Pr. 1)
// else
// C2* = floor(C2*) (logical right shift; C has p decimal digits,
// correct by Property 1)
// n = C2* * 10^(e2+x1)
if (ind <= 2) {
highf2star.w[1] = 0x0;
highf2star.w[0] = 0x0; // low f2* ok
} else if (ind <= 21) {
highf2star.w[1] = 0x0;
highf2star.w[0] = R256.w[2] & maskhigh128[ind]; // low f2* ok
} else {
highf2star.w[1] = R256.w[3] & maskhigh128[ind];
highf2star.w[0] = R256.w[2]; // low f2* is ok
}
// shift right C2* by Ex-128 = shiftright128[ind]
if (ind >= 3) {
shift = shiftright128[ind];
if (shift < 64) { // 3 <= shift <= 63
R256.w[2] =
(R256.w[2] >> shift) | (R256.w[3] << (64 - shift));
R256.w[3] = (R256.w[3] >> shift);
} else { // 66 <= shift <= 102
R256.w[2] = (R256.w[3] >> (shift - 64));
R256.w[3] = 0x0ULL;
}
}
// redundant
is_inexact_lt_midpoint = 0;
is_inexact_gt_midpoint = 0;
is_midpoint_lt_even = 0;
is_midpoint_gt_even = 0;
// determine inexactness of the rounding of C2*
// (cannot be followed by a second rounding)
// if (0 < f2* - 1/2 < 10^(-x1)) then
// the result is exact
// else (if f2* - 1/2 > T* then)
// the result is inexact
if (ind <= 2) {
if (R256.w[1] > 0x8000000000000000ull ||
(R256.w[1] == 0x8000000000000000ull
&& R256.w[0] > 0x0ull)) {
// f2* > 1/2 and the result may be exact
tmp64A = R256.w[1] - 0x8000000000000000ull; // f* - 1/2
if ((tmp64A > ten2mk128trunc[ind].w[1]
|| (tmp64A == ten2mk128trunc[ind].w[1]
&& R256.w[0] >= ten2mk128trunc[ind].w[0]))) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// this rounding is applied to C2 only!
// x_sign != y_sign
is_inexact_gt_midpoint = 1;
} // else the result is exact
// rounding down, unless a midpoint in [ODD, EVEN]
} else { // the result is inexact; f2* <= 1/2
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// this rounding is applied to C2 only!
// x_sign != y_sign
is_inexact_lt_midpoint = 1;
}
} else if (ind <= 21) { // if 3 <= ind <= 21
if (highf2star.w[1] > 0x0 || (highf2star.w[1] == 0x0
&& highf2star.w[0] >
onehalf128[ind])
|| (highf2star.w[1] == 0x0
&& highf2star.w[0] == onehalf128[ind]
&& (R256.w[1] || R256.w[0]))) {
// f2* > 1/2 and the result may be exact
// Calculate f2* - 1/2
tmp64A = highf2star.w[0] - onehalf128[ind];
tmp64B = highf2star.w[1];
if (tmp64A > highf2star.w[0])
tmp64B--;
if (tmp64B || tmp64A
|| R256.w[1] > ten2mk128trunc[ind].w[1]
|| (R256.w[1] == ten2mk128trunc[ind].w[1]
&& R256.w[0] > ten2mk128trunc[ind].w[0])) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// this rounding is applied to C2 only!
// x_sign != y_sign
is_inexact_gt_midpoint = 1;
} // else the result is exact
} else { // the result is inexact; f2* <= 1/2
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// this rounding is applied to C2 only!
// x_sign != y_sign
is_inexact_lt_midpoint = 1;
}
} else { // if 22 <= ind <= 33
if (highf2star.w[1] > onehalf128[ind]
|| (highf2star.w[1] == onehalf128[ind]
&& (highf2star.w[0] || R256.w[1]
|| R256.w[0]))) {
// f2* > 1/2 and the result may be exact
// Calculate f2* - 1/2
// tmp64A = highf2star.w[0];
tmp64B = highf2star.w[1] - onehalf128[ind];
if (tmp64B || highf2star.w[0]
|| R256.w[1] > ten2mk128trunc[ind].w[1]
|| (R256.w[1] == ten2mk128trunc[ind].w[1]
&& R256.w[0] > ten2mk128trunc[ind].w[0])) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// this rounding is applied to C2 only!
// x_sign != y_sign
is_inexact_gt_midpoint = 1;
} // else the result is exact
} else { // the result is inexact; f2* <= 1/2
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// this rounding is applied to C2 only!
// x_sign != y_sign
is_inexact_lt_midpoint = 1;
}
}
// check for midpoints after determining inexactness
if ((R256.w[1] || R256.w[0]) && (highf2star.w[1] == 0)
&& (highf2star.w[0] == 0)
&& (R256.w[1] < ten2mk128trunc[ind].w[1]
|| (R256.w[1] == ten2mk128trunc[ind].w[1]
&& R256.w[0] <= ten2mk128trunc[ind].w[0]))) {
// the result is a midpoint
if ((tmp64 + R256.w[2]) & 0x01) { // MP in [EVEN, ODD]
// if floor(C2*) is odd C = floor(C2*) - 1; the result may be 0
R256.w[2]--;
if (R256.w[2] == 0xffffffffffffffffull)
R256.w[3]--;
// this rounding is applied to C2 only!
// x_sign != y_sign
is_midpoint_lt_even = 1;
is_inexact_lt_midpoint = 0;
is_inexact_gt_midpoint = 0;
} else {
// else MP in [ODD, EVEN]
// this rounding is applied to C2 only!
// x_sign != y_sign
is_midpoint_gt_even = 1;
is_inexact_lt_midpoint = 0;
is_inexact_gt_midpoint = 0;
}
}
} else { // if (ind == -1) only when x1 = 0
R256.w[2] = C2_lo;
R256.w[3] = C2_hi;
is_midpoint_lt_even = 0;
is_midpoint_gt_even = 0;
is_inexact_lt_midpoint = 0;
is_inexact_gt_midpoint = 0;
}
// and now subtract C1 * 10^(e1-e2-x1) - (C2 * 10^(-x1))rnd,P34
// because x_sign != y_sign this last operation is exact
C1.w[0] = C1.w[0] - R256.w[2];
C1.w[1] = C1.w[1] - R256.w[3];
if (C1.w[0] > tmp64)
C1.w[1]--; // borrow
if (C1.w[1] >= 0x8000000000000000ull) { // negative coefficient!
C1.w[0] = ~C1.w[0];
C1.w[0]++;
C1.w[1] = ~C1.w[1];
if (C1.w[0] == 0x0)
C1.w[1]++;
tmp_sign = y_sign; // the result will have the sign of y
} else {
tmp_sign = x_sign;
}
// the difference has exactly P34 digits
x_sign = tmp_sign;
if (x1 >= 1)
y_exp = y_exp + ((UINT64) x1 << 49);
C1_hi = C1.w[1];
C1_lo = C1.w[0];
// general correction from RN to RA, RM, RP, RZ; result uses y_exp
if (rnd_mode != ROUNDING_TO_NEAREST) {
if ((!x_sign
&& ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint)
||
((rnd_mode == ROUNDING_TIES_AWAY
|| rnd_mode == ROUNDING_UP)
&& is_midpoint_gt_even))) || (x_sign
&&
((rnd_mode ==
ROUNDING_DOWN
&&
is_inexact_lt_midpoint)
||
((rnd_mode ==
ROUNDING_TIES_AWAY
|| rnd_mode ==
ROUNDING_DOWN)
&&
is_midpoint_gt_even))))
{
// C1 = C1 + 1
C1_lo = C1_lo + 1;
if (C1_lo == 0) { // rounding overflow in the low 64 bits
C1_hi = C1_hi + 1;
}
if (C1_hi == 0x0001ed09bead87c0ull
&& C1_lo == 0x378d8e6400000000ull) {
// C1 = 10^34 => rounding overflow
C1_hi = 0x0000314dc6448d93ull;
C1_lo = 0x38c15b0a00000000ull; // 10^33
y_exp = y_exp + EXP_P1;
}
} else if ((is_midpoint_lt_even || is_inexact_gt_midpoint)
&&
((x_sign
&& (rnd_mode == ROUNDING_UP
|| rnd_mode == ROUNDING_TO_ZERO))
|| (!x_sign
&& (rnd_mode == ROUNDING_DOWN
|| rnd_mode == ROUNDING_TO_ZERO)))) {
// C1 = C1 - 1
C1_lo = C1_lo - 1;
if (C1_lo == 0xffffffffffffffffull)
C1_hi--;
// check if we crossed into the lower decade
if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
C1_lo = 0x378d8e63ffffffffull;
y_exp = y_exp - EXP_P1;
// no underflow, because delta + q2 >= P34 + 1
}
} else {
; // exact, the result is already correct
}
}
// assemble the result
res.w[1] = x_sign | y_exp | C1_hi;
res.w[0] = C1_lo;
}
} // end delta = P34
} else { // if (|delta| <= P34 - 1)
if (delta >= 0) { // if (0 <= delta <= P34 - 1)
if (delta <= P34 - 1 - q2) {
// calculate C' directly; the result is exact
// in this case 1<=q1<=P34-1, 1<=q2<=P34-1 and 0 <= e1-e2 <= P34-2
// The coefficient of the result is C1 * 10^(e1-e2) + C2 and the
// exponent is e2; either C1 or 10^(e1-e2) may not fit in 64 bits,
// but their product fits with certainty in 128 bits (actually in 113)
scale = delta - q1 + q2; // scale = (int)(e1 >> 49) - (int)(e2 >> 49)
if (scale >= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does
__mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
C1_hi = C1.w[1];
C1_lo = C1.w[0];
} else if (scale >= 1) {
// if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits
if (q1 <= 19) { // C1 fits in 64 bits
__mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
} else { // q1 >= 20
C1.w[1] = C1_hi;
C1.w[0] = C1_lo;
__mul_128x64_to_128 (C1, ten2k64[scale], C1);
}
C1_hi = C1.w[1];
C1_lo = C1.w[0];
} else { // if (scale == 0) C1 is unchanged
C1.w[0] = C1_lo; // C1.w[1] = C1_hi;
}
// now add C2
if (x_sign == y_sign) {
// the result cannot overflow
C1_lo = C1_lo + C2_lo;
C1_hi = C1_hi + C2_hi;
if (C1_lo < C1.w[0])
C1_hi++;
} else { // if x_sign != y_sign
C1_lo = C1_lo - C2_lo;
C1_hi = C1_hi - C2_hi;
if (C1_lo > C1.w[0])
C1_hi--;
// the result can be zero, but it cannot overflow
if (C1_lo == 0 && C1_hi == 0) {
// assemble the result
if (x_exp < y_exp)
res.w[1] = x_exp;
else
res.w[1] = y_exp;
res.w[0] = 0;
if (rnd_mode == ROUNDING_DOWN) {
res.w[1] |= 0x8000000000000000ull;
}
BID_SWAP128 (res);
BID_RETURN (res);
}
if (C1_hi >= 0x8000000000000000ull) { // negative coefficient!
C1_lo = ~C1_lo;
C1_lo++;
C1_hi = ~C1_hi;
if (C1_lo == 0x0)
C1_hi++;
x_sign = y_sign; // the result will have the sign of y
}
}
// assemble the result
res.w[1] = x_sign | y_exp | C1_hi;
res.w[0] = C1_lo;
} else if (delta == P34 - q2) {
// calculate C' directly; the result may be inexact if it requires
// P34+1 decimal digits; in this case the 'cutoff' point for addition
// is at the position of the lsb of C2, so 0 <= e1-e2 <= P34-1
// The coefficient of the result is C1 * 10^(e1-e2) + C2 and the
// exponent is e2; either C1 or 10^(e1-e2) may not fit in 64 bits,
// but their product fits with certainty in 128 bits (actually in 113)
scale = delta - q1 + q2; // scale = (int)(e1 >> 49) - (int)(e2 >> 49)
if (scale >= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does
__mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
} else if (scale >= 1) {
// if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits
if (q1 <= 19) { // C1 fits in 64 bits
__mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
} else { // q1 >= 20
C1.w[1] = C1_hi;
C1.w[0] = C1_lo;
__mul_128x64_to_128 (C1, ten2k64[scale], C1);
}
} else { // if (scale == 0) C1 is unchanged
C1.w[1] = C1_hi;
C1.w[0] = C1_lo; // only the low part is necessary
}
C1_hi = C1.w[1];
C1_lo = C1.w[0];
// now add C2
if (x_sign == y_sign) {
// the result can overflow!
C1_lo = C1_lo + C2_lo;
C1_hi = C1_hi + C2_hi;
if (C1_lo < C1.w[0])
C1_hi++;
// test for overflow, possible only when C1 >= 10^34
if (C1_hi > 0x0001ed09bead87c0ull || (C1_hi == 0x0001ed09bead87c0ull && C1_lo >= 0x378d8e6400000000ull)) { // C1 >= 10^34
// in this case q = P34 + 1 and x = q - P34 = 1, so multiply
// C'' = C'+ 5 = C1 + 5 by k1 ~ 10^(-1) calculated for P34 + 1
// decimal digits
// Calculate C'' = C' + 1/2 * 10^x
if (C1_lo >= 0xfffffffffffffffbull) { // low half add has carry
C1_lo = C1_lo + 5;
C1_hi = C1_hi + 1;
} else {
C1_lo = C1_lo + 5;
}
// the approximation of 10^(-1) was rounded up to 118 bits
// 10^(-1) =~ 33333333333333333333333333333400 * 2^-129
// 10^(-1) =~ 19999999999999999999999999999a00 * 2^-128
C1.w[1] = C1_hi;
C1.w[0] = C1_lo; // C''
ten2m1.w[1] = 0x1999999999999999ull;
ten2m1.w[0] = 0x9999999999999a00ull;
__mul_128x128_to_256 (P256, C1, ten2m1); // P256 = C*, f*
// C* is actually floor(C*) in this case
// the top Ex = 128 bits of 10^(-1) are
// T* = 0x00199999999999999999999999999999
// if (0 < f* < 10^(-x)) then
// if floor(C*) is even then C = floor(C*) - logical right
// shift; C has p decimal digits, correct by Prop. 1)
// else if floor(C*) is odd C = floor(C*) - 1 (logical right
// shift; C has p decimal digits, correct by Pr. 1)
// else
// C = floor(C*) (logical right shift; C has p decimal digits,
// correct by Property 1)
// n = C * 10^(e2+x)
if ((P256.w[1] || P256.w[0])
&& (P256.w[1] < 0x1999999999999999ull
|| (P256.w[1] == 0x1999999999999999ull
&& P256.w[0] <= 0x9999999999999999ull))) {
// the result is a midpoint
if (P256.w[2] & 0x01) {
is_midpoint_gt_even = 1;
// if floor(C*) is odd C = floor(C*) - 1; the result is not 0
P256.w[2]--;
if (P256.w[2] == 0xffffffffffffffffull)
P256.w[3]--;
} else {
is_midpoint_lt_even = 1;
}
}
// n = Cstar * 10^(e2+1)
y_exp = y_exp + EXP_P1;
// C* != 10^P because C* has P34 digits
// check for overflow
if (y_exp == EXP_MAX_P1
&& (rnd_mode == ROUNDING_TO_NEAREST
|| rnd_mode == ROUNDING_TIES_AWAY)) {
// overflow for RN
res.w[1] = x_sign | 0x7800000000000000ull; // +/-inf
res.w[0] = 0x0ull;
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// set the overflow flag
*pfpsf |= OVERFLOW_EXCEPTION;
BID_SWAP128 (res);
BID_RETURN (res);
}
// if (0 < f* - 1/2 < 10^(-x)) then
// the result of the addition is exact
// else
// the result of the addition is inexact
if (P256.w[1] > 0x8000000000000000ull || (P256.w[1] == 0x8000000000000000ull && P256.w[0] > 0x0ull)) { // the result may be exact
tmp64 = P256.w[1] - 0x8000000000000000ull; // f* - 1/2
if ((tmp64 > 0x1999999999999999ull
|| (tmp64 == 0x1999999999999999ull
&& P256.w[0] >= 0x9999999999999999ull))) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
is_inexact = 1;
} // else the result is exact
} else { // the result is inexact
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
is_inexact = 1;
}
C1_hi = P256.w[3];
C1_lo = P256.w[2];
if (!is_midpoint_gt_even && !is_midpoint_lt_even) {
is_inexact_lt_midpoint = is_inexact
&& (P256.w[1] & 0x8000000000000000ull);
is_inexact_gt_midpoint = is_inexact
&& !(P256.w[1] & 0x8000000000000000ull);
}
// general correction from RN to RA, RM, RP, RZ;
// result uses y_exp
if (rnd_mode != ROUNDING_TO_NEAREST) {
if ((!x_sign
&&
((rnd_mode == ROUNDING_UP
&& is_inexact_lt_midpoint)
||
((rnd_mode == ROUNDING_TIES_AWAY
|| rnd_mode == ROUNDING_UP)
&& is_midpoint_gt_even))) || (x_sign
&&
((rnd_mode ==
ROUNDING_DOWN
&&
is_inexact_lt_midpoint)
||
((rnd_mode ==
ROUNDING_TIES_AWAY
|| rnd_mode ==
ROUNDING_DOWN)
&&
is_midpoint_gt_even))))
{
// C1 = C1 + 1
C1_lo = C1_lo + 1;
if (C1_lo == 0) { // rounding overflow in the low 64 bits
C1_hi = C1_hi + 1;
}
if (C1_hi == 0x0001ed09bead87c0ull
&& C1_lo == 0x378d8e6400000000ull) {
// C1 = 10^34 => rounding overflow
C1_hi = 0x0000314dc6448d93ull;
C1_lo = 0x38c15b0a00000000ull; // 10^33
y_exp = y_exp + EXP_P1;
}
} else
if ((is_midpoint_lt_even || is_inexact_gt_midpoint)
&&
((x_sign
&& (rnd_mode == ROUNDING_UP
|| rnd_mode == ROUNDING_TO_ZERO))
|| (!x_sign
&& (rnd_mode == ROUNDING_DOWN
|| rnd_mode == ROUNDING_TO_ZERO)))) {
// C1 = C1 - 1
C1_lo = C1_lo - 1;
if (C1_lo == 0xffffffffffffffffull)
C1_hi--;
// check if we crossed into the lower decade
if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
C1_lo = 0x378d8e63ffffffffull;
y_exp = y_exp - EXP_P1;
// no underflow, because delta + q2 >= P34 + 1
}
} else {
; // exact, the result is already correct
}
// in all cases check for overflow (RN and RA solved already)
if (y_exp == EXP_MAX_P1) { // overflow
if ((rnd_mode == ROUNDING_DOWN && x_sign) || // RM and res < 0
(rnd_mode == ROUNDING_UP && !x_sign)) { // RP and res > 0
C1_hi = 0x7800000000000000ull; // +inf
C1_lo = 0x0ull;
} else { // RM and res > 0, RP and res < 0, or RZ
C1_hi = 0x5fffed09bead87c0ull;
C1_lo = 0x378d8e63ffffffffull;
}
y_exp = 0; // x_sign is preserved
// set the inexact flag (in case the exact addition was exact)
*pfpsf |= INEXACT_EXCEPTION;
// set the overflow flag
*pfpsf |= OVERFLOW_EXCEPTION;
}
}
} // else if (C1 < 10^34) then C1 is the coeff.; the result is exact
} else { // if x_sign != y_sign the result is exact
C1_lo = C1_lo - C2_lo;
C1_hi = C1_hi - C2_hi;
if (C1_lo > C1.w[0])
C1_hi--;
// the result can be zero, but it cannot overflow
if (C1_lo == 0 && C1_hi == 0) {
// assemble the result
if (x_exp < y_exp)
res.w[1] = x_exp;
else
res.w[1] = y_exp;
res.w[0] = 0;
if (rnd_mode == ROUNDING_DOWN) {
res.w[1] |= 0x8000000000000000ull;
}
BID_SWAP128 (res);
BID_RETURN (res);
}
if (C1_hi >= 0x8000000000000000ull) { // negative coefficient!
C1_lo = ~C1_lo;
C1_lo++;
C1_hi = ~C1_hi;
if (C1_lo == 0x0)
C1_hi++;
x_sign = y_sign; // the result will have the sign of y
}
}
// assemble the result
res.w[1] = x_sign | y_exp | C1_hi;
res.w[0] = C1_lo;
} else { // if (delta >= P34 + 1 - q2)
// instead of C' = (C1 * 10^(e1-e2) + C2)rnd,P34
// calculate C' = C1 * 10^(e1-e2-x1) + (C2 * 10^(-x1))rnd,P34
// where x1 = q1 + e1 - e2 - P34, 1 <= x1 <= P34 - 1
// In most cases C' will have P34 digits, and n = C' * 10^(e2+x1)
// If the result has P34+1 digits, redo the steps above with x1+1
// If the result has P34-1 digits or less, redo the steps above with
// x1-1 but only if initially x1 >= 1
// NOTE: these two steps can be improved, e.g we could guess if
// P34+1 or P34-1 digits will be obtained by adding/subtracting just
// the top 64 bits of the two operands
// The result cannot be zero, but it can overflow
x1 = delta + q2 - P34; // 1 <= x1 <= P34-1
roundC2:
// Calculate C1 * 10^(e1-e2-x1) where 0 <= e1-e2-x1 <= P34 - 1
// scale = (int)(e1 >> 49) - (int)(e2 >> 49) - x1; 0 <= scale <= P34-1
scale = delta - q1 + q2 - x1; // scale = e1 - e2 - x1 = P34 - q1
// either C1 or 10^(e1-e2-x1) may not fit in 64 bits,
// but their product fits with certainty in 128 bits (actually in 113)
if (scale >= 20) { //10^(e1-e2-x1) doesn't fit in 64 bits, but C1 does
__mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
} else if (scale >= 1) {
// if 1 <= scale <= 19 then 10^(e1-e2-x1) fits in 64 bits
if (q1 <= 19) { // C1 fits in 64 bits
__mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
} else { // q1 >= 20
C1.w[1] = C1_hi;
C1.w[0] = C1_lo;
__mul_128x64_to_128 (C1, ten2k64[scale], C1);
}
} else { // if (scale == 0) C1 is unchanged
C1.w[1] = C1_hi;
C1.w[0] = C1_lo;
}
tmp64 = C1.w[0]; // C1.w[1], C1.w[0] contains C1 * 10^(e1-e2-x1)
// now round C2 to q2-x1 decimal digits, where 1<=x1<=q2-1<=P34-1
// (but if we got here a second time after x1 = x1 - 1, then
// x1 >= 0; note that for x1 = 0 C2 is unchanged)
// C2' = C2 + 1/2 * 10^x1 = C2 + 5 * 10^(x1-1)
ind = x1 - 1; // 0 <= ind <= q2-2<=P34-2=32; but note that if x1 = 0
// during a second pass, then ind = -1
if (ind >= 0) { // if (x1 >= 1)
C2.w[0] = C2_lo;
C2.w[1] = C2_hi;
if (ind <= 18) {
C2.w[0] = C2.w[0] + midpoint64[ind];
if (C2.w[0] < C2_lo)
C2.w[1]++;
} else { // 19 <= ind <= 32
C2.w[0] = C2.w[0] + midpoint128[ind - 19].w[0];
C2.w[1] = C2.w[1] + midpoint128[ind - 19].w[1];
if (C2.w[0] < C2_lo)
C2.w[1]++;
}
// the approximation of 10^(-x1) was rounded up to 118 bits
__mul_128x128_to_256 (R256, C2, ten2mk128[ind]); // R256 = C2*, f2*
// calculate C2* and f2*
// C2* is actually floor(C2*) in this case
// C2* and f2* need shifting and masking, as shown by
// shiftright128[] and maskhigh128[]
// the top Ex bits of 10^(-x1) are T* = ten2mk128trunc[ind], e.g.
// if x1=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
// if (0 < f2* < 10^(-x1)) then
// if floor(C1+C2*) is even then C2* = floor(C2*) - logical right
// shift; C2* has p decimal digits, correct by Prop. 1)
// else if floor(C1+C2*) is odd C2* = floor(C2*)-1 (logical right
// shift; C2* has p decimal digits, correct by Pr. 1)
// else
// C2* = floor(C2*) (logical right shift; C has p decimal digits,
// correct by Property 1)
// n = C2* * 10^(e2+x1)
if (ind <= 2) {
highf2star.w[1] = 0x0;
highf2star.w[0] = 0x0; // low f2* ok
} else if (ind <= 21) {
highf2star.w[1] = 0x0;
highf2star.w[0] = R256.w[2] & maskhigh128[ind]; // low f2* ok
} else {
highf2star.w[1] = R256.w[3] & maskhigh128[ind];
highf2star.w[0] = R256.w[2]; // low f2* is ok
}
// shift right C2* by Ex-128 = shiftright128[ind]
if (ind >= 3) {
shift = shiftright128[ind];
if (shift < 64) { // 3 <= shift <= 63
R256.w[2] =
(R256.w[2] >> shift) | (R256.w[3] << (64 - shift));
R256.w[3] = (R256.w[3] >> shift);
} else { // 66 <= shift <= 102
R256.w[2] = (R256.w[3] >> (shift - 64));
R256.w[3] = 0x0ULL;
}
}
if (second_pass) {
is_inexact_lt_midpoint = 0;
is_inexact_gt_midpoint = 0;
is_midpoint_lt_even = 0;
is_midpoint_gt_even = 0;
}
// determine inexactness of the rounding of C2* (this may be
// followed by a second rounding only if we get P34+1
// decimal digits)
// if (0 < f2* - 1/2 < 10^(-x1)) then
// the result is exact
// else (if f2* - 1/2 > T* then)
// the result is inexact
if (ind <= 2) {
if (R256.w[1] > 0x8000000000000000ull ||
(R256.w[1] == 0x8000000000000000ull
&& R256.w[0] > 0x0ull)) {
// f2* > 1/2 and the result may be exact
tmp64A = R256.w[1] - 0x8000000000000000ull; // f* - 1/2
if ((tmp64A > ten2mk128trunc[ind].w[1]
|| (tmp64A == ten2mk128trunc[ind].w[1]
&& R256.w[0] >= ten2mk128trunc[ind].w[0]))) {
// set the inexact flag
// *pfpsf |= INEXACT_EXCEPTION;
tmp_inexact = 1; // may be set again during a second pass
// this rounding is applied to C2 only!
if (x_sign == y_sign)
is_inexact_lt_midpoint = 1;
else // if (x_sign != y_sign)
is_inexact_gt_midpoint = 1;
} // else the result is exact
// rounding down, unless a midpoint in [ODD, EVEN]
} else { // the result is inexact; f2* <= 1/2
// set the inexact flag
// *pfpsf |= INEXACT_EXCEPTION;
tmp_inexact = 1; // just in case we will round a second time
// rounding up, unless a midpoint in [EVEN, ODD]
// this rounding is applied to C2 only!
if (x_sign == y_sign)
is_inexact_gt_midpoint = 1;
else // if (x_sign != y_sign)
is_inexact_lt_midpoint = 1;
}
} else if (ind <= 21) { // if 3 <= ind <= 21
if (highf2star.w[1] > 0x0 || (highf2star.w[1] == 0x0
&& highf2star.w[0] >
onehalf128[ind])
|| (highf2star.w[1] == 0x0
&& highf2star.w[0] == onehalf128[ind]
&& (R256.w[1] || R256.w[0]))) {
// f2* > 1/2 and the result may be exact
// Calculate f2* - 1/2
tmp64A = highf2star.w[0] - onehalf128[ind];
tmp64B = highf2star.w[1];
if (tmp64A > highf2star.w[0])
tmp64B--;
if (tmp64B || tmp64A
|| R256.w[1] > ten2mk128trunc[ind].w[1]
|| (R256.w[1] == ten2mk128trunc[ind].w[1]
&& R256.w[0] > ten2mk128trunc[ind].w[0])) {
// set the inexact flag
// *pfpsf |= INEXACT_EXCEPTION;
tmp_inexact = 1; // may be set again during a second pass
// this rounding is applied to C2 only!
if (x_sign == y_sign)
is_inexact_lt_midpoint = 1;
else // if (x_sign != y_sign)
is_inexact_gt_midpoint = 1;
} // else the result is exact
} else { // the result is inexact; f2* <= 1/2
// set the inexact flag
// *pfpsf |= INEXACT_EXCEPTION;
tmp_inexact = 1; // may be set again during a second pass
// rounding up, unless a midpoint in [EVEN, ODD]
// this rounding is applied to C2 only!
if (x_sign == y_sign)
is_inexact_gt_midpoint = 1;
else // if (x_sign != y_sign)
is_inexact_lt_midpoint = 1;
}
} else { // if 22 <= ind <= 33
if (highf2star.w[1] > onehalf128[ind]
|| (highf2star.w[1] == onehalf128[ind]
&& (highf2star.w[0] || R256.w[1]
|| R256.w[0]))) {
// f2* > 1/2 and the result may be exact
// Calculate f2* - 1/2
// tmp64A = highf2star.w[0];
tmp64B = highf2star.w[1] - onehalf128[ind];
if (tmp64B || highf2star.w[0]
|| R256.w[1] > ten2mk128trunc[ind].w[1]
|| (R256.w[1] == ten2mk128trunc[ind].w[1]
&& R256.w[0] > ten2mk128trunc[ind].w[0])) {
// set the inexact flag
// *pfpsf |= INEXACT_EXCEPTION;
tmp_inexact = 1; // may be set again during a second pass
// this rounding is applied to C2 only!
if (x_sign == y_sign)
is_inexact_lt_midpoint = 1;
else // if (x_sign != y_sign)
is_inexact_gt_midpoint = 1;
} // else the result is exact
} else { // the result is inexact; f2* <= 1/2
// set the inexact flag
// *pfpsf |= INEXACT_EXCEPTION;
tmp_inexact = 1; // may be set again during a second pass
// rounding up, unless a midpoint in [EVEN, ODD]
// this rounding is applied to C2 only!
if (x_sign == y_sign)
is_inexact_gt_midpoint = 1;
else // if (x_sign != y_sign)
is_inexact_lt_midpoint = 1;
}
}
// check for midpoints
if ((R256.w[1] || R256.w[0]) && (highf2star.w[1] == 0)
&& (highf2star.w[0] == 0)
&& (R256.w[1] < ten2mk128trunc[ind].w[1]
|| (R256.w[1] == ten2mk128trunc[ind].w[1]
&& R256.w[0] <= ten2mk128trunc[ind].w[0]))) {
// the result is a midpoint
if ((tmp64 + R256.w[2]) & 0x01) { // MP in [EVEN, ODD]
// if floor(C2*) is odd C = floor(C2*) - 1; the result may be 0
R256.w[2]--;
if (R256.w[2] == 0xffffffffffffffffull)
R256.w[3]--;
// this rounding is applied to C2 only!
if (x_sign == y_sign)
is_midpoint_gt_even = 1;
else // if (x_sign != y_sign)
is_midpoint_lt_even = 1;
is_inexact_lt_midpoint = 0;
is_inexact_gt_midpoint = 0;
} else {
// else MP in [ODD, EVEN]
// this rounding is applied to C2 only!
if (x_sign == y_sign)
is_midpoint_lt_even = 1;
else // if (x_sign != y_sign)
is_midpoint_gt_even = 1;
is_inexact_lt_midpoint = 0;
is_inexact_gt_midpoint = 0;
}
}
// end if (ind >= 0)
} else { // if (ind == -1); only during a 2nd pass, and when x1 = 0
R256.w[2] = C2_lo;
R256.w[3] = C2_hi;
tmp_inexact = 0;
// to correct a possible setting to 1 from 1st pass
if (second_pass) {
is_midpoint_lt_even = 0;
is_midpoint_gt_even = 0;
is_inexact_lt_midpoint = 0;
is_inexact_gt_midpoint = 0;
}
}
// and now add/subtract C1 * 10^(e1-e2-x1) +/- (C2 * 10^(-x1))rnd,P34
if (x_sign == y_sign) { // addition; could overflow
// no second pass is possible this way (only for x_sign != y_sign)
C1.w[0] = C1.w[0] + R256.w[2];
C1.w[1] = C1.w[1] + R256.w[3];
if (C1.w[0] < tmp64)
C1.w[1]++; // carry
// if the sum has P34+1 digits, i.e. C1>=10^34 redo the calculation
// with x1=x1+1
if (C1.w[1] > 0x0001ed09bead87c0ull || (C1.w[1] == 0x0001ed09bead87c0ull && C1.w[0] >= 0x378d8e6400000000ull)) { // C1 >= 10^34
// chop off one more digit from the sum, but make sure there is
// no double-rounding error (see table - double rounding logic)
// now round C1 from P34+1 to P34 decimal digits
// C1' = C1 + 1/2 * 10 = C1 + 5
if (C1.w[0] >= 0xfffffffffffffffbull) { // low half add has carry
C1.w[0] = C1.w[0] + 5;
C1.w[1] = C1.w[1] + 1;
} else {
C1.w[0] = C1.w[0] + 5;
}
// the approximation of 10^(-1) was rounded up to 118 bits
__mul_128x128_to_256 (Q256, C1, ten2mk128[0]); // Q256 = C1*, f1*
// C1* is actually floor(C1*) in this case
// the top 128 bits of 10^(-1) are
// T* = ten2mk128trunc[0]=0x19999999999999999999999999999999
// if (0 < f1* < 10^(-1)) then
// if floor(C1*) is even then C1* = floor(C1*) - logical right
// shift; C1* has p decimal digits, correct by Prop. 1)
// else if floor(C1*) is odd C1* = floor(C1*) - 1 (logical right
// shift; C1* has p decimal digits, correct by Pr. 1)
// else
// C1* = floor(C1*) (logical right shift; C has p decimal digits
// correct by Property 1)
// n = C1* * 10^(e2+x1+1)
if ((Q256.w[1] || Q256.w[0])
&& (Q256.w[1] < ten2mk128trunc[0].w[1]
|| (Q256.w[1] == ten2mk128trunc[0].w[1]
&& Q256.w[0] <= ten2mk128trunc[0].w[0]))) {
// the result is a midpoint
if (is_inexact_lt_midpoint) { // for the 1st rounding
is_inexact_gt_midpoint = 1;
is_inexact_lt_midpoint = 0;
is_midpoint_gt_even = 0;
is_midpoint_lt_even = 0;
} else if (is_inexact_gt_midpoint) { // for the 1st rounding
Q256.w[2]--;
if (Q256.w[2] == 0xffffffffffffffffull)
Q256.w[3]--;
is_inexact_gt_midpoint = 0;
is_inexact_lt_midpoint = 1;
is_midpoint_gt_even = 0;
is_midpoint_lt_even = 0;
} else if (is_midpoint_gt_even) { // for the 1st rounding
// Note: cannot have is_midpoint_lt_even
is_inexact_gt_midpoint = 0;
is_inexact_lt_midpoint = 1;
is_midpoint_gt_even = 0;
is_midpoint_lt_even = 0;
} else { // the first rounding must have been exact
if (Q256.w[2] & 0x01) { // MP in [EVEN, ODD]
// the truncated result is correct
Q256.w[2]--;
if (Q256.w[2] == 0xffffffffffffffffull)
Q256.w[3]--;
is_inexact_gt_midpoint = 0;
is_inexact_lt_midpoint = 0;
is_midpoint_gt_even = 1;
is_midpoint_lt_even = 0;
} else { // MP in [ODD, EVEN]
is_inexact_gt_midpoint = 0;
is_inexact_lt_midpoint = 0;
is_midpoint_gt_even = 0;
is_midpoint_lt_even = 1;
}
}
tmp_inexact = 1; // in all cases
} else { // the result is not a midpoint
// determine inexactness of the rounding of C1 (the sum C1+C2*)
// if (0 < f1* - 1/2 < 10^(-1)) then
// the result is exact
// else (if f1* - 1/2 > T* then)
// the result is inexact
// ind = 0
if (Q256.w[1] > 0x8000000000000000ull
|| (Q256.w[1] == 0x8000000000000000ull
&& Q256.w[0] > 0x0ull)) {
// f1* > 1/2 and the result may be exact
Q256.w[1] = Q256.w[1] - 0x8000000000000000ull; // f1* - 1/2
if ((Q256.w[1] > ten2mk128trunc[0].w[1]
|| (Q256.w[1] == ten2mk128trunc[0].w[1]
&& Q256.w[0] > ten2mk128trunc[0].w[0]))) {
is_inexact_gt_midpoint = 0;
is_inexact_lt_midpoint = 1;
is_midpoint_gt_even = 0;
is_midpoint_lt_even = 0;
// set the inexact flag
tmp_inexact = 1;
// *pfpsf |= INEXACT_EXCEPTION;
} else { // else the result is exact for the 2nd rounding
if (tmp_inexact) { // if the previous rounding was inexact
if (is_midpoint_lt_even) {
is_inexact_gt_midpoint = 1;
is_midpoint_lt_even = 0;
} else if (is_midpoint_gt_even) {
is_inexact_lt_midpoint = 1;
is_midpoint_gt_even = 0;
} else {
; // no change
}
}
}
// rounding down, unless a midpoint in [ODD, EVEN]
} else { // the result is inexact; f1* <= 1/2
is_inexact_gt_midpoint = 1;
is_inexact_lt_midpoint = 0;
is_midpoint_gt_even = 0;
is_midpoint_lt_even = 0;
// set the inexact flag
tmp_inexact = 1;
// *pfpsf |= INEXACT_EXCEPTION;
}
} // end 'the result is not a midpoint'
// n = C1 * 10^(e2+x1)
C1.w[1] = Q256.w[3];
C1.w[0] = Q256.w[2];
y_exp = y_exp + ((UINT64) (x1 + 1) << 49);
} else { // C1 < 10^34
// C1.w[1] and C1.w[0] already set
// n = C1 * 10^(e2+x1)
y_exp = y_exp + ((UINT64) x1 << 49);
}
// check for overflow
if (y_exp == EXP_MAX_P1
&& (rnd_mode == ROUNDING_TO_NEAREST
|| rnd_mode == ROUNDING_TIES_AWAY)) {
res.w[1] = 0x7800000000000000ull | x_sign; // +/-inf
res.w[0] = 0x0ull;
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// set the overflow flag
*pfpsf |= OVERFLOW_EXCEPTION;
BID_SWAP128 (res);
BID_RETURN (res);
} // else no overflow
} else { // if x_sign != y_sign the result of this subtract. is exact
C1.w[0] = C1.w[0] - R256.w[2];
C1.w[1] = C1.w[1] - R256.w[3];
if (C1.w[0] > tmp64)
C1.w[1]--; // borrow
if (C1.w[1] >= 0x8000000000000000ull) { // negative coefficient!
C1.w[0] = ~C1.w[0];
C1.w[0]++;
C1.w[1] = ~C1.w[1];
if (C1.w[0] == 0x0)
C1.w[1]++;
tmp_sign = y_sign;
// the result will have the sign of y if last rnd
} else {
tmp_sign = x_sign;
}
// if the difference has P34-1 digits or less, i.e. C1 < 10^33 then
// redo the calculation with x1=x1-1;
// redo the calculation also if C1 = 10^33 and
// (is_inexact_gt_midpoint or is_midpoint_lt_even);
// (the last part should have really been
// (is_inexact_lt_midpoint or is_midpoint_gt_even) from
// the rounding of C2, but the position flags have been reversed)
// 10^33 = 0x0000314dc6448d93 0x38c15b0a00000000
if ((C1.w[1] < 0x0000314dc6448d93ull || (C1.w[1] == 0x0000314dc6448d93ull && C1.w[0] < 0x38c15b0a00000000ull)) || (C1.w[1] == 0x0000314dc6448d93ull && C1.w[0] == 0x38c15b0a00000000ull && (is_inexact_gt_midpoint || is_midpoint_lt_even))) { // C1=10^33
x1 = x1 - 1; // x1 >= 0
if (x1 >= 0) {
// clear position flags and tmp_inexact
is_midpoint_lt_even = 0;
is_midpoint_gt_even = 0;
is_inexact_lt_midpoint = 0;
is_inexact_gt_midpoint = 0;
tmp_inexact = 0;
second_pass = 1;
goto roundC2; // else result has less than P34 digits
}
}
// if the coefficient of the result is 10^34 it means that this
// must be the second pass, and we are done
if (C1.w[1] == 0x0001ed09bead87c0ull && C1.w[0] == 0x378d8e6400000000ull) { // if C1 = 10^34
C1.w[1] = 0x0000314dc6448d93ull; // C1 = 10^33
C1.w[0] = 0x38c15b0a00000000ull;
y_exp = y_exp + ((UINT64) 1 << 49);
}
x_sign = tmp_sign;
if (x1 >= 1)
y_exp = y_exp + ((UINT64) x1 << 49);
// x1 = -1 is possible at the end of a second pass when the
// first pass started with x1 = 1
}
C1_hi = C1.w[1];
C1_lo = C1.w[0];
// general correction from RN to RA, RM, RP, RZ; result uses y_exp
if (rnd_mode != ROUNDING_TO_NEAREST) {
if ((!x_sign
&& ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint)
||
((rnd_mode == ROUNDING_TIES_AWAY
|| rnd_mode == ROUNDING_UP)
&& is_midpoint_gt_even))) || (x_sign
&&
((rnd_mode ==
ROUNDING_DOWN
&&
is_inexact_lt_midpoint)
||
((rnd_mode ==
ROUNDING_TIES_AWAY
|| rnd_mode ==
ROUNDING_DOWN)
&&
is_midpoint_gt_even))))
{
// C1 = C1 + 1
C1_lo = C1_lo + 1;
if (C1_lo == 0) { // rounding overflow in the low 64 bits
C1_hi = C1_hi + 1;
}
if (C1_hi == 0x0001ed09bead87c0ull
&& C1_lo == 0x378d8e6400000000ull) {
// C1 = 10^34 => rounding overflow
C1_hi = 0x0000314dc6448d93ull;
C1_lo = 0x38c15b0a00000000ull; // 10^33
y_exp = y_exp + EXP_P1;
}
} else if ((is_midpoint_lt_even || is_inexact_gt_midpoint)
&&
((x_sign
&& (rnd_mode == ROUNDING_UP
|| rnd_mode == ROUNDING_TO_ZERO))
|| (!x_sign
&& (rnd_mode == ROUNDING_DOWN
|| rnd_mode == ROUNDING_TO_ZERO)))) {
// C1 = C1 - 1
C1_lo = C1_lo - 1;
if (C1_lo == 0xffffffffffffffffull)
C1_hi--;
// check if we crossed into the lower decade
if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
C1_lo = 0x378d8e63ffffffffull;
y_exp = y_exp - EXP_P1;
// no underflow, because delta + q2 >= P34 + 1
}
} else {
; // exact, the result is already correct
}
// in all cases check for overflow (RN and RA solved already)
if (y_exp == EXP_MAX_P1) { // overflow
if ((rnd_mode == ROUNDING_DOWN && x_sign) || // RM and res < 0
(rnd_mode == ROUNDING_UP && !x_sign)) { // RP and res > 0
C1_hi = 0x7800000000000000ull; // +inf
C1_lo = 0x0ull;
} else { // RM and res > 0, RP and res < 0, or RZ
C1_hi = 0x5fffed09bead87c0ull;
C1_lo = 0x378d8e63ffffffffull;
}
y_exp = 0; // x_sign is preserved
// set the inexact flag (in case the exact addition was exact)
*pfpsf |= INEXACT_EXCEPTION;
// set the overflow flag
*pfpsf |= OVERFLOW_EXCEPTION;
}
}
// assemble the result
res.w[1] = x_sign | y_exp | C1_hi;
res.w[0] = C1_lo;
if (tmp_inexact)
*pfpsf |= INEXACT_EXCEPTION;
}
} else { // if (-P34 + 1 <= delta <= -1) <=> 1 <= -delta <= P34 - 1
// NOTE: the following, up to "} else { // if x_sign != y_sign
// the result is exact" is identical to "else if (delta == P34 - q2) {"
// from above; also, the code is not symmetric: a+b and b+a may take
// different paths (need to unify eventually!)
// calculate C' = C2 + C1 * 10^(e1-e2) directly; the result may be
// inexact if it requires P34 + 1 decimal digits; in either case the
// 'cutoff' point for addition is at the position of the lsb of C2
// The coefficient of the result is C1 * 10^(e1-e2) + C2 and the
// exponent is e2; either C1 or 10^(e1-e2) may not fit in 64 bits,
// but their product fits with certainty in 128 bits (actually in 113)
// Note that 0 <= e1 - e2 <= P34 - 2
// -P34 + 1 <= delta <= -1 <=> -P34 + 1 <= delta <= -1 <=>
// -P34 + 1 <= q1 + e1 - q2 - e2 <= -1 <=>
// q2 - q1 - P34 + 1 <= e1 - e2 <= q2 - q1 - 1 <=>
// 1 - P34 - P34 + 1 <= e1-e2 <= P34 - 1 - 1 => 0 <= e1-e2 <= P34 - 2
scale = delta - q1 + q2; // scale = (int)(e1 >> 49) - (int)(e2 >> 49)
if (scale >= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does
__mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
} else if (scale >= 1) {
// if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits
if (q1 <= 19) { // C1 fits in 64 bits
__mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
} else { // q1 >= 20
C1.w[1] = C1_hi;
C1.w[0] = C1_lo;
__mul_128x64_to_128 (C1, ten2k64[scale], C1);
}
} else { // if (scale == 0) C1 is unchanged
C1.w[1] = C1_hi;
C1.w[0] = C1_lo; // only the low part is necessary
}
C1_hi = C1.w[1];
C1_lo = C1.w[0];
// now add C2
if (x_sign == y_sign) {
// the result can overflow!
C1_lo = C1_lo + C2_lo;
C1_hi = C1_hi + C2_hi;
if (C1_lo < C1.w[0])
C1_hi++;
// test for overflow, possible only when C1 >= 10^34
if (C1_hi > 0x0001ed09bead87c0ull || (C1_hi == 0x0001ed09bead87c0ull && C1_lo >= 0x378d8e6400000000ull)) { // C1 >= 10^34
// in this case q = P34 + 1 and x = q - P34 = 1, so multiply
// C'' = C'+ 5 = C1 + 5 by k1 ~ 10^(-1) calculated for P34 + 1
// decimal digits
// Calculate C'' = C' + 1/2 * 10^x
if (C1_lo >= 0xfffffffffffffffbull) { // low half add has carry
C1_lo = C1_lo + 5;
C1_hi = C1_hi + 1;
} else {
C1_lo = C1_lo + 5;
}
// the approximation of 10^(-1) was rounded up to 118 bits
// 10^(-1) =~ 33333333333333333333333333333400 * 2^-129
// 10^(-1) =~ 19999999999999999999999999999a00 * 2^-128
C1.w[1] = C1_hi;
C1.w[0] = C1_lo; // C''
ten2m1.w[1] = 0x1999999999999999ull;
ten2m1.w[0] = 0x9999999999999a00ull;
__mul_128x128_to_256 (P256, C1, ten2m1); // P256 = C*, f*
// C* is actually floor(C*) in this case
// the top Ex = 128 bits of 10^(-1) are
// T* = 0x00199999999999999999999999999999
// if (0 < f* < 10^(-x)) then
// if floor(C*) is even then C = floor(C*) - logical right
// shift; C has p decimal digits, correct by Prop. 1)
// else if floor(C*) is odd C = floor(C*) - 1 (logical right
// shift; C has p decimal digits, correct by Pr. 1)
// else
// C = floor(C*) (logical right shift; C has p decimal digits,
// correct by Property 1)
// n = C * 10^(e2+x)
if ((P256.w[1] || P256.w[0])
&& (P256.w[1] < 0x1999999999999999ull
|| (P256.w[1] == 0x1999999999999999ull
&& P256.w[0] <= 0x9999999999999999ull))) {
// the result is a midpoint
if (P256.w[2] & 0x01) {
is_midpoint_gt_even = 1;
// if floor(C*) is odd C = floor(C*) - 1; the result is not 0
P256.w[2]--;
if (P256.w[2] == 0xffffffffffffffffull)
P256.w[3]--;
} else {
is_midpoint_lt_even = 1;
}
}
// n = Cstar * 10^(e2+1)
y_exp = y_exp + EXP_P1;
// C* != 10^P34 because C* has P34 digits
// check for overflow
if (y_exp == EXP_MAX_P1
&& (rnd_mode == ROUNDING_TO_NEAREST
|| rnd_mode == ROUNDING_TIES_AWAY)) {
// overflow for RN
res.w[1] = x_sign | 0x7800000000000000ull; // +/-inf
res.w[0] = 0x0ull;
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
// set the overflow flag
*pfpsf |= OVERFLOW_EXCEPTION;
BID_SWAP128 (res);
BID_RETURN (res);
}
// if (0 < f* - 1/2 < 10^(-x)) then
// the result of the addition is exact
// else
// the result of the addition is inexact
if (P256.w[1] > 0x8000000000000000ull || (P256.w[1] == 0x8000000000000000ull && P256.w[0] > 0x0ull)) { // the result may be exact
tmp64 = P256.w[1] - 0x8000000000000000ull; // f* - 1/2
if ((tmp64 > 0x1999999999999999ull
|| (tmp64 == 0x1999999999999999ull
&& P256.w[0] >= 0x9999999999999999ull))) {
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
is_inexact = 1;
} // else the result is exact
} else { // the result is inexact
// set the inexact flag
*pfpsf |= INEXACT_EXCEPTION;
is_inexact = 1;
}
C1_hi = P256.w[3];
C1_lo = P256.w[2];
if (!is_midpoint_gt_even && !is_midpoint_lt_even) {
is_inexact_lt_midpoint = is_inexact
&& (P256.w[1] & 0x8000000000000000ull);
is_inexact_gt_midpoint = is_inexact
&& !(P256.w[1] & 0x8000000000000000ull);
}
// general correction from RN to RA, RM, RP, RZ; result uses y_exp
if (rnd_mode != ROUNDING_TO_NEAREST) {
if ((!x_sign
&& ((rnd_mode == ROUNDING_UP
&& is_inexact_lt_midpoint)
|| ((rnd_mode == ROUNDING_TIES_AWAY
|| rnd_mode == ROUNDING_UP)
&& is_midpoint_gt_even))) || (x_sign
&&
((rnd_mode ==
ROUNDING_DOWN
&&
is_inexact_lt_midpoint)
||
((rnd_mode ==
ROUNDING_TIES_AWAY
|| rnd_mode
==
ROUNDING_DOWN)
&&
is_midpoint_gt_even))))
{
// C1 = C1 + 1
C1_lo = C1_lo + 1;
if (C1_lo == 0) { // rounding overflow in the low 64 bits
C1_hi = C1_hi + 1;
}
if (C1_hi == 0x0001ed09bead87c0ull
&& C1_lo == 0x378d8e6400000000ull) {
// C1 = 10^34 => rounding overflow
C1_hi = 0x0000314dc6448d93ull;
C1_lo = 0x38c15b0a00000000ull; // 10^33
y_exp = y_exp + EXP_P1;
}
} else
if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
((x_sign && (rnd_mode == ROUNDING_UP ||
rnd_mode == ROUNDING_TO_ZERO)) ||
(!x_sign && (rnd_mode == ROUNDING_DOWN ||
rnd_mode == ROUNDING_TO_ZERO)))) {
// C1 = C1 - 1
C1_lo = C1_lo - 1;
if (C1_lo == 0xffffffffffffffffull)
C1_hi--;
// check if we crossed into the lower decade
if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
C1_lo = 0x378d8e63ffffffffull;
y_exp = y_exp - EXP_P1;
// no underflow, because delta + q2 >= P34 + 1
}
} else {
; // exact, the result is already correct
}
// in all cases check for overflow (RN and RA solved already)
if (y_exp == EXP_MAX_P1) { // overflow
if ((rnd_mode == ROUNDING_DOWN && x_sign) || // RM and res < 0
(rnd_mode == ROUNDING_UP && !x_sign)) { // RP and res > 0
C1_hi = 0x7800000000000000ull; // +inf
C1_lo = 0x0ull;
} else { // RM and res > 0, RP and res < 0, or RZ
C1_hi = 0x5fffed09bead87c0ull;
C1_lo = 0x378d8e63ffffffffull;
}
y_exp = 0; // x_sign is preserved
// set the inexact flag (in case the exact addition was exact)
*pfpsf |= INEXACT_EXCEPTION;
// set the overflow flag
*pfpsf |= OVERFLOW_EXCEPTION;
}
}
} // else if (C1 < 10^34) then C1 is the coeff.; the result is exact
// assemble the result
res.w[1] = x_sign | y_exp | C1_hi;
res.w[0] = C1_lo;
} else { // if x_sign != y_sign the result is exact
C1_lo = C2_lo - C1_lo;
C1_hi = C2_hi - C1_hi;
if (C1_lo > C2_lo)
C1_hi--;
if (C1_hi >= 0x8000000000000000ull) { // negative coefficient!
C1_lo = ~C1_lo;
C1_lo++;
C1_hi = ~C1_hi;
if (C1_lo == 0x0)
C1_hi++;
x_sign = y_sign; // the result will have the sign of y
}
// the result can be zero, but it cannot overflow
if (C1_lo == 0 && C1_hi == 0) {
// assemble the result
if (x_exp < y_exp)
res.w[1] = x_exp;
else
res.w[1] = y_exp;
res.w[0] = 0;
if (rnd_mode == ROUNDING_DOWN) {
res.w[1] |= 0x8000000000000000ull;
}
BID_SWAP128 (res);
BID_RETURN (res);
}
// assemble the result
res.w[1] = y_sign | y_exp | C1_hi;
res.w[0] = C1_lo;
}
}
}
BID_SWAP128 (res);
BID_RETURN (res)
}
}
// bid128_sub stands for bid128qq_sub
/*****************************************************************************
* BID128 sub
****************************************************************************/
// bid128_sub: computes x - y as x + (-y).
//
// The sign bit in the high word of y is inverted and the operands are then
// forwarded to bid128_add; a NaN y is forwarded with its encoding untouched.
// Two calling conventions are supported, selected at build time:
//   - DECIMAL_CALL_BY_REFERENCE: operands are read through px/py and the
//     result is handed back through BID_RETURN (pres is the result pointer);
//   - otherwise: operands are passed by value and the result is returned.
// The _RND_MODE_* / _EXC_* macros thread the rounding-mode and exception
// arguments through to bid128_add (their expansion is configuration
// dependent and defined outside this function).
#if DECIMAL_CALL_BY_REFERENCE
void
bid128_sub (UINT128 * pres, UINT128 * px, UINT128 * py
            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
            _EXC_INFO_PARAM) {
  UINT128 x = *px, y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128_sub (UINT128 x, UINT128 y
            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
            _EXC_INFO_PARAM) {
#endif
  UINT128 res;
  UINT64 y_hi = y.w[HIGH_128W];	// word holding the sign and NaN bits of y

  if ((y_hi & MASK_NAN) != MASK_NAN) {
    // y is not a NaN: negate it (negative -> clear bit 63, else set bit 63)
    y.w[HIGH_128W] = (y_hi & MASK_SIGN)
      ? (y_hi & 0x7fffffffffffffffull)
      : (y_hi | 0x8000000000000000ull);
  }
  // the subtraction itself is performed by the addition routine
#if DECIMAL_CALL_BY_REFERENCE
  bid128_add (&res, &x, &y
              _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
              _EXC_INFO_ARG);
#else
  res = bid128_add (x, y
                    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                    _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}