ld80/s_expl.c:

* Update the evaluation of the polynomial.  This allows the removal
  of the now-unused variables t23 and t45.

ld128/s_expl.c:

* Update the evaluation of the polynomial and of the intermediate
  result t.  This update allows several numerical constants to be
  written as double rather than long double constants; update those
  constants as appropriate.

Obtained from:	bde
This commit is contained in:
Steve Kargl 2013-06-03 18:40:00 +00:00
parent 199b8e343d
commit 31407861b8
2 changed files with 55 additions and 27 deletions

View File

@ -41,28 +41,48 @@ __FBSDID("$FreeBSD$");
#define LOG2_INTERVALS 7
#define BIAS (LDBL_MAX_EXP - 1)
static const long double
huge = 0x1p10000L,
twom10000 = 0x1p-10000L,
/* XXX Prevent gcc from erroneously constant folding this: */
static volatile const long double tiny = 0x1p-10000L;
static const long double
INV_L = 1.84664965233787316142070359168242182e+02L,
L1 = 5.41521234812457272982212595914567508e-03L,
L2 = -1.02536706388947310094527932552595546e-29L,
huge = 0x1p10000L,
/* log(2**16384 - 0.5) rounded towards zero: */
/* log(2**16384 - 0.5 + 1) rounded towards zero for expm1l() is the same: */
o_threshold = 11356.523406294143949491931077970763428L,
twom10000 = 0x1p-10000L,
/* log(2**(-16381-64-1)) rounded towards zero: */
u_threshold = -11433.462743336297878837243843452621503L;
static const double
/*
* ln2/INTERVALS = L1+L2 (hi+lo decomposition for multiplication). L1 must
* have at least 22 (= log2(|LDBL_MIN_EXP-extras|) + log2(INTERVALS)) lowest
* bits zero so that multiplication of it by n is exact.
*/
INV_L = 1.8466496523378731e+2, /* 0x171547652b82fe.0p-45 */
L2 = -1.0253670638894731e-29; /* -0x1.9ff0342542fc3p-97 */
static const long double
A2 = 5.00000000000000000000000000000000000e-1L,
A3 = 1.66666666666666666666666666666666972e-1L,
A4 = 4.16666666666666666666666666653708268e-2L,
A5 = 8.33333333333333333333333315069867254e-3L,
A6 = 1.38888888888888888888996596213795377e-3L,
A7 = 1.98412698412698412718821436278644414e-4L,
A8 = 2.48015873015869681884882576649543128e-5L,
A9 = 2.75573192240103867817876199544468806e-6L,
A10 = 2.75573236172670046201884000197885520e-7L,
A11 = 2.50517544183909126492878226167697856e-8L;
/* 0x1.62e42fefa39ef35793c768000000p-8 */
L1 = 5.41521234812457272982212595914567508e-3L;
static const long double
/*
* Domain [-0.002708, 0.002708], range ~[-2.4011e-38, 2.4244e-38]:
* |exp(x) - p(x)| < 2**-124.9
* (0.002708 is ln2/(2*INTERVALS) rounded up a little).
*/
A2 = 0.5,
A3 = 1.66666666666666666666666666651085500e-1L,
A4 = 4.16666666666666666666666666425885320e-2L,
A5 = 8.33333333333333333334522877160175842e-3L,
A6 = 1.38888888888888888889971139751596836e-3L;
static const double
A7 = 1.9841269841269471e-4,
A8 = 2.4801587301585284e-5,
A9 = 2.7557324277411234e-6,
A10 = 2.7557333722375072e-7;
static const struct {
long double hi;
@ -202,7 +222,9 @@ long double
expl(long double x)
{
union IEEEl2bits u, v;
long double fn, r, r1, r2, q, t, twopk, twopkp10000;
long double q, r, r1, t, twopk, twopkp10000;
double dr, fn, r2;
int k, n, n2;
uint32_t hx, ix;
@ -227,8 +249,15 @@ expl(long double x)
}
/* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */
fn = x * INV_L + 0x1.8p112 - 0x1.8p112;
/* Use a specialized rint() to get fn. Assume round-to-nearest. */
/* XXX assume no extra precision for the additions, as for trig fns. */
/* XXX this set of comments is now quadruplicated. */
fn = (double)x * INV_L + 0x1.8p52 - 0x1.8p52;
#if defined(HAVE_EFFICIENT_IRINT)
n = irint(fn);
#else
n = (int)fn;
#endif
n2 = (unsigned)n % INTERVALS;
k = n >> LOG2_INTERVALS;
r1 = x - fn * L1;
@ -245,11 +274,12 @@ expl(long double x)
twopkp10000 = v.e;
}
r = r1 + r2;
q = r * r * (A2 + r * (A3 + r * (A4 + r * (A5 + r * (A6 + r * (A7 +
r * (A8 + r * (A9 + r * (A10 + r * A11)))))))));
/* Evaluate expl(endpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2). */
dr = r;
q = r2 + r * r * (A2 + r * (A3 + r * (A4 + r * (A5 + r * (A6 +
dr * (A7 + dr * (A8 + dr * (A9 + dr * A10))))))));
t = tbl[n2].lo + tbl[n2].hi;
t = tbl[n2].hi + (tbl[n2].lo + t * (r2 + q + r1));
t = tbl[n2].lo + t * (q + r1) + tbl[n2].hi;
/* Scale by 2**k. */
if (k >= LDBL_MIN_EXP) {

View File

@ -235,7 +235,8 @@ long double
expl(long double x)
{
union IEEEl2bits u, v;
long double fn, q, r, r1, r2, t, t23, t45, twopk, twopkp10000, z;
long double fn, q, r, r1, r2, t, twopk, twopkp10000;
long double z;
int k, n, n2;
uint16_t hx, ix;
@ -288,12 +289,9 @@ expl(long double x)
twopkp10000 = v.e;
}
/* Evaluate expl(midpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2). */
/* Here q = q(r), not q(r1), since r1 is lopped like L1. */
t45 = r * A5 + A4;
/* Evaluate expl(endpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2). */
z = r * r;
t23 = r * A3 + A2;
q = r2 + z * t23 + z * z * t45 + z * z * z * A6;
q = r2 + z * (A2 + r * A3) + z * z * (A4 + r * A5) + z * z * z * A6;
t = (long double)tbl[n2].lo + tbl[n2].hi;
t = tbl[n2].lo + t * (q + r1) + tbl[n2].hi;