Refactor this code by introducing separate functions to handle the
extra-precision add and multiply operations. This simplifies future work but shouldn't result in any functional change.
This commit is contained in:
parent
f4b36cb777
commit
7792754061
@ -1,5 +1,5 @@
|
||||
/*-
|
||||
* Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
|
||||
* Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -31,6 +31,63 @@ __FBSDID("$FreeBSD$");
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
/*
|
||||
* A struct dd represents a floating-point number with twice the precision
|
||||
* of a double. We maintain the invariant that "hi" stores the 53 high-order
|
||||
* bits of the result.
|
||||
*/
|
||||
struct dd {
|
||||
double hi;
|
||||
double lo;
|
||||
};
|
||||
|
||||
/*
|
||||
* Compute a+b exactly, returning the exact result in a struct dd. We assume
|
||||
* that both a and b are finite, but make no assumptions about their relative
|
||||
* magnitudes.
|
||||
*/
|
||||
static inline struct dd
|
||||
dd_add(double a, double b)
|
||||
{
|
||||
struct dd ret;
|
||||
double s;
|
||||
|
||||
ret.hi = a + b;
|
||||
s = ret.hi - a;
|
||||
ret.lo = (a - (ret.hi - s)) + (b - s);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute a*b exactly, returning the exact result in a struct dd. We assume
|
||||
* that both a and b are normalized, so no underflow or overflow will occur.
|
||||
* The current rounding mode must be round-to-nearest.
|
||||
*/
|
||||
static inline struct dd
|
||||
dd_mul(double a, double b)
|
||||
{
|
||||
static const double split = 0x1p27 + 1.0;
|
||||
struct dd ret;
|
||||
double ha, hb, la, lb, p, q;
|
||||
|
||||
p = a * split;
|
||||
ha = a - p;
|
||||
ha += p;
|
||||
la = a - ha;
|
||||
|
||||
p = b * split;
|
||||
hb = b - p;
|
||||
hb += p;
|
||||
lb = b - hb;
|
||||
|
||||
p = ha * hb;
|
||||
q = ha * lb + la * hb;
|
||||
|
||||
ret.hi = p + q;
|
||||
ret.lo = p - ret.hi + q + la * lb;
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Fused multiply-add: Compute x * y + z with a single rounding error.
|
||||
*
|
||||
@ -52,10 +109,10 @@ __FBSDID("$FreeBSD$");
|
||||
double
|
||||
fma(double x, double y, double z)
|
||||
{
|
||||
static const double split = 0x1p27 + 1.0;
|
||||
double xs, ys, zs;
|
||||
double c, cc, hx, hy, p, q, tx, ty;
|
||||
double r, rr, s;
|
||||
struct dd xy, r, r2;
|
||||
double p;
|
||||
double s;
|
||||
int oround;
|
||||
int ex, ey, ez;
|
||||
int spread;
|
||||
@ -95,29 +152,29 @@ fma(double x, double y, double z)
|
||||
if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
|
||||
return (x * y);
|
||||
feholdexcept(&env);
|
||||
r = x * y;
|
||||
s = x * y;
|
||||
if (!fetestexcept(FE_INEXACT))
|
||||
r = nextafter(r, 0);
|
||||
s = nextafter(s, 0);
|
||||
feupdateenv(&env);
|
||||
return (r);
|
||||
return (s);
|
||||
case FE_DOWNWARD:
|
||||
if (z > 0.0)
|
||||
return (x * y);
|
||||
feholdexcept(&env);
|
||||
r = x * y;
|
||||
s = x * y;
|
||||
if (!fetestexcept(FE_INEXACT))
|
||||
r = nextafter(r, -INFINITY);
|
||||
s = nextafter(s, -INFINITY);
|
||||
feupdateenv(&env);
|
||||
return (r);
|
||||
return (s);
|
||||
default: /* FE_UPWARD */
|
||||
if (z < 0.0)
|
||||
return (x * y);
|
||||
feholdexcept(&env);
|
||||
r = x * y;
|
||||
s = x * y;
|
||||
if (!fetestexcept(FE_INEXACT))
|
||||
r = nextafter(r, INFINITY);
|
||||
s = nextafter(s, INFINITY);
|
||||
feupdateenv(&env);
|
||||
return (r);
|
||||
return (s);
|
||||
}
|
||||
}
|
||||
if (spread < -DBL_MANT_DIG) {
|
||||
@ -145,50 +202,29 @@ fma(double x, double y, double z)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Use Dekker's algorithm to perform the multiplication and
|
||||
* subsequent addition in twice the machine precision.
|
||||
* Arrange so that x * y = c + cc, and x * y + z = r + rr.
|
||||
*/
|
||||
fesetround(FE_TONEAREST);
|
||||
|
||||
p = xs * split;
|
||||
hx = xs - p;
|
||||
hx += p;
|
||||
tx = xs - hx;
|
||||
|
||||
p = ys * split;
|
||||
hy = ys - p;
|
||||
hy += p;
|
||||
ty = ys - hy;
|
||||
|
||||
p = hx * hy;
|
||||
q = hx * ty + tx * hy;
|
||||
c = p + q;
|
||||
cc = p - c + q + tx * ty;
|
||||
|
||||
xy = dd_mul(xs, ys);
|
||||
zs = ldexp(zs, -spread);
|
||||
r = c + zs;
|
||||
s = r - c;
|
||||
rr = (c - (r - s)) + (zs - s) + cc;
|
||||
r = dd_add(xy.hi, zs);
|
||||
r.lo += xy.lo;
|
||||
|
||||
spread = ex + ey;
|
||||
if (spread + ilogb(r) > -1023) {
|
||||
if (spread + ilogb(r.hi) > -1023) {
|
||||
fesetround(oround);
|
||||
r = r + rr;
|
||||
r.hi = r.hi + r.lo;
|
||||
} else {
|
||||
/*
|
||||
* The result is subnormal, so we round before scaling to
|
||||
* avoid double rounding.
|
||||
*/
|
||||
p = ldexp(copysign(0x1p-1022, r), -spread);
|
||||
c = r + p;
|
||||
s = c - r;
|
||||
cc = (r - (c - s)) + (p - s) + rr;
|
||||
p = ldexp(copysign(0x1p-1022, r.hi), -spread);
|
||||
r2 = dd_add(r.hi, p);
|
||||
r2.lo += r.lo;
|
||||
fesetround(oround);
|
||||
r = (c + cc) - p;
|
||||
r.hi = (r2.hi + r2.lo) - p;
|
||||
}
|
||||
return (ldexp(r, spread));
|
||||
return (ldexp(r.hi, spread));
|
||||
}
|
||||
#else /* LDBL_MANT_DIG == 113 */
|
||||
/*
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*-
|
||||
* Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
|
||||
* Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -31,6 +31,67 @@ __FBSDID("$FreeBSD$");
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
/*
|
||||
* A struct dd represents a floating-point number with twice the precision
|
||||
* of a long double. We maintain the invariant that "hi" stores the high-order
|
||||
* bits of the result.
|
||||
*/
|
||||
struct dd {
|
||||
long double hi;
|
||||
long double lo;
|
||||
};
|
||||
|
||||
/*
|
||||
* Compute a+b exactly, returning the exact result in a struct dd. We assume
|
||||
* that both a and b are finite, but make no assumptions about their relative
|
||||
* magnitudes.
|
||||
*/
|
||||
static inline struct dd
|
||||
dd_add(long double a, long double b)
|
||||
{
|
||||
struct dd ret;
|
||||
long double s;
|
||||
|
||||
ret.hi = a + b;
|
||||
s = ret.hi - a;
|
||||
ret.lo = (a - (ret.hi - s)) + (b - s);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute a*b exactly, returning the exact result in a struct dd. We assume
|
||||
* that both a and b are normalized, so no underflow or overflow will occur.
|
||||
* The current rounding mode must be round-to-nearest.
|
||||
*/
|
||||
static inline struct dd
|
||||
dd_mul(long double a, long double b)
|
||||
{
|
||||
#if LDBL_MANT_DIG == 64
|
||||
static const long double split = 0x1p32L + 1.0;
|
||||
#elif LDBL_MANT_DIG == 113
|
||||
static const long double split = 0x1p57L + 1.0;
|
||||
#endif
|
||||
struct dd ret;
|
||||
long double ha, hb, la, lb, p, q;
|
||||
|
||||
p = a * split;
|
||||
ha = a - p;
|
||||
ha += p;
|
||||
la = a - ha;
|
||||
|
||||
p = b * split;
|
||||
hb = b - p;
|
||||
hb += p;
|
||||
lb = b - hb;
|
||||
|
||||
p = ha * hb;
|
||||
q = ha * lb + la * hb;
|
||||
|
||||
ret.hi = p + q;
|
||||
ret.lo = p - ret.hi + q + la * lb;
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Fused multiply-add: Compute x * y + z with a single rounding error.
|
||||
*
|
||||
@ -43,14 +104,10 @@ __FBSDID("$FreeBSD$");
|
||||
long double
|
||||
fmal(long double x, long double y, long double z)
|
||||
{
|
||||
#if LDBL_MANT_DIG == 64
|
||||
static const long double split = 0x1p32L + 1.0;
|
||||
#elif LDBL_MANT_DIG == 113
|
||||
static const long double split = 0x1p57L + 1.0;
|
||||
#endif
|
||||
long double xs, ys, zs;
|
||||
long double c, cc, hx, hy, p, q, tx, ty;
|
||||
long double r, rr, s;
|
||||
struct dd xy, r, r2;
|
||||
long double p;
|
||||
long double s;
|
||||
int oround;
|
||||
int ex, ey, ez;
|
||||
int spread;
|
||||
@ -90,29 +147,29 @@ fmal(long double x, long double y, long double z)
|
||||
if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
|
||||
return (x * y);
|
||||
feholdexcept(&env);
|
||||
r = x * y;
|
||||
s = x * y;
|
||||
if (!fetestexcept(FE_INEXACT))
|
||||
r = nextafterl(r, 0);
|
||||
s = nextafterl(s, 0);
|
||||
feupdateenv(&env);
|
||||
return (r);
|
||||
return (s);
|
||||
case FE_DOWNWARD:
|
||||
if (z > 0.0)
|
||||
return (x * y);
|
||||
feholdexcept(&env);
|
||||
r = x * y;
|
||||
s = x * y;
|
||||
if (!fetestexcept(FE_INEXACT))
|
||||
r = nextafterl(r, -INFINITY);
|
||||
s = nextafterl(s, -INFINITY);
|
||||
feupdateenv(&env);
|
||||
return (r);
|
||||
return (s);
|
||||
default: /* FE_UPWARD */
|
||||
if (z < 0.0)
|
||||
return (x * y);
|
||||
feholdexcept(&env);
|
||||
r = x * y;
|
||||
s = x * y;
|
||||
if (!fetestexcept(FE_INEXACT))
|
||||
r = nextafterl(r, INFINITY);
|
||||
s = nextafterl(s, INFINITY);
|
||||
feupdateenv(&env);
|
||||
return (r);
|
||||
return (s);
|
||||
}
|
||||
}
|
||||
if (spread < -LDBL_MANT_DIG) {
|
||||
@ -140,48 +197,27 @@ fmal(long double x, long double y, long double z)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Use Dekker's algorithm to perform the multiplication and
|
||||
* subsequent addition in twice the machine precision.
|
||||
* Arrange so that x * y = c + cc, and x * y + z = r + rr.
|
||||
*/
|
||||
fesetround(FE_TONEAREST);
|
||||
|
||||
p = xs * split;
|
||||
hx = xs - p;
|
||||
hx += p;
|
||||
tx = xs - hx;
|
||||
|
||||
p = ys * split;
|
||||
hy = ys - p;
|
||||
hy += p;
|
||||
ty = ys - hy;
|
||||
|
||||
p = hx * hy;
|
||||
q = hx * ty + tx * hy;
|
||||
c = p + q;
|
||||
cc = p - c + q + tx * ty;
|
||||
|
||||
xy = dd_mul(xs, ys);
|
||||
zs = ldexpl(zs, -spread);
|
||||
r = c + zs;
|
||||
s = r - c;
|
||||
rr = (c - (r - s)) + (zs - s) + cc;
|
||||
r = dd_add(xy.hi, zs);
|
||||
r.lo += xy.lo;
|
||||
|
||||
spread = ex + ey;
|
||||
if (spread + ilogbl(r) > -16383) {
|
||||
if (spread + ilogbl(r.hi) > -16383) {
|
||||
fesetround(oround);
|
||||
r = r + rr;
|
||||
r.hi = r.hi + r.lo;
|
||||
} else {
|
||||
/*
|
||||
* The result is subnormal, so we round before scaling to
|
||||
* avoid double rounding.
|
||||
*/
|
||||
p = ldexpl(copysignl(0x1p-16382L, r), -spread);
|
||||
c = r + p;
|
||||
s = c - r;
|
||||
cc = (r - (c - s)) + (p - s) + rr;
|
||||
p = ldexpl(copysignl(0x1p-16382L, r.hi), -spread);
|
||||
r2 = dd_add(r.hi, p);
|
||||
r2.lo += r.lo;
|
||||
fesetround(oround);
|
||||
r = (c + cc) - p;
|
||||
r.hi = (r2.hi + r2.lo) - p;
|
||||
}
|
||||
return (ldexpl(r, spread));
|
||||
return (ldexpl(r.hi, spread));
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user