Refactor this code by introducing separate functions to handle the

extra-precision add and multiply operations. This simplifies future
work but shouldn't result in any functional change.
This commit is contained in:
David Schultz 2011-10-11 05:17:45 +00:00
parent f4b36cb777
commit 7792754061
2 changed files with 164 additions and 92 deletions

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
* Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -31,6 +31,63 @@ __FBSDID("$FreeBSD$");
#include <float.h>
#include <math.h>
/*
* A struct dd represents a floating-point number with twice the precision
* of a double. We maintain the invariant that "hi" stores the 53 high-order
* bits of the result.
*/
struct dd {
double hi;
double lo;
};
/*
* Compute a+b exactly, returning the exact result in a struct dd. We assume
* that both a and b are finite, but make no assumptions about their relative
* magnitudes.
*/
static inline struct dd
dd_add(double a, double b)
{
struct dd ret;
double s;
ret.hi = a + b;
s = ret.hi - a;
ret.lo = (a - (ret.hi - s)) + (b - s);
return (ret);
}
/*
* Compute a*b exactly, returning the exact result in a struct dd. We assume
* that both a and b are normalized, so no underflow or overflow will occur.
* The current rounding mode must be round-to-nearest.
*/
static inline struct dd
dd_mul(double a, double b)
{
static const double split = 0x1p27 + 1.0;
struct dd ret;
double ha, hb, la, lb, p, q;
p = a * split;
ha = a - p;
ha += p;
la = a - ha;
p = b * split;
hb = b - p;
hb += p;
lb = b - hb;
p = ha * hb;
q = ha * lb + la * hb;
ret.hi = p + q;
ret.lo = p - ret.hi + q + la * lb;
return (ret);
}
/*
* Fused multiply-add: Compute x * y + z with a single rounding error.
*
@ -52,10 +109,10 @@ __FBSDID("$FreeBSD$");
double
fma(double x, double y, double z)
{
static const double split = 0x1p27 + 1.0;
double xs, ys, zs;
double c, cc, hx, hy, p, q, tx, ty;
double r, rr, s;
struct dd xy, r, r2;
double p;
double s;
int oround;
int ex, ey, ez;
int spread;
@ -95,29 +152,29 @@ fma(double x, double y, double z)
if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
return (x * y);
feholdexcept(&env);
r = x * y;
s = x * y;
if (!fetestexcept(FE_INEXACT))
r = nextafter(r, 0);
s = nextafter(s, 0);
feupdateenv(&env);
return (r);
return (s);
case FE_DOWNWARD:
if (z > 0.0)
return (x * y);
feholdexcept(&env);
r = x * y;
s = x * y;
if (!fetestexcept(FE_INEXACT))
r = nextafter(r, -INFINITY);
s = nextafter(s, -INFINITY);
feupdateenv(&env);
return (r);
return (s);
default: /* FE_UPWARD */
if (z < 0.0)
return (x * y);
feholdexcept(&env);
r = x * y;
s = x * y;
if (!fetestexcept(FE_INEXACT))
r = nextafter(r, INFINITY);
s = nextafter(s, INFINITY);
feupdateenv(&env);
return (r);
return (s);
}
}
if (spread < -DBL_MANT_DIG) {
@ -145,50 +202,29 @@ fma(double x, double y, double z)
}
}
/*
* Use Dekker's algorithm to perform the multiplication and
* subsequent addition in twice the machine precision.
* Arrange so that x * y = c + cc, and x * y + z = r + rr.
*/
fesetround(FE_TONEAREST);
p = xs * split;
hx = xs - p;
hx += p;
tx = xs - hx;
p = ys * split;
hy = ys - p;
hy += p;
ty = ys - hy;
p = hx * hy;
q = hx * ty + tx * hy;
c = p + q;
cc = p - c + q + tx * ty;
xy = dd_mul(xs, ys);
zs = ldexp(zs, -spread);
r = c + zs;
s = r - c;
rr = (c - (r - s)) + (zs - s) + cc;
r = dd_add(xy.hi, zs);
r.lo += xy.lo;
spread = ex + ey;
if (spread + ilogb(r) > -1023) {
if (spread + ilogb(r.hi) > -1023) {
fesetround(oround);
r = r + rr;
r.hi = r.hi + r.lo;
} else {
/*
* The result is subnormal, so we round before scaling to
* avoid double rounding.
*/
p = ldexp(copysign(0x1p-1022, r), -spread);
c = r + p;
s = c - r;
cc = (r - (c - s)) + (p - s) + rr;
p = ldexp(copysign(0x1p-1022, r.hi), -spread);
r2 = dd_add(r.hi, p);
r2.lo += r.lo;
fesetround(oround);
r = (c + cc) - p;
r.hi = (r2.hi + r2.lo) - p;
}
return (ldexp(r, spread));
return (ldexp(r.hi, spread));
}
#else /* LDBL_MANT_DIG == 113 */
/*

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
* Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -31,6 +31,67 @@ __FBSDID("$FreeBSD$");
#include <float.h>
#include <math.h>
/*
* A struct dd represents a floating-point number with twice the precision
* of a long double. We maintain the invariant that "hi" stores the high-order
* bits of the result.
*/
struct dd {
long double hi;
long double lo;
};
/*
* Compute a+b exactly, returning the exact result in a struct dd. We assume
* that both a and b are finite, but make no assumptions about their relative
* magnitudes.
*/
static inline struct dd
dd_add(long double a, long double b)
{
struct dd ret;
long double s;
ret.hi = a + b;
s = ret.hi - a;
ret.lo = (a - (ret.hi - s)) + (b - s);
return (ret);
}
/*
* Compute a*b exactly, returning the exact result in a struct dd. We assume
* that both a and b are normalized, so no underflow or overflow will occur.
* The current rounding mode must be round-to-nearest.
*/
static inline struct dd
dd_mul(long double a, long double b)
{
#if LDBL_MANT_DIG == 64
static const long double split = 0x1p32L + 1.0;
#elif LDBL_MANT_DIG == 113
static const long double split = 0x1p57L + 1.0;
#endif
struct dd ret;
long double ha, hb, la, lb, p, q;
p = a * split;
ha = a - p;
ha += p;
la = a - ha;
p = b * split;
hb = b - p;
hb += p;
lb = b - hb;
p = ha * hb;
q = ha * lb + la * hb;
ret.hi = p + q;
ret.lo = p - ret.hi + q + la * lb;
return (ret);
}
/*
* Fused multiply-add: Compute x * y + z with a single rounding error.
*
@ -43,14 +104,10 @@ __FBSDID("$FreeBSD$");
long double
fmal(long double x, long double y, long double z)
{
#if LDBL_MANT_DIG == 64
static const long double split = 0x1p32L + 1.0;
#elif LDBL_MANT_DIG == 113
static const long double split = 0x1p57L + 1.0;
#endif
long double xs, ys, zs;
long double c, cc, hx, hy, p, q, tx, ty;
long double r, rr, s;
struct dd xy, r, r2;
long double p;
long double s;
int oround;
int ex, ey, ez;
int spread;
@ -90,29 +147,29 @@ fmal(long double x, long double y, long double z)
if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
return (x * y);
feholdexcept(&env);
r = x * y;
s = x * y;
if (!fetestexcept(FE_INEXACT))
r = nextafterl(r, 0);
s = nextafterl(s, 0);
feupdateenv(&env);
return (r);
return (s);
case FE_DOWNWARD:
if (z > 0.0)
return (x * y);
feholdexcept(&env);
r = x * y;
s = x * y;
if (!fetestexcept(FE_INEXACT))
r = nextafterl(r, -INFINITY);
s = nextafterl(s, -INFINITY);
feupdateenv(&env);
return (r);
return (s);
default: /* FE_UPWARD */
if (z < 0.0)
return (x * y);
feholdexcept(&env);
r = x * y;
s = x * y;
if (!fetestexcept(FE_INEXACT))
r = nextafterl(r, INFINITY);
s = nextafterl(s, INFINITY);
feupdateenv(&env);
return (r);
return (s);
}
}
if (spread < -LDBL_MANT_DIG) {
@ -140,48 +197,27 @@ fmal(long double x, long double y, long double z)
}
}
/*
* Use Dekker's algorithm to perform the multiplication and
* subsequent addition in twice the machine precision.
* Arrange so that x * y = c + cc, and x * y + z = r + rr.
*/
fesetround(FE_TONEAREST);
p = xs * split;
hx = xs - p;
hx += p;
tx = xs - hx;
p = ys * split;
hy = ys - p;
hy += p;
ty = ys - hy;
p = hx * hy;
q = hx * ty + tx * hy;
c = p + q;
cc = p - c + q + tx * ty;
xy = dd_mul(xs, ys);
zs = ldexpl(zs, -spread);
r = c + zs;
s = r - c;
rr = (c - (r - s)) + (zs - s) + cc;
r = dd_add(xy.hi, zs);
r.lo += xy.lo;
spread = ex + ey;
if (spread + ilogbl(r) > -16383) {
if (spread + ilogbl(r.hi) > -16383) {
fesetround(oround);
r = r + rr;
r.hi = r.hi + r.lo;
} else {
/*
* The result is subnormal, so we round before scaling to
* avoid double rounding.
*/
p = ldexpl(copysignl(0x1p-16382L, r), -spread);
c = r + p;
s = c - r;
cc = (r - (c - s)) + (p - s) + rr;
p = ldexpl(copysignl(0x1p-16382L, r.hi), -spread);
r2 = dd_add(r.hi, p);
r2.lo += r.lo;
fesetround(oround);
r = (c + cc) - p;
r.hi = (r2.hi + r2.lo) - p;
}
return (ldexpl(r, spread));
return (ldexpl(r.hi, spread));
}