Fix scaling bugs which gave innaccuracies and spurious underflows in csqrt()

and csqrtl().

When one component is huge and the other is tiny, scaling down the tiny
component gave spurious underflow.

When both components are denormal, not scaling them up gave inaccuracies
of 34+ ulps on not very carefully selected args.  Fixing this reduces the
maximum error to 1.6 ulps on the same set of args (mosly not denormal ones).

The scaling used multiplication of a complex variable by 2, but clang messes
this on amd64 up by losing the sign of -0.0.  Calculate the components
separately, as is well known to be needed for operations on more exceptional
values.
This commit is contained in:
Bruce Evans 2018-07-17 10:44:16 +00:00
parent 52ff436841
commit 9d7dc13615
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=336400
2 changed files with 48 additions and 28 deletions

View File

@ -51,9 +51,7 @@ double complex
csqrt(double complex z)
{
double complex result;
double a, b;
double t;
int scale;
double a, b, rx, ry, scale, t;
a = creal(z);
b = cimag(z);
@ -86,27 +84,39 @@ csqrt(double complex z)
/* Scale to avoid overflow. */
if (fabs(a) >= THRESH || fabs(b) >= THRESH) {
a *= 0.25;
b *= 0.25;
scale = 1;
/*
* Don't scale a or b if this might give (spurious)
* underflow. Then the unscaled value is an equivalent
* infinitesmal (or 0).
*/
if (fabs(a) >= 0x1p-1020)
a *= 0.25;
if (fabs(b) >= 0x1p-1020)
b *= 0.25;
scale = 2;
} else {
scale = 0;
scale = 1;
}
/* Scale to reduce inaccuracies when both components are denormal. */
if (fabs(a) < 0x1p-1022 && fabs(b) < 0x1p-1022) {
a *= 0x1p54;
b *= 0x1p54;
scale = 0x1p-27;
}
/* Algorithm 312, CACM vol 10, Oct 1967. */
if (a >= 0) {
t = sqrt((a + hypot(a, b)) * 0.5);
result = CMPLX(t, b / (2 * t));
rx = t;
ry = b / (2 * t);
} else {
t = sqrt((-a + hypot(a, b)) * 0.5);
result = CMPLX(fabs(b) / (2 * t), copysign(t, b));
rx = fabs(b) / (2 * t);
ry = copysign(t, b);
}
/* Rescale. */
if (scale)
return (result * 2);
else
return (result);
return (CMPLX(rx * scale, ry * scale));
}
#if LDBL_MANT_DIG == 53

View File

@ -59,9 +59,7 @@ long double complex
csqrtl(long double complex z)
{
long double complex result;
long double a, b;
long double t;
int scale;
long double a, b, rx, ry, scale, t;
a = creall(z);
b = cimagl(z);
@ -94,25 +92,37 @@ csqrtl(long double complex z)
/* Scale to avoid overflow. */
if (fabsl(a) >= THRESH || fabsl(b) >= THRESH) {
a *= 0.25;
b *= 0.25;
scale = 1;
/*
* Don't scale a or b if this might give (spurious)
* underflow. Then the unscaled value is an equivalent
* infinitesmal (or 0).
*/
if (fabsl(a) >= 0x1p-16380L)
a *= 0.25;
if (fabsl(b) >= 0x1p-16380L)
b *= 0.25;
scale = 2;
} else {
scale = 0;
scale = 1;
}
/* Scale to reduce inaccuracies when both components are denormal. */
if (fabsl(a) < 0x1p-16382L && fabsl(b) < 0x1p-16382L) {
a *= 0x1p64;
b *= 0x1p64;
scale = 0x1p-32;
}
/* Algorithm 312, CACM vol 10, Oct 1967. */
if (a >= 0) {
t = sqrtl((a + hypotl(a, b)) * 0.5);
result = CMPLXL(t, b / (2 * t));
rx = t;
ry = b / (2 * t);
} else {
t = sqrtl((-a + hypotl(a, b)) * 0.5);
result = CMPLXL(fabsl(b) / (2 * t), copysignl(t, b));
rx = fabsl(b) / (2 * t);
ry = copysignl(t, b);
}
/* Rescale. */
if (scale)
return (result * 2);
else
return (result);
return (CMPLXL(rx * scale, ry * scale));
}