Use a minimax polynomial approximation instead of a Pade rational
function approximation for the second step. The polynomial has degree 2 for cbrtf() and 4 for cbrt(). These degrees are minimal for the final accuracy to be essentially the same as before (slightly smaller). Adjust the rounding between steps 2 and 3 to match. Unfortunately, for cbrt(), this breaks the claimed accuracy slightly although incorrect rounding doesn't. Claim less accuracy since its not worth pessimizing the polynomial or relying on exhaustive testing to get insignificantly more accuracy. This saves about 30 cycles on Athlons (mainly by avoiding 2 divisions) so it gives an overall optimization in the 10-25% range (a larger percentage for float precision, especially in 32-bit mode, since other overheads are more dominant for double precision, surprisingly more in 32-bit mode).
This commit is contained in:
parent
cb3a140c1c
commit
ce5f09f38d
@ -26,12 +26,13 @@ static const u_int32_t
|
||||
B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
|
||||
B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
|
||||
|
||||
/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
|
||||
static const double
|
||||
C = 5.42857142857142815906e-01, /* 19/35 = 0x3FE15F15, 0xF15F15F1 */
|
||||
D = -7.05306122448979611050e-01, /* -864/1225 = 0xBFE691DE, 0x2532C834 */
|
||||
E = 1.41428571428571436819e+00, /* 99/70 = 0x3FF6A0EA, 0x0EA0EA0F */
|
||||
F = 1.60714285714285720630e+00, /* 45/28 = 0x3FF9B6DB, 0x6DB6DB6E */
|
||||
G = 3.57142857142857150787e-01; /* 5/14 = 0x3FD6DB6D, 0xB6DB6DB7 */
|
||||
P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */
|
||||
P1 = -1.88497979543377169875, /* 0xbffe28e0, 0x92f02420 */
|
||||
P2 = 1.621429720105354466140, /* 0x3ff9f160, 0x4a49d6c2 */
|
||||
P3 = -0.758397934778766047437, /* 0xbfe844cb, 0xbee751d9 */
|
||||
P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
|
||||
|
||||
double
|
||||
cbrt(double x)
|
||||
@ -78,36 +79,30 @@ cbrt(double x)
|
||||
SET_HIGH_WORD(t,sign|(hx/3+B1));
|
||||
|
||||
/*
|
||||
* New cbrt to 25 bits:
|
||||
* cbrt(x) = t*cbrt(x/t**3) ~= t*R(x/t**3)
|
||||
* where R(r) = (14*r**2 + 35*r + 5)/(5*r**2 + 35*r + 14) is the
|
||||
* (2,2) Pade approximation to cbrt(r) at r = 1. We replace
|
||||
* r = x/t**3 by 1/r = t**3/x since the latter can be evaluated
|
||||
* more efficiently, and rearrange the expression for R(r) to use
|
||||
* 4 additions and 2 divisions instead of the 4 additions, 4
|
||||
* multiplications and 1 division that would be required using
|
||||
* Horner's rule on the numerator and denominator. t being good
|
||||
* to 32 bits means that |t/cbrt(x)-1| < 1/32, so |x/t**3-1| < 0.1
|
||||
* and for R(r) we can use any approximation to cbrt(r) that is good
|
||||
* to 20 bits on [0.9, 1.1]. The (2,2) Pade approximation is not an
|
||||
* especially good choice.
|
||||
* New cbrt to 23 bits:
|
||||
* cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
|
||||
* where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
|
||||
* to within 2**-23.5 when |r - 1| < 1/10. The rough approximation
|
||||
* has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
|
||||
* gives us bounds for r = t**3/x.
|
||||
*
|
||||
* Try to optimize for parallel evaluation as in k_tanf.c.
|
||||
*/
|
||||
r=t*t/x;
|
||||
s=C+r*t;
|
||||
t*=G+F/(s+E+D/s);
|
||||
r=(t*t)*(t/x);
|
||||
t=t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4));
|
||||
|
||||
/*
|
||||
* Round t away from zero to 25 bits (sloppily except for ensuring that
|
||||
* Round t away from zero to 23 bits (sloppily except for ensuring that
|
||||
* the result is larger in magnitude than cbrt(x) but not much more than
|
||||
* 2 25-bit ulps larger). With rounding towards zero, the error bound
|
||||
* would be ~5/6 instead of ~4/6. With a maximum error of 1 25-bit ulps
|
||||
* 2 23-bit ulps larger). With rounding towards zero, the error bound
|
||||
* would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps
|
||||
* in the rounded t, the infinite-precision error in the Newton
|
||||
* approximation barely affects third digit in the the final error
|
||||
* 0.667; the error in the rounded t can be up to about 12 25-bit ulps
|
||||
* 0.667; the error in the rounded t can be up to about 3 23-bit ulps
|
||||
* before the final error is larger than 0.667 ulps.
|
||||
*/
|
||||
u.value=t;
|
||||
u.bits=(u.bits+0x20000000)&0xfffffffff0000000ULL;
|
||||
u.bits=(u.bits+0x80000000)&0xffffffffc0000000ULL;
|
||||
t=u.value;
|
||||
|
||||
/* one step Newton iteration to 53 bits with error < 0.667 ulps */
|
||||
|
@ -28,12 +28,11 @@ static const unsigned
|
||||
B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */
|
||||
B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
|
||||
|
||||
/* |1/cbrt(x) - p(x)| < 2**-14.5 (~[-4.37e-4, 4.366e-5]). */
|
||||
static const float
|
||||
C = 5.4285717010e-01, /* 19/35 = 0x3f0af8b0 */
|
||||
D = -7.0530611277e-01, /* -864/1225 = 0xbf348ef1 */
|
||||
E = 1.4142856598e+00, /* 99/70 = 0x3fb50750 */
|
||||
F = 1.6071428061e+00, /* 45/28 = 0x3fcdb6db */
|
||||
G = 3.5714286566e-01; /* 5/14 = 0x3eb6db6e */
|
||||
P0 = 1.5586718321, /* 0x3fc7828f */
|
||||
P1 = -0.78271341324, /* -0xbf485fe8 */
|
||||
P2 = 0.22403796017; /* 0x3e656a35 */
|
||||
|
||||
float
|
||||
cbrtf(float x)
|
||||
@ -59,10 +58,9 @@ cbrtf(float x)
|
||||
} else
|
||||
SET_FLOAT_WORD(t,sign|(hx/3+B1));
|
||||
|
||||
/* new cbrt to 23 bits */
|
||||
r=t*t/x;
|
||||
s=C+r*t;
|
||||
t*=G+F/(s+E+D/s);
|
||||
/* new cbrt to 14 bits */
|
||||
r=(t*t)*(t/x);
|
||||
t=t*((P0+r*P1)+(r*r)*P2);
|
||||
|
||||
/*
|
||||
* Round t away from zero to 12 bits (sloppily except for ensuring that
|
||||
@ -73,9 +71,9 @@ cbrtf(float x)
|
||||
* the second digit instead of the third digit of 4/6 = 0.666..., etc.
|
||||
*/
|
||||
GET_FLOAT_WORD(high,t);
|
||||
SET_FLOAT_WORD(t,(high+0x1002)&0xfffff000);
|
||||
SET_FLOAT_WORD(t,(high+0x1800)&0xfffff000);
|
||||
|
||||
/* one step Newton iteration to 24 bits with error < 0.667 ulps */
|
||||
/* one step Newton iteration to 24 bits with error < 0.669 ulps */
|
||||
s=t*t; /* t*t is exact */
|
||||
r=x/s; /* error <= 0.5 ulps; |r| < |t| */
|
||||
w=t+t; /* t+t is exact */
|
||||
|
Loading…
x
Reference in New Issue
Block a user