As for the float trig functions, use a minimax polynomial that is

specialized for float precision.  The new polynomial has degree 8
instead of 14, and a maximum error of 2**-34.34 (absolute) instead of
2**-30.66.  This doesn't affect the final error significantly; the
maximum error was and is about 0.8879 ulps on amd64 -01.

The fdlibm expf() is not used on i386's (the "optimized" asm version
is used), but probably should be since it was already significantly
faster than the asm version on athlons.  The asm version has the
advantage of being more accurate, so keep using it for now.
This commit is contained in:
Bruce Evans 2005-11-12 18:20:09 +00:00
parent a60c16ad2f
commit d4a74de9fc

View File

@ -24,13 +24,11 @@ static const float
ln2_hi = 6.9313812256e-01, /* 0x3f317180 */
ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */
two25 = 3.355443200e+07, /* 0x4c000000 */
Lg1 = 6.6666668653e-01, /* 3F2AAAAB */
Lg2 = 4.0000000596e-01, /* 3ECCCCCD */
Lg3 = 2.8571429849e-01, /* 3E924925 */
Lg4 = 2.2222198546e-01, /* 3E638E29 */
Lg5 = 1.8183572590e-01, /* 3E3A3325 */
Lg6 = 1.5313838422e-01, /* 3E1CD04F */
Lg7 = 1.4798198640e-01; /* 3E178897 */
/* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */
Lg1 = 0xaaaaaa.0p-24, /* 0.66666662693 */
Lg2 = 0xccce13.0p-25, /* 0.40000972152 */
Lg3 = 0x91e9ee.0p-25, /* 0.28498786688 */
Lg4 = 0xf89e26.0p-26; /* 0.24279078841 */
static const float zero = 0.0;
@ -70,8 +68,8 @@ __ieee754_logf(float x)
i = ix-(0x6147a<<3);
w = z*z;
j = (0x6b851<<3)-ix;
t1= w*(Lg2+w*(Lg4+w*Lg6));
t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
t1= w*(Lg2+w*Lg4);
t2= z*(Lg1+w*Lg3);
i |= j;
R = t2+t1;
if(i>0) {