Fix some problems with asinf(), acosf(), atanf(), and atan2f():
- Adjust several constants for float precision. Some thresholds that were appropriate for double precision were never changed when these routines were converted to float precision. This has an impact on performance but not accuracy. (Submitted by bde.) - Reduce the degrees of the polynomials used. A smaller degree suffices for float precision. - In asinf(), use double arithmetic in part of the calculation to avoid a corner case and some complicated arithmetic involving a division and some buggy constants. This improves performance and accuracy. Max error (ulps): asinf acosf atanf before 0.925 0.782 0.852 after 0.743 0.804 0.852 As bde points out, it's cheaper for asin*() and acos*() to use polynomials instead of rational functions, but that's a task for another day.
This commit is contained in:
parent
fc45432be6
commit
8862f666ad
@ -26,16 +26,10 @@ pio2_hi = 1.5707962513e+00; /* 0x3fc90fda */
|
||||
static volatile float
|
||||
pio2_lo = 7.5497894159e-08; /* 0x33a22168 */
|
||||
static const float
|
||||
pS0 = 1.6666667163e-01, /* 0x3e2aaaab */
|
||||
pS1 = -3.2556581497e-01, /* 0xbea6b090 */
|
||||
pS2 = 2.0121252537e-01, /* 0x3e4e0aa8 */
|
||||
pS3 = -4.0055535734e-02, /* 0xbd241146 */
|
||||
pS4 = 7.9153501429e-04, /* 0x3a4f7f04 */
|
||||
pS5 = 3.4793309169e-05, /* 0x3811ef08 */
|
||||
qS1 = -2.4033949375e+00, /* 0xc019d139 */
|
||||
qS2 = 2.0209457874e+00, /* 0x4001572d */
|
||||
qS3 = -6.8828397989e-01, /* 0xbf303361 */
|
||||
qS4 = 7.7038154006e-02; /* 0x3d9dc62e */
|
||||
pS0 = 1.6666586697e-01,
|
||||
pS1 = -4.2743422091e-02,
|
||||
pS2 = -8.6563630030e-03,
|
||||
qS1 = -7.0662963390e-01;
|
||||
|
||||
float
|
||||
__ieee754_acosf(float x)
|
||||
@ -51,16 +45,16 @@ __ieee754_acosf(float x)
|
||||
return (x-x)/(x-x); /* acos(|x|>1) is NaN */
|
||||
}
|
||||
if(ix<0x3f000000) { /* |x| < 0.5 */
|
||||
if(ix<=0x23000000) return pio2_hi+pio2_lo;/*if|x|<2**-57*/
|
||||
if(ix<=0x32800000) return pio2_hi+pio2_lo;/*if|x|<2**-26*/
|
||||
z = x*x;
|
||||
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
|
||||
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
|
||||
p = z*(pS0+z*(pS1+z*pS2));
|
||||
q = one+z*qS1;
|
||||
r = p/q;
|
||||
return pio2_hi - (x - (pio2_lo-x*r));
|
||||
} else if (hx<0) { /* x < -0.5 */
|
||||
z = (one+x)*(float)0.5;
|
||||
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
|
||||
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
|
||||
p = z*(pS0+z*(pS1+z*pS2));
|
||||
q = one+z*qS1;
|
||||
s = __ieee754_sqrtf(z);
|
||||
r = p/q;
|
||||
w = r*s-pio2_lo;
|
||||
@ -73,8 +67,8 @@ __ieee754_acosf(float x)
|
||||
GET_FLOAT_WORD(idf,df);
|
||||
SET_FLOAT_WORD(df,idf&0xfffff000);
|
||||
c = (z-df*df)/(s+df);
|
||||
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
|
||||
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
|
||||
p = z*(pS0+z*(pS1+z*pS2));
|
||||
q = one+z*qS1;
|
||||
r = p/q;
|
||||
w = r*s+c;
|
||||
return (float)2.0*(df+w);
|
||||
|
@ -22,62 +22,45 @@ __FBSDID("$FreeBSD$");
|
||||
static const float
|
||||
one = 1.0000000000e+00, /* 0x3F800000 */
|
||||
huge = 1.000e+30,
|
||||
pio2_hi = 1.5707962513e+00, /* 0x3fc90fda */
|
||||
pio2_lo = 7.5497894159e-08, /* 0x33a22168 */
|
||||
pio4_hi = 7.8539812565e-01, /* 0x3f490fda */
|
||||
/* coefficient for R(x^2) */
|
||||
pS0 = 1.6666667163e-01, /* 0x3e2aaaab */
|
||||
pS1 = -3.2556581497e-01, /* 0xbea6b090 */
|
||||
pS2 = 2.0121252537e-01, /* 0x3e4e0aa8 */
|
||||
pS3 = -4.0055535734e-02, /* 0xbd241146 */
|
||||
pS4 = 7.9153501429e-04, /* 0x3a4f7f04 */
|
||||
pS5 = 3.4793309169e-05, /* 0x3811ef08 */
|
||||
qS1 = -2.4033949375e+00, /* 0xc019d139 */
|
||||
qS2 = 2.0209457874e+00, /* 0x4001572d */
|
||||
qS3 = -6.8828397989e-01, /* 0xbf303361 */
|
||||
qS4 = 7.7038154006e-02; /* 0x3d9dc62e */
|
||||
pS0 = 1.6666586697e-01,
|
||||
pS1 = -4.2743422091e-02,
|
||||
pS2 = -8.6563630030e-03,
|
||||
qS1 = -7.0662963390e-01;
|
||||
|
||||
static const double
|
||||
pio2 = 1.570796326794896558e+00;
|
||||
|
||||
float
|
||||
__ieee754_asinf(float x)
|
||||
{
|
||||
float t=0.0,w,p,q,c,r,s;
|
||||
double s;
|
||||
float t=0.0,w,p,q,c,r;
|
||||
int32_t hx,ix;
|
||||
GET_FLOAT_WORD(hx,x);
|
||||
ix = hx&0x7fffffff;
|
||||
if(ix==0x3f800000) {
|
||||
/* asin(1)=+-pi/2 with inexact */
|
||||
return x*pio2_hi+x*pio2_lo;
|
||||
return x*pio2;
|
||||
} else if(ix> 0x3f800000) { /* |x|>= 1 */
|
||||
return (x-x)/(x-x); /* asin(|x|>1) is NaN */
|
||||
} else if (ix<0x3f000000) { /* |x|<0.5 */
|
||||
if(ix<0x32000000) { /* if |x| < 2**-27 */
|
||||
if(ix<0x39800000) { /* if |x| < 2**-12 */
|
||||
if(huge+x>one) return x;/* return x with inexact if x!=0*/
|
||||
} else
|
||||
t = x*x;
|
||||
p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
|
||||
q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
|
||||
p = t*(pS0+t*(pS1+t*pS2));
|
||||
q = one+t*qS1;
|
||||
w = p/q;
|
||||
return x+x*w;
|
||||
}
|
||||
/* 1> |x|>= 0.5 */
|
||||
w = one-fabsf(x);
|
||||
t = w*(float)0.5;
|
||||
p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
|
||||
q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
|
||||
s = __ieee754_sqrtf(t);
|
||||
if(ix>=0x3F79999A) { /* if |x| > 0.975 */
|
||||
w = p/q;
|
||||
t = pio2_hi-((float)2.0*(s+s*w)-pio2_lo);
|
||||
} else {
|
||||
int32_t iw;
|
||||
w = s;
|
||||
GET_FLOAT_WORD(iw,w);
|
||||
SET_FLOAT_WORD(w,iw&0xfffff000);
|
||||
c = (t-w*w)/(s+w);
|
||||
r = p/q;
|
||||
p = (float)2.0*s*r-(pio2_lo-(float)2.0*c);
|
||||
q = pio4_hi-(float)2.0*w;
|
||||
t = pio4_hi-(p-q);
|
||||
}
|
||||
p = t*(pS0+t*(pS1+t*pS2));
|
||||
q = one+t*qS1;
|
||||
s = __ieee754_sqrt(t);
|
||||
w = p/q;
|
||||
t = pio2-2.0*(s+s*w);
|
||||
if(hx>0) return t; else return -t;
|
||||
}
|
||||
|
@ -80,8 +80,8 @@ __ieee754_atan2f(float y, float x)
|
||||
|
||||
/* compute y/x */
|
||||
k = (iy-ix)>>23;
|
||||
if(k > 60) z=pi_o_2+(float)0.5*pi_lo; /* |y/x| > 2**60 */
|
||||
else if(hx<0&&k<-60) z=0.0; /* |y|/x < -2**60 */
|
||||
if(k > 26) z=pi_o_2+(float)0.5*pi_lo; /* |y/x| > 2**26 */
|
||||
else if(hx<0&&k<-26) z=0.0; /* |y|/x < -2**26 */
|
||||
else z=atanf(fabsf(y/x)); /* safe to do y/x */
|
||||
switch (m) {
|
||||
case 0: return z ; /* atan(+,+) */
|
||||
|
@ -34,20 +34,14 @@ static const float atanlo[] = {
|
||||
};
|
||||
|
||||
static const float aT[] = {
|
||||
3.3333334327e-01, /* 0x3eaaaaaa */
|
||||
-2.0000000298e-01, /* 0xbe4ccccd */
|
||||
1.4285714924e-01, /* 0x3e124925 */
|
||||
-1.1111110449e-01, /* 0xbde38e38 */
|
||||
9.0908870101e-02, /* 0x3dba2e6e */
|
||||
-7.6918758452e-02, /* 0xbd9d8795 */
|
||||
6.6610731184e-02, /* 0x3d886b35 */
|
||||
-5.8335702866e-02, /* 0xbd6ef16b */
|
||||
4.9768779427e-02, /* 0x3d4bda59 */
|
||||
-3.6531571299e-02, /* 0xbd15a221 */
|
||||
1.6285819933e-02, /* 0x3c8569d7 */
|
||||
3.3333328366e-01,
|
||||
-1.9999158382e-01,
|
||||
1.4253635705e-01,
|
||||
-1.0648017377e-01,
|
||||
6.1687607318e-02,
|
||||
};
|
||||
|
||||
static const float
|
||||
static const float
|
||||
one = 1.0,
|
||||
huge = 1.0e30;
|
||||
|
||||
@ -59,13 +53,13 @@ atanf(float x)
|
||||
|
||||
GET_FLOAT_WORD(hx,x);
|
||||
ix = hx&0x7fffffff;
|
||||
if(ix>=0x50800000) { /* if |x| >= 2^34 */
|
||||
if(ix>=0x4c800000) { /* if |x| >= 2**26 */
|
||||
if(ix>0x7f800000)
|
||||
return x+x; /* NaN */
|
||||
if(hx>0) return atanhi[3]+*(volatile float *)&atanlo[3];
|
||||
else return -atanhi[3]-*(volatile float *)&atanlo[3];
|
||||
} if (ix < 0x3ee00000) { /* |x| < 0.4375 */
|
||||
if (ix < 0x31000000) { /* |x| < 2^-29 */
|
||||
if (ix < 0x39800000) { /* |x| < 2**-12 */
|
||||
if(huge+x>one) return x; /* raise inexact */
|
||||
}
|
||||
id = -1;
|
||||
@ -80,7 +74,7 @@ atanf(float x)
|
||||
} else {
|
||||
if (ix < 0x401c0000) { /* |x| < 2.4375 */
|
||||
id = 2; x = (x-(float)1.5)/(one+(float)1.5*x);
|
||||
} else { /* 2.4375 <= |x| < 2^66 */
|
||||
} else { /* 2.4375 <= |x| < 2**26 */
|
||||
id = 3; x = -(float)1.0/x;
|
||||
}
|
||||
}}
|
||||
@ -88,8 +82,8 @@ atanf(float x)
|
||||
z = x*x;
|
||||
w = z*z;
|
||||
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
|
||||
s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10])))));
|
||||
s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9]))));
|
||||
s1 = z*(aT[0]+w*(aT[2]+w*aT[4]));
|
||||
s2 = w*(aT[1]+w*aT[3]);
|
||||
if (id<0) return x - x*(s1+s2);
|
||||
else {
|
||||
z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x);
|
||||
|
Loading…
Reference in New Issue
Block a user