Fix some problems with asinf(), acosf(), atanf(), and atan2f():

- Adjust several constants for float precision. Some thresholds
  that were appropriate for double precision were never changed
  when these routines were converted to float precision. This
  has an impact on performance but not accuracy. (Submitted by bde.)

- Reduce the degrees of the polynomials used. A smaller degree
  suffices for float precision.

- In asinf(), use double arithmetic in part of the calculation to
  avoid a corner case and some complicated arithmetic involving a
  division and some buggy constants. This improves performance and
  accuracy.

Max error (ulps):
         asinf  acosf  atanf
before   0.925  0.782  0.852
after    0.743  0.804  0.852

As bde points out, it's cheaper for asin*() and acos*() to use
polynomials instead of rational functions, but that's a task for
another day.
This commit is contained in:
David Schultz 2008-08-01 01:24:25 +00:00
parent fc45432be6
commit 8862f666ad
4 changed files with 42 additions and 71 deletions

View File

@ -26,16 +26,10 @@ pio2_hi = 1.5707962513e+00; /* 0x3fc90fda */
static volatile float
pio2_lo = 7.5497894159e-08; /* 0x33a22168 */
static const float
pS0 = 1.6666667163e-01, /* 0x3e2aaaab */
pS1 = -3.2556581497e-01, /* 0xbea6b090 */
pS2 = 2.0121252537e-01, /* 0x3e4e0aa8 */
pS3 = -4.0055535734e-02, /* 0xbd241146 */
pS4 = 7.9153501429e-04, /* 0x3a4f7f04 */
pS5 = 3.4793309169e-05, /* 0x3811ef08 */
qS1 = -2.4033949375e+00, /* 0xc019d139 */
qS2 = 2.0209457874e+00, /* 0x4001572d */
qS3 = -6.8828397989e-01, /* 0xbf303361 */
qS4 = 7.7038154006e-02; /* 0x3d9dc62e */
pS0 = 1.6666586697e-01,
pS1 = -4.2743422091e-02,
pS2 = -8.6563630030e-03,
qS1 = -7.0662963390e-01;
float
__ieee754_acosf(float x)
@ -51,16 +45,16 @@ __ieee754_acosf(float x)
return (x-x)/(x-x); /* acos(|x|>1) is NaN */
}
if(ix<0x3f000000) { /* |x| < 0.5 */
if(ix<=0x23000000) return pio2_hi+pio2_lo;/*if|x|<2**-57*/
if(ix<=0x32800000) return pio2_hi+pio2_lo;/*if|x|<2**-26*/
z = x*x;
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
p = z*(pS0+z*(pS1+z*pS2));
q = one+z*qS1;
r = p/q;
return pio2_hi - (x - (pio2_lo-x*r));
} else if (hx<0) { /* x < -0.5 */
z = (one+x)*(float)0.5;
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
p = z*(pS0+z*(pS1+z*pS2));
q = one+z*qS1;
s = __ieee754_sqrtf(z);
r = p/q;
w = r*s-pio2_lo;
@ -73,8 +67,8 @@ __ieee754_acosf(float x)
GET_FLOAT_WORD(idf,df);
SET_FLOAT_WORD(df,idf&0xfffff000);
c = (z-df*df)/(s+df);
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
p = z*(pS0+z*(pS1+z*pS2));
q = one+z*qS1;
r = p/q;
w = r*s+c;
return (float)2.0*(df+w);

View File

@ -22,62 +22,45 @@ __FBSDID("$FreeBSD$");
static const float
one = 1.0000000000e+00, /* 0x3F800000 */
huge = 1.000e+30,
pio2_hi = 1.5707962513e+00, /* 0x3fc90fda */
pio2_lo = 7.5497894159e-08, /* 0x33a22168 */
pio4_hi = 7.8539812565e-01, /* 0x3f490fda */
/* coefficient for R(x^2) */
pS0 = 1.6666667163e-01, /* 0x3e2aaaab */
pS1 = -3.2556581497e-01, /* 0xbea6b090 */
pS2 = 2.0121252537e-01, /* 0x3e4e0aa8 */
pS3 = -4.0055535734e-02, /* 0xbd241146 */
pS4 = 7.9153501429e-04, /* 0x3a4f7f04 */
pS5 = 3.4793309169e-05, /* 0x3811ef08 */
qS1 = -2.4033949375e+00, /* 0xc019d139 */
qS2 = 2.0209457874e+00, /* 0x4001572d */
qS3 = -6.8828397989e-01, /* 0xbf303361 */
qS4 = 7.7038154006e-02; /* 0x3d9dc62e */
pS0 = 1.6666586697e-01,
pS1 = -4.2743422091e-02,
pS2 = -8.6563630030e-03,
qS1 = -7.0662963390e-01;
static const double
pio2 = 1.570796326794896558e+00;
float
__ieee754_asinf(float x)
{
float t=0.0,w,p,q,c,r,s;
double s;
float t=0.0,w,p,q,c,r;
int32_t hx,ix;
GET_FLOAT_WORD(hx,x);
ix = hx&0x7fffffff;
if(ix==0x3f800000) {
/* asin(1)=+-pi/2 with inexact */
return x*pio2_hi+x*pio2_lo;
return x*pio2;
} else if(ix> 0x3f800000) { /* |x|>= 1 */
return (x-x)/(x-x); /* asin(|x|>1) is NaN */
} else if (ix<0x3f000000) { /* |x|<0.5 */
if(ix<0x32000000) { /* if |x| < 2**-27 */
if(ix<0x39800000) { /* if |x| < 2**-12 */
if(huge+x>one) return x;/* return x with inexact if x!=0*/
} else
t = x*x;
p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
p = t*(pS0+t*(pS1+t*pS2));
q = one+t*qS1;
w = p/q;
return x+x*w;
}
/* 1> |x|>= 0.5 */
w = one-fabsf(x);
t = w*(float)0.5;
p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5)))));
q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4)));
s = __ieee754_sqrtf(t);
if(ix>=0x3F79999A) { /* if |x| > 0.975 */
w = p/q;
t = pio2_hi-((float)2.0*(s+s*w)-pio2_lo);
} else {
int32_t iw;
w = s;
GET_FLOAT_WORD(iw,w);
SET_FLOAT_WORD(w,iw&0xfffff000);
c = (t-w*w)/(s+w);
r = p/q;
p = (float)2.0*s*r-(pio2_lo-(float)2.0*c);
q = pio4_hi-(float)2.0*w;
t = pio4_hi-(p-q);
}
p = t*(pS0+t*(pS1+t*pS2));
q = one+t*qS1;
s = __ieee754_sqrt(t);
w = p/q;
t = pio2-2.0*(s+s*w);
if(hx>0) return t; else return -t;
}

View File

@ -80,8 +80,8 @@ __ieee754_atan2f(float y, float x)
/* compute y/x */
k = (iy-ix)>>23;
if(k > 60) z=pi_o_2+(float)0.5*pi_lo; /* |y/x| > 2**60 */
else if(hx<0&&k<-60) z=0.0; /* |y|/x < -2**60 */
if(k > 26) z=pi_o_2+(float)0.5*pi_lo; /* |y/x| > 2**26 */
else if(hx<0&&k<-26) z=0.0; /* |y|/x < -2**26 */
else z=atanf(fabsf(y/x)); /* safe to do y/x */
switch (m) {
case 0: return z ; /* atan(+,+) */

View File

@ -34,20 +34,14 @@ static const float atanlo[] = {
};
static const float aT[] = {
3.3333334327e-01, /* 0x3eaaaaaa */
-2.0000000298e-01, /* 0xbe4ccccd */
1.4285714924e-01, /* 0x3e124925 */
-1.1111110449e-01, /* 0xbde38e38 */
9.0908870101e-02, /* 0x3dba2e6e */
-7.6918758452e-02, /* 0xbd9d8795 */
6.6610731184e-02, /* 0x3d886b35 */
-5.8335702866e-02, /* 0xbd6ef16b */
4.9768779427e-02, /* 0x3d4bda59 */
-3.6531571299e-02, /* 0xbd15a221 */
1.6285819933e-02, /* 0x3c8569d7 */
3.3333328366e-01,
-1.9999158382e-01,
1.4253635705e-01,
-1.0648017377e-01,
6.1687607318e-02,
};
static const float
static const float
one = 1.0,
huge = 1.0e30;
@ -59,13 +53,13 @@ atanf(float x)
GET_FLOAT_WORD(hx,x);
ix = hx&0x7fffffff;
if(ix>=0x50800000) { /* if |x| >= 2^34 */
if(ix>=0x4c800000) { /* if |x| >= 2**26 */
if(ix>0x7f800000)
return x+x; /* NaN */
if(hx>0) return atanhi[3]+*(volatile float *)&atanlo[3];
else return -atanhi[3]-*(volatile float *)&atanlo[3];
} if (ix < 0x3ee00000) { /* |x| < 0.4375 */
if (ix < 0x31000000) { /* |x| < 2^-29 */
if (ix < 0x39800000) { /* |x| < 2**-12 */
if(huge+x>one) return x; /* raise inexact */
}
id = -1;
@ -80,7 +74,7 @@ atanf(float x)
} else {
if (ix < 0x401c0000) { /* |x| < 2.4375 */
id = 2; x = (x-(float)1.5)/(one+(float)1.5*x);
} else { /* 2.4375 <= |x| < 2^66 */
} else { /* 2.4375 <= |x| < 2**26 */
id = 3; x = -(float)1.0/x;
}
}}
@ -88,8 +82,8 @@ atanf(float x)
z = x*x;
w = z*z;
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10])))));
s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9]))));
s1 = z*(aT[0]+w*(aT[2]+w*aT[4]));
s2 = w*(aT[1]+w*aT[3]);
if (id<0) return x - x*(s1+s2);
else {
z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x);