Change __ieee754_rem_pio2f() to return double instead of float so that
this function and its callers cosf(), sinf() and tanf() don't waste time
converting values from doubles to floats and back for |x| > 9pi/4.
All these functions were optimized a few years ago to mostly use doubles
internally and across the __kernel*() interfaces but not across the
__ieee754_rem_pio2f() interface.

This saves about 40 cycles in cosf(), sinf() and tanf() for |x| > 9pi/4
on amd64 (A64), and about 20 cycles on i386 (A64) (except for cosf()
and sinf() in the upper range).  40 cycles is about 35% for 9pi/4 < |x|
<= 2**19pi/2 and about 5% for |x| > 2**19pi/2.  The saving is much
larger on amd64 than on i386 since the conversions are not easy to
optimize except on i386 where some of them are automatic and others
are optimized invalidly.  amd64 is still about 10% slower in cosf()
and tanf() in the lower range due to conversion overhead.

This also gives a tiny speedup for |x| <= 9pi/4 on amd64 (by simplifying
the code).  It also avoids compiler bugs and/or additional slowness
in the conversions on (not yet supported) machines where double_t !=
double.
This commit is contained in:
Bruce Evans 2008-02-25 13:33:20 +00:00
parent 636133e3dd
commit 70d818a20e
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=176552
5 changed files with 24 additions and 27 deletions

View File

@ -19,8 +19,8 @@ __FBSDID("$FreeBSD$");
/* __ieee754_rem_pio2f(x,y)
*
* return the remainder of x rem pi/2 in y[0]+y[1]
* use double precision internally
* return the remainder of x rem pi/2 in *y
* use double precision for everything except passing x
* use __kernel_rem_pio2() for large x
*/
@ -42,10 +42,10 @@ pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
pio2_1t = 6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */
int
__ieee754_rem_pio2f(float x, float *y)
__ieee754_rem_pio2f(float x, double *y)
{
double w,r,fn;
double tx[1],ty[1];
double tx[1];
float z;
int32_t e0,n,ix,hx;
@ -63,23 +63,20 @@ __ieee754_rem_pio2f(float x, float *y)
#endif
r = x-fn*pio2_1;
w = fn*pio2_1t;
y[0] = r-w;
y[1] = (r-y[0])-w;
*y = r-w;
return n;
}
/*
* all other (large) arguments
*/
if(ix>=0x7f800000) { /* x is inf or NaN */
y[0]=y[1]=x-x; return 0;
*y=x-x; return 0;
}
/* set z = scalbn(|x|,ilogb(|x|)-23) */
e0 = (ix>>23)-150; /* e0 = ilogb(|x|)-23; */
SET_FLOAT_WORD(z, ix - ((int32_t)(e0<<23)));
tx[0] = z;
n = __kernel_rem_pio2(tx,ty,e0,1,0);
y[0] = ty[0];
y[1] = ty[0] - y[0];
if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
n = __kernel_rem_pio2(tx,y,e0,1,0);
if(hx<0) {*y = -*y; return -n;}
return n;
}

View File

@ -320,7 +320,7 @@ double __kernel_cos(double,double);
double __kernel_tan(double,double,int);
/* float precision kernel functions */
int __ieee754_rem_pio2f(float,float*);
int __ieee754_rem_pio2f(float,double*);
float __kernel_sindf(double);
float __kernel_cosdf(double);
float __kernel_tandf(double,int);

View File

@ -34,7 +34,7 @@ c4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
float
cosf(float x)
{
float y[2];
double y;
int32_t n, hx, ix;
GET_FLOAT_WORD(hx,x);
@ -71,13 +71,13 @@ cosf(float x)
/* general argument reduction needed */
else {
n = __ieee754_rem_pio2f(x,y);
n = __ieee754_rem_pio2f(x,&y);
switch(n&3) {
case 0: return __kernel_cosdf((double)y[0]+y[1]);
case 1: return __kernel_sindf(-(double)y[0]-y[1]);
case 2: return -__kernel_cosdf((double)y[0]+y[1]);
case 0: return __kernel_cosdf(y);
case 1: return __kernel_sindf(-y);
case 2: return -__kernel_cosdf(y);
default:
return __kernel_sindf((double)y[0]+y[1]);
return __kernel_sindf(y);
}
}
}

View File

@ -34,7 +34,7 @@ s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
float
sinf(float x)
{
float y[2];
double y;
int32_t n, hx, ix;
GET_FLOAT_WORD(hx,x);
@ -69,13 +69,13 @@ sinf(float x)
/* general argument reduction needed */
else {
n = __ieee754_rem_pio2f(x,y);
n = __ieee754_rem_pio2f(x,&y);
switch(n&3) {
case 0: return __kernel_sindf((double)y[0]+y[1]);
case 1: return __kernel_cosdf((double)y[0]+y[1]);
case 2: return __kernel_sindf(-(double)y[0]-y[1]);
case 0: return __kernel_sindf(y);
case 1: return __kernel_cosdf(y);
case 2: return __kernel_sindf(-y);
default:
return -__kernel_cosdf((double)y[0]+y[1]);
return -__kernel_cosdf(y);
}
}
}

View File

@ -32,7 +32,7 @@ t4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
float
tanf(float x)
{
float y[2];
double y;
int32_t n, hx, ix;
GET_FLOAT_WORD(hx,x);
@ -61,8 +61,8 @@ tanf(float x)
/* general argument reduction needed */
else {
n = __ieee754_rem_pio2f(x,y);
n = __ieee754_rem_pio2f(x,&y);
/* integer parameter: 1 -- n even; -1 -- n odd */
return __kernel_tandf((double)y[0]+y[1],1-((n&1)<<1));
return __kernel_tandf(y,1-((n&1)<<1));
}
}