Use a temporary array instead of the arg array y[] for calling

__kernel_rem_pio2().  This simplifies analysis of aliasing and thus
results in better code for the usual case where __kernel_rem_pio2()
is not called.  In particular, when __ieee754_rem_pio2[f]() is inlined,
it normally results in y[] being returned in registers.  I couldn't
get this to work using the restrict qualifier.

In float precision, this saves 2-3% in most cases on amd64 and i386
(A64) despite it not being inlined in float precision yet.  In double
precision, this has high variance, with an average gain of 2% for
amd64 and 0.7% for i386 (but a much larger gain for usual cases) and
some losses.
This commit is contained in:
bde 2008-02-25 18:28:58 +00:00
parent b9adbf63f9
commit 49cb35343e
2 changed files with 8 additions and 8 deletions

View File

@ -56,7 +56,7 @@ int
__ieee754_rem_pio2(double x, double *y)
{
double z,w,t,r,fn;
double tx[3];
double tx[3],ty[2];
int32_t e0,i,j,nx,n,ix,hx;
u_int32_t low;
@ -182,7 +182,7 @@ medium:
tx[2] = z;
nx = 3;
while(tx[nx-1]==zero) nx--; /* skip zero term */
n = __kernel_rem_pio2(tx,y,e0,nx,1);
if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
return n;
n = __kernel_rem_pio2(tx,ty,e0,nx,1);
if(hx<0) {y[0] = -ty[0]; y[1] = -ty[1]; return -n;}
y[0] = ty[0]; y[1] = ty[1]; return n;
}

View File

@ -45,7 +45,7 @@ int
__ieee754_rem_pio2f(float x, double *y)
{
double w,r,fn;
double tx[1];
double tx[1],ty[1];
float z;
int32_t e0,n,ix,hx;
@ -76,7 +76,7 @@ __ieee754_rem_pio2f(float x, double *y)
e0 = (ix>>23)-150; /* e0 = ilogb(|x|)-23; */
SET_FLOAT_WORD(z, ix - ((int32_t)(e0<<23)));
tx[0] = z;
n = __kernel_rem_pio2(tx,y,e0,1,0);
if(hx<0) {*y = -*y; return -n;}
return n;
n = __kernel_rem_pio2(tx,ty,e0,1,0);
if(hx<0) {*y = -ty[0]; return -n;}
*y = ty[0]; return n;
}