Use a temporary array instead of the arg array y[] for calling
__kernel_rem_pio2(). This simplifies analysis of aliasing and thus results in better code for the usual case where __kernel_rem_pio2() is not called. In particular, when __ieee754_rem_pio2[f]() is inlined, it normally results in y[] being returned in registers. I couldn't get this to work using the restrict qualifier. In float precision, this saves 2-3% in most cases on amd64 and i386 (A64) despite it not being inlined in float precision yet. In double precision, this has high variance, with an average gain of 2% for amd64 and 0.7% for i386 (but a much larger gain for usual cases) and some losses.
This commit is contained in:
parent
b9adbf63f9
commit
49cb35343e
@ -56,7 +56,7 @@ int
|
||||
__ieee754_rem_pio2(double x, double *y)
|
||||
{
|
||||
double z,w,t,r,fn;
|
||||
double tx[3];
|
||||
double tx[3],ty[2];
|
||||
int32_t e0,i,j,nx,n,ix,hx;
|
||||
u_int32_t low;
|
||||
|
||||
@ -182,7 +182,7 @@ medium:
|
||||
tx[2] = z;
|
||||
nx = 3;
|
||||
while(tx[nx-1]==zero) nx--; /* skip zero term */
|
||||
n = __kernel_rem_pio2(tx,y,e0,nx,1);
|
||||
if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
|
||||
return n;
|
||||
n = __kernel_rem_pio2(tx,ty,e0,nx,1);
|
||||
if(hx<0) {y[0] = -ty[0]; y[1] = -ty[1]; return -n;}
|
||||
y[0] = ty[0]; y[1] = ty[1]; return n;
|
||||
}
|
||||
|
@ -45,7 +45,7 @@ int
|
||||
__ieee754_rem_pio2f(float x, double *y)
|
||||
{
|
||||
double w,r,fn;
|
||||
double tx[1];
|
||||
double tx[1],ty[1];
|
||||
float z;
|
||||
int32_t e0,n,ix,hx;
|
||||
|
||||
@ -76,7 +76,7 @@ __ieee754_rem_pio2f(float x, double *y)
|
||||
e0 = (ix>>23)-150; /* e0 = ilogb(|x|)-23; */
|
||||
SET_FLOAT_WORD(z, ix - ((int32_t)(e0<<23)));
|
||||
tx[0] = z;
|
||||
n = __kernel_rem_pio2(tx,y,e0,1,0);
|
||||
if(hx<0) {*y = -*y; return -n;}
|
||||
return n;
|
||||
n = __kernel_rem_pio2(tx,ty,e0,1,0);
|
||||
if(hx<0) {*y = -ty[0]; return -n;}
|
||||
*y = ty[0]; return n;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user