Use a temporary array instead of the arg array y[] for calling __kernel_rem_pio2(). This simplifies analysis of aliasing and thus results in better code for the usual case where __kernel_rem_pio2() is not called. In particular, when __ieee754_rem_pio2[f]() is inlined, it normally results in y[] being returned in registers. I couldn't get this to work using the restrict qualifier. In float precision, this saves 2-3% in most cases on amd64 and i386 (A64) despite it not being inlined in float precision yet. In double precision, this has high variance, with an average gain of 2% for amd64 and 0.7% for i386 (but a much larger gain for usual cases) and some losses.
2008-02-25 18:28:58 +00:00 · 2008-02-25 18:28:58 +00:00 · 49cb35343e
commit 49cb35343e
parent b9adbf63f9
2 changed files with 8 additions and 8 deletions
--- a/lib/msun/src/e_rem_pio2.c
+++ b/lib/msun/src/e_rem_pio2.c
@@ -56,7 +56,7 @@ int
 __ieee754_rem_pio2(double x, double *y)
 {
 	double z,w,t,r,fn;
-	double tx[3];
+	double tx[3],ty[2];
 	int32_t e0,i,j,nx,n,ix,hx;
 	u_int32_t low;

@@ -182,7 +182,7 @@ medium:
 	tx[2] = z;
 	nx = 3;
 	while(tx[nx-1]==zero) nx--;	/* skip zero term */
-	n  =  __kernel_rem_pio2(tx,y,e0,nx,1);
-	if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
-	return n;
+	n  =  __kernel_rem_pio2(tx,ty,e0,nx,1);
+	if(hx<0) {y[0] = -ty[0]; y[1] = -ty[1]; return -n;}
+	y[0] = ty[0]; y[1] = ty[1]; return n;
 }
--- a/lib/msun/src/e_rem_pio2f.c
+++ b/lib/msun/src/e_rem_pio2f.c
@@ -45,7 +45,7 @@ int
 __ieee754_rem_pio2f(float x, double *y)
 {
 	double w,r,fn;
-	double tx[1];
+	double tx[1],ty[1];
 	float z;
 	int32_t e0,n,ix,hx;

@@ -76,7 +76,7 @@ __ieee754_rem_pio2f(float x, double *y)
 	e0 = (ix>>23)-150;		/* e0 = ilogb(|x|)-23; */
 	SET_FLOAT_WORD(z, ix - ((int32_t)(e0<<23)));
 	tx[0] = z;
-	n  =  __kernel_rem_pio2(tx,y,e0,1,0);
-	if(hx<0) {*y = -*y; return -n;}
-	return n;
+	n  =  __kernel_rem_pio2(tx,ty,e0,1,0);
+	if(hx<0) {*y = -ty[0]; return -n;}
+	*y = ty[0]; return n;
 }