Exploit skew-symmetry to avoid an operation: -sin(x-A) = sin(A-x).

This gives a tiny but hopefully always free optimization in the 2 quadrants
to which it applies.  On Athlons, it reduces maximum latency by 4 cycles
in these quadrants but usually has a smaller effect on total time
(typically ~2 cycles (~5%), but sometimes 8 cycles when the compiler
generates poor code).
This commit is contained in:
Bruce Evans 2005-11-28 06:15:10 +00:00
parent 35ae347641
commit 0bea84b2d4
2 changed files with 4 additions and 4 deletions

View File

@ -48,7 +48,7 @@ cosf(float x)
}
if(ix<=0x407b53d1) { /* |x| ~<= 5*pi/4 */
if(ix<=0x4016cbe3) /* |x| ~<= 3pi/4 */
return -__kernel_sindf(x - c1pio2);
return __kernel_sindf(c1pio2 - x);
else
return -__kernel_cosdf(x - c2pio2);
}
@ -67,7 +67,7 @@ cosf(float x)
n = __ieee754_rem_pio2f(x,y);
switch(n&3) {
case 0: return __kernel_cosdf((double)y[0]+y[1]);
case 1: return -__kernel_sindf((double)y[0]+y[1]);
case 1: return __kernel_sindf(-(double)y[0]-y[1]);
case 2: return -__kernel_cosdf((double)y[0]+y[1]);
default:
return __kernel_sindf((double)y[0]+y[1]);

View File

@ -53,7 +53,7 @@ sinf(float x)
else
return -__kernel_cosdf(x + s1pio2);
} else
return -__kernel_sindf(x + (hx > 0 ? -s2pio2 : s2pio2));
return __kernel_sindf((hx > 0 ? s2pio2 : -s2pio2) - x);
}
if(ix<=0x40e231d5) { /* |x| ~<= 9*pi/4 */
if(ix<=0x40afeddf) { /* |x| ~<= 7*pi/4 */
@ -74,7 +74,7 @@ sinf(float x)
switch(n&3) {
case 0: return __kernel_sindf((double)y[0]+y[1]);
case 1: return __kernel_cosdf((double)y[0]+y[1]);
case 2: return -__kernel_sindf((double)y[0]+y[1]);
case 2: return __kernel_sindf(-(double)y[0]-y[1]);
default:
return -__kernel_cosdf((double)y[0]+y[1]);
}