Use only double precision for "kernel" cosf and sinf (except for

returning float). The functions are renamed from __kernel_{cos,sin}f() to __kernel_{cos,sin}df() so that misuses of them will cause link errors and not crashes. This version is an almost-routine translation with no special optimizations for accuracy or efficiency. The not-quite-routine part is that in __kernel_cosf(), regenerating the minimax polynomial with double precision coefficients gives a coefficient for the x**2 term that is not quite -0.5, so the literal 0.5 in the code and the related `hz' variable need to be modified; also, the special code for reducing the error in 1.0-x**2*0.5 is no longer needed, so it is convenient to adjust all the logic for the x**2 term a little. Note that without extra precision, it would be very bad to use a coefficient other than -0.5 for the x**2 term -- the old version depends on multiplication by -0.5 being infinitely precise so as not to need even more special code for reducing the error in 1-x**2*0.5. This gives an unimportant increase in accuracy, from ~0.8 to ~0.501 ulps. Almost all of the error is from the final rounding step, since the choice of the minimax polynomials so that their contribution to the error is a bit less than 0.5 ulps just happens to give contributions that are significantly less (~0.001 ulps). On Athlons, for uniformly distributed args in [-2pi, 2pi], this gives overall speed increases in the 10-20% range, despite giving a speed decrease of typically 19% (from 31 cycles up to 37) for sinf() on args in [-pi/4, pi/4].
2005-11-28 04:58:57 +00:00 · 2005-11-28 04:58:57 +00:00 · 8fdb019b17
commit 8fdb019b17
parent 75b4a96462
6 changed files with 44 additions and 71 deletions
--- a/lib/msun/src/e_lgammaf_r.c
+++ b/lib/msun/src/e_lgammaf_r.c
@ -98,7 +98,7 @@ static const float zero=  0.0000000000e+00;
 	GET_FLOAT_WORD(ix,x);
 	ix &= 0x7fffffff;

-	if(ix<0x3e800000) return __kernel_sinf(pi*x,zero,0);
+	if(ix<0x3e800000) return __kernel_sindf(pi*x);
 	y = -x;		/* x is assume negative */

    /*
@ -122,14 +122,14 @@ static const float zero=  0.0000000000e+00;
            }
        }
 	switch (n) {
-	    case 0:   y =  __kernel_sinf(pi*y,zero,0); break;
+	    case 0:   y =  __kernel_sindf(pi*y); break;
 	    case 1:
-	    case 2:   y =  __kernel_cosf(pi*((float)0.5-y),zero); break;
+	    case 2:   y =  __kernel_cosdf(pi*((float)0.5-y)); break;
 	    case 3:
-	    case 4:   y =  __kernel_sinf(pi*(one-y),zero,0); break;
+	    case 4:   y =  __kernel_sindf(pi*(one-y)); break;
 	    case 5:
-	    case 6:   y = -__kernel_cosf(pi*(y-(float)1.5),zero); break;
-	    default:  y =  __kernel_sinf(pi*(y-(float)2.0),zero,0); break;
+	    case 6:   y = -__kernel_cosdf(pi*(y-(float)1.5)); break;
+	    default:  y =  __kernel_sindf(pi*(y-(float)2.0)); break;
 	    }
 	return -y;
 }
--- a/lib/msun/src/k_cosf.c
+++ b/lib/msun/src/k_cosf.c
@ -14,7 +14,7 @@
 * ====================================================
 */

-#ifndef INLINE_KERNEL_COSF
+#ifndef INLINE_KERNEL_COSDF
 #ifndef lint
 static char rcsid[] = "$FreeBSD$";
 #endif
@ -23,24 +23,23 @@ static char rcsid[] = "$FreeBSD$";
 #include "math.h"
 #include "math_private.h"

-/* |cos(x) - c(x)| < 2**-33.1 (~[-9.39e-11, 1.083e-10]). */
-static const float
+/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */
+static const double
 one =  1.0,
-C1  =  0xaaaaa5.0p-28,		/*  0.041666645557 */
-C2  = -0xb60615.0p-33,		/* -0.0013887310633 */
-C3  =  0xccf47d.0p-39;		/*  0.000024432542887 */
+C0  = -0x1ffffffd0c5e81.0p-54,	/* -0.499999997251031003120 */
+C1  =  0x155553e1053a42.0p-57,	/*  0.0416666233237390631894 */
+C2  = -0x16c087e80f1e27.0p-62,	/* -0.00138867637746099294692 */
+C3  =  0x199342e0ee5069.0p-68;	/*  0.0000243904487962774090654 */

-#ifdef INLINE_KERNEL_COSF
+#ifdef INLINE_KERNEL_COSDF
 extern inline
 #endif
 float
-__kernel_cosf(float x, float y)
+__kernel_cosdf(double x)
 {
-	float hz,z,r,w;
+	double z,r;

 	z  = x*x;
 	r  = z*(C1+z*(C2+z*C3));
-	hz = (float)0.5*z;
-	w  = one-hz;
-	return w + (((one-w)-hz) + (z*r-x*y));
+	return (one+z*C0) + z*r;
 }
--- a/lib/msun/src/k_sinf.c
+++ b/lib/msun/src/k_sinf.c
@ -14,7 +14,7 @@
 * ====================================================
 */

-#ifndef INLINE_KERNEL_SINF
+#ifndef INLINE_KERNEL_SINDF
 #ifndef lint
 static char rcsid[] = "$FreeBSD$";
 #endif
@ -23,25 +23,23 @@ static char rcsid[] = "$FreeBSD$";
 #include "math.h"
 #include "math_private.h"

-/* |sin(x)/x - s(x)| < 2**-32.5 (~[-1.57e-10, 1.572e-10]). */
-static const float
-half = 0.5,
-S1  = -0xaaaaab.0p-26,		/* -0.16666667163 */
-S2  =  0x8888bb.0p-30,		/*  0.0083333803341 */
-S3  = -0xd02de1.0p-36,		/* -0.00019853517006 */
-S4  =  0xbe6dbe.0p-42;		/*  0.0000028376084629 */
+/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). */
+static const double
+S1 = -0x15555554cbac77.0p-55,	/* -0.166666666416265235595 */
+S2 =  0x111110896efbb2.0p-59,	/*  0.0083333293858894631756 */
+S3 = -0x1a00f9e2cae774.0p-65,	/* -0.000198393348360966317347 */
+S4 =  0x16cd878c3b46a7.0p-71;	/*  0.0000027183114939898219064 */

-#ifdef INLINE_KERNEL_SINF
+#ifdef INLINE_KERNEL_SINDF
 extern inline
 #endif
 float
-__kernel_sinf(float x, float y, int iy)
+__kernel_sindf(double x)
 {
-	float z,r,v;
+	double z,r,v;

 	z	=  x*x;
 	v	=  z*x;
 	r	=  S2+z*(S3+z*S4);
-	if(iy==0) return x+v*(S1+z*r);
-	else      return x-((z*(half*y-v*r)-y)-v*S1);
+	return x+v*(S1+z*r);
 }
--- a/lib/msun/src/math_private.h
+++ b/lib/msun/src/math_private.h
@ -264,8 +264,8 @@ int	__kernel_rem_pio2(double*,double*,int,int,int,const int*);

 /* float versions of fdlibm kernel functions */
 int	__ieee754_rem_pio2f(float,float*);
-float	__kernel_sinf(float,float,int);
-float	__kernel_cosf(float,float);
+float	__kernel_sindf(double);
+float	__kernel_cosdf(double);
 float	__kernel_tandf(double,int);
 int	__kernel_rem_pio2f(float*,float*,int,int,int,const int*);

--- a/lib/msun/src/s_cosf.c
+++ b/lib/msun/src/s_cosf.c
@ -18,8 +18,8 @@ static char rcsid[] = "$FreeBSD$";
 #endif

 #include "math.h"
-#define	INLINE_KERNEL_COSF
-#define	INLINE_KERNEL_SINF
+#define	INLINE_KERNEL_COSDF
+#define	INLINE_KERNEL_SINDF
 #include "math_private.h"
 #include "k_cosf.c"
 #include "k_sinf.c"
@ -31,18 +31,6 @@ c2pio2 = 2*M_PI_2,			/* 0x400921FB, 0x54442D18 */
 c3pio2 = 3*M_PI_2,			/* 0x4012D97C, 0x7F3321D2 */
 c4pio2 = 4*M_PI_2;			/* 0x401921FB, 0x54442D18 */

-static inline float
-__kernel_cosdf(double x)
-{
-	return __kernel_cosf((float)x, x - (float)x);
-}
-
-static inline float
-__kernel_sindf(double x)
-{
-	return __kernel_sinf((float)x, x - (float)x, 1);
-}
-
 float
 cosf(float x)
 {
@ -55,7 +43,7 @@ cosf(float x)
 	if(ix <= 0x3f490fda) {		/* |x| ~<= pi/4 */
 	    if(ix<0x39800000)		/* |x| < 2**-12 */
 		if(((int)x)==0) return 1.0;	/* 1 with inexact if x != 0 */
-	    return __kernel_cosf(x,0.0);
+	    return __kernel_cosdf(x);
 	}
 	if(ix<=0x407b53d1) {		/* |x| <= ~5*pi/4 */
 	    if(ix<=0x4016cbe3)		/* |x| <= ~3pi/4 */
@ -77,11 +65,11 @@ cosf(float x)
 	else {
 	    n = __ieee754_rem_pio2f(x,y);
 	    switch(n&3) {
-		case 0: return  __kernel_cosf(y[0],y[1]);
-		case 1: return -__kernel_sinf(y[0],y[1],1);
-		case 2: return -__kernel_cosf(y[0],y[1]);
+		case 0: return  __kernel_cosdf((double)y[0]+y[1]);
+		case 1: return -__kernel_sindf((double)y[0]+y[1]);
+		case 2: return -__kernel_cosdf((double)y[0]+y[1]);
 		default:
-		        return  __kernel_sinf(y[0],y[1],1);
+		        return  __kernel_sindf((double)y[0]+y[1]);
 	    }
 	}
 }
--- a/lib/msun/src/s_sinf.c
+++ b/lib/msun/src/s_sinf.c
@ -18,8 +18,8 @@ static char rcsid[] = "$FreeBSD$";
 #endif

 #include "math.h"
-#define	INLINE_KERNEL_COSF
-#define	INLINE_KERNEL_SINF
+#define	INLINE_KERNEL_COSDF
+#define	INLINE_KERNEL_SINDF
 #include "math_private.h"
 #include "k_cosf.c"
 #include "k_sinf.c"
@ -31,18 +31,6 @@ s2pio2 = 2*M_PI_2,			/* 0x400921FB, 0x54442D18 */
 s3pio2 = 3*M_PI_2,			/* 0x4012D97C, 0x7F3321D2 */
 s4pio2 = 4*M_PI_2;			/* 0x401921FB, 0x54442D18 */

-static inline float
-__kernel_cosdf(double x)
-{
-	return __kernel_cosf((float)x, x - (float)x);
-}
-
-static inline float
-__kernel_sindf(double x)
-{
-	return __kernel_sinf((float)x, x - (float)x, 1);
-}
-
 float
 sinf(float x)
 {
@ -55,7 +43,7 @@ sinf(float x)
 	if(ix <= 0x3f490fda) {		/* |x| ~<= pi/4 */
 	    if(ix<0x39800000)		/* |x| < 2**-12 */
 		if(((int)x)==0) return x;	/* x with inexact if x != 0 */
-	    return __kernel_sinf(x,0.0,0);
+	    return __kernel_sindf(x);
 	}
 	if(ix<=0x407b53d1) {		/* |x| <= ~5*pi/4 */
 	    if(ix<=0x4016cbe3) {	/* |x| <= ~3pi/4 */
@ -83,11 +71,11 @@ sinf(float x)
 	else {
 	    n = __ieee754_rem_pio2f(x,y);
 	    switch(n&3) {
-		case 0: return  __kernel_sinf(y[0],y[1],1);
-		case 1: return  __kernel_cosf(y[0],y[1]);
-		case 2: return -__kernel_sinf(y[0],y[1],1);
+		case 0: return  __kernel_sindf((double)y[0]+y[1]);
+		case 1: return  __kernel_cosdf((double)y[0]+y[1]);
+		case 2: return -__kernel_sindf((double)y[0]+y[1]);
 		default:
-			return -__kernel_cosf(y[0],y[1]);
+			return -__kernel_cosdf((double)y[0]+y[1]);
 	    }
 	}
 }