Use fairly optimal minimax polynomials for __kernel_cosf() and

__kernel_sinf(). The old ones were the double-precision polynomials with coefficients truncated to float. Truncation is not a good way to convert minimax polynomials to lower precision. Optimize for efficiency and use the lowest-degree polynomials that give a relative error of less than 1 ulp -- degree 8 instead of 14 for cosf and degree 9 instead of 13 for sinf. For sinf, the degree 8 polynomial happens to be 6 times more accurate than the old degree 14 one, but this only gives a tiny amount of extra accuracy in results -- we just need to use a a degree high enough to give a polynomial whose relative accuracy in infinite precision (but with float coefficients) is a small fraction of a float ulp (fdlibm generally uses 1/32 for the small fraction, and the fraction for our degree 8 polynomial is about 1/600). The maximum relative errors for cosf() and sinf() are now 0.7719 ulps and 0.7969 ulps, respectively.
2005-10-28 13:36:58 +00:00 · 2005-10-28 13:36:58 +00:00 · 8e62cdabe0
commit 8e62cdabe0
parent 54091cfc82
2 changed files with 15 additions and 16 deletions
--- a/lib/msun/src/k_cosf.c
+++ b/lib/msun/src/k_cosf.c
@ -1,5 +1,6 @@
 /* k_cosf.c -- float version of k_cos.c
 * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
+ * Debugged and optimized by Bruce D. Evans.
 */

 /*
@ -20,14 +21,12 @@ static char rcsid[] = "$FreeBSD$";
 #include "math.h"
 #include "math_private.h"

+/* Range of maximum relative error in polynomial: ~[-1.15e-10, 1.169e-10]. */
 static const float
-one =  1.0000000000e+00, /* 0x3f800000 */
-C1  =  4.1666667908e-02, /* 0x3d2aaaab */
-C2  = -1.3888889225e-03, /* 0xbab60b61 */
-C3  =  2.4801587642e-05, /* 0x37d00d01 */
-C4  = -2.7557314297e-07, /* 0xb493f27c */
-C5  =  2.0875723372e-09, /* 0x310f74f6 */
-C6  = -1.1359647598e-11; /* 0xad47d74e */
+one =  1.0,
+C1  =  0xaaaaa5.0p-28,	/*  0.04166664555668830871582031250 */
+C2  = -0xb60615.0p-33,	/* -0.001388731063343584537506103516 */
+C3  =  0xccf47d.0p-39;	/*  0.00002443254288664320483803749084 */

 float
 __kernel_cosf(float x, float y)
@ -35,7 +34,7 @@ __kernel_cosf(float x, float y)
 	float hz,z,r,w;

 	z  = x*x;
-	r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
+	r  = z*(C1+z*(C2+z*C3));
 	hz = (float)0.5*z;
 	w  = one-hz;
 	return w + (((one-w)-hz) + (z*r-x*y));
--- a/lib/msun/src/k_sinf.c
+++ b/lib/msun/src/k_sinf.c
@ -1,5 +1,6 @@
 /* k_sinf.c -- float version of k_sin.c
 * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
+ * Optimized by Bruce D. Evans.
 */

 /*
@ -20,14 +21,13 @@ static char rcsid[] = "$FreeBSD$";
 #include "math.h"
 #include "math_private.h"

+/* Range of maximum relative error in polynomial: ~[-1.61e-10, 1.621e-10]. */
 static const float
-half =  5.0000000000e-01,/* 0x3f000000 */
-S1  = -1.6666667163e-01, /* 0xbe2aaaab */
-S2  =  8.3333337680e-03, /* 0x3c088889 */
-S3  = -1.9841270114e-04, /* 0xb9500d01 */
-S4  =  2.7557314297e-06, /* 0x3638ef1b */
-S5  = -2.5050759689e-08, /* 0xb2d72f34 */
-S6  =  1.5896910177e-10; /* 0x2f2ec9d3 */
+half = 0.5,
+S1  = -0xaaaaab.0p-26,	/* -0.1666666716337203979492187500 */
+S2  =  0x8888ba.0p-30,	/*  0.008333379402756690979003906250 */
+S3  = -0xd02cb0.0p-36,	/* -0.0001985307317227125167846679687 */
+S4  =  0xbe18ff.0p-42;	/*  0.000002832675590980215929448604584 */

 float
 __kernel_sinf(float x, float y, int iy)
@ -36,7 +36,7 @@ __kernel_sinf(float x, float y, int iy)

 	z	=  x*x;
 	v	=  z*x;
-	r	=  S2+z*(S3+z*(S4+z*(S5+z*S6)));
+	r	=  S2+z*(S3+z*S4);
 	if(iy==0) return x+v*(S1+z*r);
 	else      return x-((z*(half*y-v*r)-y)-v*S1);
 }