From 4339c67c485f0b1f7699863fc29f6c06862d1dde Mon Sep 17 00:00:00 2001
From: Bruce Evans <bde@FreeBSD.org>
Date: Mon, 24 Oct 2005 14:08:36 +0000
Subject: [PATCH] Moved the optimization for tiny x from
 __kernel_{cos,sin}[f](x) to {cos_sin}[f](x) so that x doesn't need to be
 reclassified in the "kernel" functions to determine if it is tiny (it still
 needs to be reclassified in the cosine case for other reasons that will go
 away).

This optimization is quite large for exponentially distributed x, since
x is tiny for almost half of the domain, but it is a pessimization for
uniformally distributed x since it takes a little time for all cases
but rarely applies.  Arg reduction on exponentially distributed x
rarely gives a tiny x unless the reduction is null, so it is best to
only do the optimization if the initial x is tiny, which is what this
commit arranges.  The imediate result is an average optimization of
1.4% relative to the previous version in a case that doesn't favour
the optimization (double cos(x) on all float x) and a large
pessimization for the relatively unimportant cases of lgamma[f][_r](x)
on tiny, negative, exponentially distributed x.  The optimization should
be recovered for lgamma*() as part of fixing lgamma*()'s low-quality
arg reduction.

Fixed various wrong constants for the cutoff for "tiny".  For cosine,
the cutoff is when x**2/2! == {FLT or DBL}_EPSILON/2.  We round down
to an integral power of 2 (and for cos() reduce the power by another
1) because the exact cutoff doesn't matter and would take more work
to determine.  For sine, the exact cutoff is larger due to the ration
of terms being x**2/3! instead of x**2/2!, but we use the same cutoff
as for cosine.  We now use a cutoff of 2**-27 for double precision and
2**-12 for single precision.  2**-27 was used in all cases but was
misspelled 2**27 in comments.  Wrong and sloppy cutoffs just cause
missed optimizations (provided the rounding mode is to nearest --
other modes just aren't supported).
---
 lib/msun/src/k_cos.c  | 3 ---
 lib/msun/src/k_cosf.c | 3 ---
 lib/msun/src/k_sin.c  | 6 +-----
 lib/msun/src/k_sinf.c | 6 +-----
 lib/msun/src/s_cos.c  | 6 +++++-
 lib/msun/src/s_cosf.c | 8 +++++---
 lib/msun/src/s_sin.c  | 6 +++++-
 lib/msun/src/s_sinf.c | 6 +++++-
 8 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/lib/msun/src/k_cos.c b/lib/msun/src/k_cos.c
index ad78f8ee9cfc..39a29747daa7 100644
--- a/lib/msun/src/k_cos.c
+++ b/lib/msun/src/k_cos.c
@@ -69,9 +69,6 @@ __kernel_cos(double x, double y)
 	int32_t ix;
 	GET_HIGH_WORD(ix,x);
 	ix &= 0x7fffffff;			/* ix = |x|'s high word*/
-	if(ix<0x3e400000) {			/* if x < 2**27 */
-	    if(((int)x)==0) return one;		/* generate inexact */
-	}
 	z  = x*x;
 	r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
 	if(ix < 0x3FD33333) 			/* if |x| < 0.3 */ 
diff --git a/lib/msun/src/k_cosf.c b/lib/msun/src/k_cosf.c
index 4239ae1e40b7..e1012ec1e45a 100644
--- a/lib/msun/src/k_cosf.c
+++ b/lib/msun/src/k_cosf.c
@@ -36,9 +36,6 @@ __kernel_cosf(float x, float y)
 	int32_t ix;
 	GET_FLOAT_WORD(ix,x);
 	ix &= 0x7fffffff;			/* ix = |x|'s high word*/
-	if(ix<0x32000000) {			/* if x < 2**27 */
-	    if(((int)x)==0) return one;		/* generate inexact */
-	}
 	z  = x*x;
 	r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
 	if(ix < 0x3e99999a) 			/* if |x| < 0.3 */
diff --git a/lib/msun/src/k_sin.c b/lib/msun/src/k_sin.c
index 55e2bdce1954..6db5849c4164 100644
--- a/lib/msun/src/k_sin.c
+++ b/lib/msun/src/k_sin.c
@@ -59,11 +59,7 @@ double
 __kernel_sin(double x, double y, int iy)
 {
 	double z,r,v;
-	int32_t ix;
-	GET_HIGH_WORD(ix,x);
-	ix &= 0x7fffffff;			/* high word of x */
-	if(ix<0x3e400000)			/* |x| < 2**-27 */
-	   {if((int)x==0) return x;}		/* generate inexact */
+
 	z	=  x*x;
 	v	=  z*x;
 	r	=  S2+z*(S3+z*(S4+z*(S5+z*S6)));
diff --git a/lib/msun/src/k_sinf.c b/lib/msun/src/k_sinf.c
index 10a3290d2741..12b820617d2b 100644
--- a/lib/msun/src/k_sinf.c
+++ b/lib/msun/src/k_sinf.c
@@ -33,11 +33,7 @@ float
 __kernel_sinf(float x, float y, int iy)
 {
 	float z,r,v;
-	int32_t ix;
-	GET_FLOAT_WORD(ix,x);
-	ix &= 0x7fffffff;			/* high word of x */
-	if(ix<0x32000000)			/* |x| < 2**-27 */
-	   {if((int)x==0) return x;}		/* generate inexact */
+
 	z	=  x*x;
 	v	=  z*x;
 	r	=  S2+z*(S3+z*(S4+z*(S5+z*S6)));
diff --git a/lib/msun/src/s_cos.c b/lib/msun/src/s_cos.c
index b49b28aa05e3..17afa34e7757 100644
--- a/lib/msun/src/s_cos.c
+++ b/lib/msun/src/s_cos.c
@@ -59,7 +59,11 @@ cos(double x)
 
     /* |x| ~< pi/4 */
 	ix &= 0x7fffffff;
-	if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
+	if(ix <= 0x3fe921fb) {
+	    if(ix<0x3e400000)			/* if x < 2**-27 */
+		if(((int)x)==0) return 1.0;	/* generate inexact */
+	    return __kernel_cos(x,z);
+	}
 
     /* cos(Inf or NaN) is NaN */
 	else if (ix>=0x7ff00000) return x-x;
diff --git a/lib/msun/src/s_cosf.c b/lib/msun/src/s_cosf.c
index 18e02990442d..dd8a759d6a44 100644
--- a/lib/msun/src/s_cosf.c
+++ b/lib/msun/src/s_cosf.c
@@ -20,8 +20,6 @@ static char rcsid[] = "$FreeBSD$";
 #include "math.h"
 #include "math_private.h"
 
-static const float one=1.0;
-
 float
 cosf(float x)
 {
@@ -32,7 +30,11 @@ cosf(float x)
 
     /* |x| ~< pi/4 */
 	ix &= 0x7fffffff;
-	if(ix <= 0x3f490fd8) return __kernel_cosf(x,z);
+	if(ix <= 0x3f490fd8) {
+	    if(ix<0x39800000)			/* if x < 2**-12 */
+		if(((int)x)==0) return 1.0;	/* generate inexact */
+	    return __kernel_cosf(x,z);
+	}
 
     /* cos(Inf or NaN) is NaN */
 	else if (ix>=0x7f800000) return x-x;
diff --git a/lib/msun/src/s_sin.c b/lib/msun/src/s_sin.c
index 4d3b9d50cb9a..af9ee628b016 100644
--- a/lib/msun/src/s_sin.c
+++ b/lib/msun/src/s_sin.c
@@ -59,7 +59,11 @@ sin(double x)
 
     /* |x| ~< pi/4 */
 	ix &= 0x7fffffff;
-	if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
+	if(ix <= 0x3fe921fb) {
+	    if(ix<0x3e400000)			/* |x| < 2**-27 */
+	       {if((int)x==0) return x;}	/* generate inexact */
+	    return __kernel_sin(x,z,0);
+	}
 
     /* sin(Inf or NaN) is NaN */
 	else if (ix>=0x7ff00000) return x-x;
diff --git a/lib/msun/src/s_sinf.c b/lib/msun/src/s_sinf.c
index 8ea9e559a56d..a2485e81beca 100644
--- a/lib/msun/src/s_sinf.c
+++ b/lib/msun/src/s_sinf.c
@@ -30,7 +30,11 @@ sinf(float x)
 
     /* |x| ~< pi/4 */
 	ix &= 0x7fffffff;
-	if(ix <= 0x3f490fd8) return __kernel_sinf(x,z,0);
+	if(ix <= 0x3f490fd8) {
+	    if(ix<0x39800000)			/* if x < 2**-12 */
+		if(((int)x)==0) return x;	/* generate inexact */
+	    return __kernel_sinf(x,z,0);
+	}
 
     /* sin(Inf or NaN) is NaN */
 	else if (ix>=0x7f800000) return x-x;