Use double arithmetic instead of simulating it with two floats. This

results in a performance gain on the order of 10% for amd64 (sledge), ia64 (pluto1), i386+SSE (Pentium 4), and sparc64 (panther), and a negligible improvement for i386 without SSE. (The i386 port still uses the hardware instruction, though.)
2005-02-21 17:44:57 +00:00 · 2005-02-21 17:44:57 +00:00 · 96efaf6c36
commit 96efaf6c36
parent 473156220d
1 changed files with 8 additions and 13 deletions
--- a/lib/msun/src/e_expf.c
+++ b/lib/msun/src/e_expf.c
@ -27,27 +27,24 @@ huge	= 1.0e+30,
 twom100 = 7.8886090522e-31,      /* 2**-100=0x0d800000 */
 o_threshold=  8.8721679688e+01,  /* 0x42b17180 */
 u_threshold= -1.0397208405e+02,  /* 0xc2cff1b5 */
-ln2HI[2]   ={ 6.9313812256e-01,		/* 0x3f317180 */
-	     -6.9313812256e-01,},	/* 0xbf317180 */
-ln2LO[2]   ={ 9.0580006145e-06,  	/* 0x3717f7d1 */
-	     -9.0580006145e-06,},	/* 0xb717f7d1 */
 invln2 =  1.4426950216e+00, 		/* 0x3fb8aa3b */
 P1   =  1.6666667163e-01, /* 0x3e2aaaab */
 P2   = -2.7777778450e-03, /* 0xbb360b61 */
 P3   =  6.6137559770e-05, /* 0x388ab355 */
 P4   = -1.6533901999e-06, /* 0xb5ddea0e */
 P5   =  4.1381369442e-08; /* 0x3331bb4c */
+double ln2[2] = { 6.93147180369123816490e-01, -6.93147180369123816490e-01 };

 float
-__ieee754_expf(float x)	/* default IEEE double exp */
+__ieee754_expf(float x)	/* IEEE float exp */
 {
-	float y,hi=0.0,lo=0.0,c,t;
+	float y,c,t;
 	int32_t k=0,xsb;
 	u_int32_t hx;

 	GET_FLOAT_WORD(hx,x);
 	xsb = (hx>>31)&1;		/* sign bit of x */
-	hx &= 0x7fffffff;		/* high word of |x| */
+	hx &= 0x7fffffff;		/* |x| */

    /* filter out non-finite argument */
 	if(hx >= 0x42b17218) {			/* if |x|>=88.721... */
@ -62,14 +59,12 @@ __ieee754_expf(float x)	/* default IEEE double exp */
    /* argument reduction */
 	if(hx > 0x3eb17218) {		/* if  |x| > 0.5 ln2 */
 	    if(hx < 0x3F851592) {	/* and |x| < 1.5 ln2 */
-		hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb;
+		x = x-ln2[xsb]; k = 1-xsb-xsb;
 	    } else {
 		k  = invln2*x+halF[xsb];
 		t  = k;
-		hi = x - t*ln2HI[0];	/* t*ln2HI is exact here */
-		lo = t*ln2LO[0];
+		x = x - t*ln2[0];
 	    }
-	    x  = hi - lo;
 	}
 	else if(hx < 0x31800000)  {	/* when |x|<2**-28 */
 	    if(huge+x>one) return one+x;/* trigger inexact */
@ -79,8 +74,8 @@ __ieee754_expf(float x)	/* default IEEE double exp */
    /* x is now in primary range */
 	t  = x*x;
 	c  = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
-	if(k==0) 	return one-((x*c)/(c-(float)2.0)-x);
-	else 		y = one-((lo-(x*c)/((float)2.0-c))-hi);
+	y = one-(((double)x*c)/(c-2.0)-x);
+	if(k==0) 	return y;
 	if(k >= -125) {
 	    u_int32_t hy;
 	    GET_FLOAT_WORD(hy,y);