Use double arithmetic instead of simulating it with two floats. This
results in a performance gain on the order of 10% for amd64 (sledge), ia64 (pluto1), i386+SSE (Pentium 4), and sparc64 (panther), and a negligible improvement for i386 without SSE. (The i386 port still uses the hardware instruction, though.)
This commit is contained in:
parent
473156220d
commit
96efaf6c36
@ -27,27 +27,24 @@ huge = 1.0e+30,
|
||||
twom100 = 7.8886090522e-31, /* 2**-100=0x0d800000 */
|
||||
o_threshold= 8.8721679688e+01, /* 0x42b17180 */
|
||||
u_threshold= -1.0397208405e+02, /* 0xc2cff1b5 */
|
||||
ln2HI[2] ={ 6.9313812256e-01, /* 0x3f317180 */
|
||||
-6.9313812256e-01,}, /* 0xbf317180 */
|
||||
ln2LO[2] ={ 9.0580006145e-06, /* 0x3717f7d1 */
|
||||
-9.0580006145e-06,}, /* 0xb717f7d1 */
|
||||
invln2 = 1.4426950216e+00, /* 0x3fb8aa3b */
|
||||
P1 = 1.6666667163e-01, /* 0x3e2aaaab */
|
||||
P2 = -2.7777778450e-03, /* 0xbb360b61 */
|
||||
P3 = 6.6137559770e-05, /* 0x388ab355 */
|
||||
P4 = -1.6533901999e-06, /* 0xb5ddea0e */
|
||||
P5 = 4.1381369442e-08; /* 0x3331bb4c */
|
||||
/*
 * ln2[0] = +ln(2), ln2[1] = -ln(2); indexed by the sign bit of x during
 * argument reduction (x - ln2[xsb]) and used as ln2[0] in x - k*ln2.
 *
 * The value is ln(2) rounded to double (0x3fe62e42, 0xfefa39ef).  The
 * constant used previously, 6.93147180369123816490e-01, is only the high
 * part of the double-precision hi/lo split of ln(2) (0x3fe62e42,
 * 0xfee00000) and is about 1.9e-10 too small.  Because no low part is
 * subtracted afterwards, that error is multiplied by k in the reduction
 * and becomes a noticeable fraction of a float ulp for large |x|.
 */
double ln2[2] = { 6.93147180559945286227e-01, -6.93147180559945286227e-01 };
|
||||
|
||||
float
|
||||
__ieee754_expf(float x) /* default IEEE double exp */
|
||||
__ieee754_expf(float x) /* IEEE float exp */
|
||||
{
|
||||
float y,hi=0.0,lo=0.0,c,t;
|
||||
float y,c,t;
|
||||
int32_t k=0,xsb;
|
||||
u_int32_t hx;
|
||||
|
||||
GET_FLOAT_WORD(hx,x);
|
||||
xsb = (hx>>31)&1; /* sign bit of x */
|
||||
hx &= 0x7fffffff; /* high word of |x| */
|
||||
hx &= 0x7fffffff; /* |x| */
|
||||
|
||||
/* filter out non-finite argument */
|
||||
if(hx >= 0x42b17218) { /* if |x|>=88.721... */
|
||||
@ -62,14 +59,12 @@ __ieee754_expf(float x) /* default IEEE double exp */
|
||||
/* argument reduction */
|
||||
if(hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
|
||||
if(hx < 0x3F851592) { /* and |x| < 1.5 ln2 */
|
||||
hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb;
|
||||
x = x-ln2[xsb]; k = 1-xsb-xsb;
|
||||
} else {
|
||||
k = invln2*x+halF[xsb];
|
||||
t = k;
|
||||
hi = x - t*ln2HI[0]; /* t*ln2HI is exact here */
|
||||
lo = t*ln2LO[0];
|
||||
x = x - t*ln2[0];
|
||||
}
|
||||
x = hi - lo;
|
||||
}
|
||||
else if(hx < 0x31800000) { /* when |x|<2**-28 */
|
||||
if(huge+x>one) return one+x;/* trigger inexact */
|
||||
@ -79,8 +74,8 @@ __ieee754_expf(float x) /* default IEEE double exp */
|
||||
/* x is now in primary range */
|
||||
t = x*x;
|
||||
c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
|
||||
if(k==0) return one-((x*c)/(c-(float)2.0)-x);
|
||||
else y = one-((lo-(x*c)/((float)2.0-c))-hi);
|
||||
y = one-(((double)x*c)/(c-2.0)-x);
|
||||
if(k==0) return y;
|
||||
if(k >= -125) {
|
||||
u_int32_t hy;
|
||||
GET_FLOAT_WORD(hy,y);
|
||||
|
Loading…
x
Reference in New Issue
Block a user