Use a better method of scaling by 2**k. Instead of adding to the

exponent bits of the reduced result, construct 2**k (hopefully in
parallel with the construction of the reduced result) and multiply by
it.  This tends to be much faster if the construction of 2**k is
actually in parallel, and might be faster even with no parallelism
since adjustment of the exponent requires a read-modify-wrtite at an
unfortunate time for pipelines.

In some cases involving exp2* on amd64 (A64), this change saves about
40 cycles or 30%.  I think it is inherently only about 12 cycles faster
in these cases and the rest of the speedup is from partly-accidentally
avoiding compiler pessimizations (the construction of 2**k is now
manually scheduled for good results, and -O2 doesn't always mess this
up).  In most cases on amd64 (A64) and i386 (A64) the speedup is about
20 cycles.  The worst case that I found is expf on ia64 where this
change is a pessimization of about 10 cycles or 5%.  The manual
scheduling for plain exp[f] is harder and not as tuned.

Details specific to expm1*:
- the saving is closer to 12 cycles than to 40 for expm1* on i386 (A64).
  For some reason it is much larger for negative args.
- also convert to __FBSDID().
This commit is contained in:
Bruce Evans 2008-02-07 09:42:19 +00:00
parent 626ac252ea
commit a00672cff9
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=176082
2 changed files with 16 additions and 26 deletions

View File

@ -10,9 +10,8 @@
* ====================================================
*/
#ifndef lint
static char rcsid[] = "$FreeBSD$";
#endif
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/* expm1(x)
* Returns exp(x)-1, the exponential of x minus 1.
@ -130,7 +129,7 @@ Q5 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
double
expm1(double x)
{
double y,hi,lo,c,t,e,hxs,hfx,r1;
double y,hi,lo,c,t,e,hxs,hfx,r1,twopk;
int32_t k,xsb;
u_int32_t hx;
@ -187,6 +186,7 @@ expm1(double x)
e = hxs*((r1-t)/(6.0 - x*t));
if(k==0) return x - (x*e-hxs); /* c is 0 */
else {
INSERT_WORDS(twopk,0x3ff00000+(k<<20),0); /* 2^k */
e = (x*(e-c)-c);
e -= hxs;
if(k== -1) return 0.5*(x-e)-0.5;
@ -194,26 +194,21 @@ expm1(double x)
if(x < -0.25) return -2.0*(e-(x+0.5));
else return one+2.0*(x-e);
if (k <= -2 || k>56) { /* suffice to return exp(x)-1 */
u_int32_t high;
y = one-(e-x);
GET_HIGH_WORD(high,y);
SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
if (k == 1024) y = y*2.0*0x1p1023;
else y = y*twopk;
return y-one;
}
t = one;
if(k<20) {
u_int32_t high;
SET_HIGH_WORD(t,0x3ff00000 - (0x200000>>k)); /* t=1-2^-k */
y = t-(e-x);
GET_HIGH_WORD(high,y);
SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
y = y*twopk;
} else {
u_int32_t high;
SET_HIGH_WORD(t,((0x3ff-k)<<20)); /* 2^-k */
y = x-(e+t);
y += one;
GET_HIGH_WORD(high,y);
SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
y = y*twopk;
}
}
return y;

View File

@ -13,9 +13,8 @@
* ====================================================
*/
#ifndef lint
static char rcsid[] = "$FreeBSD$";
#endif
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "math.h"
#include "math_private.h"
@ -38,7 +37,7 @@ Q5 = -2.0109921195e-07; /* 0xb457edbb */
float
expm1f(float x)
{
float y,hi,lo,c,t,e,hxs,hfx,r1;
float y,hi,lo,c,t,e,hxs,hfx,r1,twopk;
int32_t k,xsb;
u_int32_t hx;
@ -92,6 +91,7 @@ expm1f(float x)
e = hxs*((r1-t)/((float)6.0 - x*t));
if(k==0) return x - (x*e-hxs); /* c is 0 */
else {
SET_FLOAT_WORD(twopk,0x3f800000+(k<<23)); /* 2^k */
e = (x*(e-c)-c);
e -= hxs;
if(k== -1) return (float)0.5*(x-e)-(float)0.5;
@ -99,26 +99,21 @@ expm1f(float x)
if(x < (float)-0.25) return -(float)2.0*(e-(x+(float)0.5));
else return one+(float)2.0*(x-e);
if (k <= -2 || k>56) { /* suffice to return exp(x)-1 */
int32_t i;
y = one-(e-x);
GET_FLOAT_WORD(i,y);
SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */
if (k == 128) y = y*2.0F*0x1p127F;
else y = y*twopk;
return y-one;
}
t = one;
if(k<23) {
int32_t i;
SET_FLOAT_WORD(t,0x3f800000 - (0x1000000>>k)); /* t=1-2^-k */
y = t-(e-x);
GET_FLOAT_WORD(i,y);
SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */
y = y*twopk;
} else {
int32_t i;
SET_FLOAT_WORD(t,((0x7f-k)<<23)); /* 2^-k */
y = x-(e+t);
y += one;
GET_FLOAT_WORD(i,y);
SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */
y = y*twopk;
}
}
return y;