Extract the high and low words together. With gcc-3.4 on uniformly
distributed non-large args, this saves about 14 of 134 cycles for Athlon64s and about 5 of 199 cycles for AthlonXPs. Moved the check for x == 0 inside the check for subnormals. With gcc-3.4 on uniformly distributed non-large args, this saves another 5 cycles on Athlon64s and loses 1 cycle on AthlonXPs. Use INSERT_WORDS() and not SET_HIGH_WORD() when converting the first approximation from bits to double. With gcc-3.4 on uniformly distributed non-large args, this saves another 4 cycles on both Athlon64s and AthlonXPs. Accessing doubles as 2 words may be an optimization on old CPUs, but on current CPUs it tends to cause extra operations and pipeline stalls, especially for writes, even when only 1 of the words needs to be accessed. Removed an unused variable.
This commit is contained in:
parent
b98bf8f1d5
commit
761a5296f9
@ -43,17 +43,13 @@ cbrt(double x)
|
||||
uint64_t bits;
|
||||
} u;
|
||||
double r,s,t=0.0,w;
|
||||
uint64_t bits;
|
||||
u_int32_t sign;
|
||||
u_int32_t high,low;
|
||||
|
||||
GET_HIGH_WORD(hx,x);
|
||||
EXTRACT_WORDS(hx,low,x);
|
||||
sign=hx&0x80000000; /* sign= sign(x) */
|
||||
hx ^=sign;
|
||||
if(hx>=0x7ff00000) return(x+x); /* cbrt(NaN,INF) is itself */
|
||||
GET_LOW_WORD(low,x);
|
||||
if((hx|low)==0)
|
||||
return(x); /* cbrt(0) is itself */
|
||||
|
||||
/*
|
||||
* Rough cbrt to 5 bits:
|
||||
@ -70,13 +66,15 @@ cbrt(double x)
|
||||
* subtraction virtually to keep e >= 0 so that ordinary integer
|
||||
* division rounds towards minus infinity; this is also efficient.
|
||||
*/
|
||||
if(hx<0x00100000) { /* subnormal number */
|
||||
if(hx<0x00100000) { /* zero or subnormal? */
|
||||
if((hx|low)==0)
|
||||
return(x); /* cbrt(0) is itself */
|
||||
SET_HIGH_WORD(t,0x43500000); /* set t= 2**54 */
|
||||
t*=x;
|
||||
GET_HIGH_WORD(high,t);
|
||||
SET_HIGH_WORD(t,sign|((high&0x7fffffff)/3+B2));
|
||||
INSERT_WORDS(t,sign|((high&0x7fffffff)/3+B2),0);
|
||||
} else
|
||||
SET_HIGH_WORD(t,sign|(hx/3+B1));
|
||||
INSERT_WORDS(t,sign|(hx/3+B1),0);
|
||||
|
||||
/*
|
||||
* New cbrt to 23 bits:
|
||||
|
Loading…
x
Reference in New Issue
Block a user