Optimize the conversion to bits a little (by about 11 cycles or 16%
on i386 (A64), 5 cycles on amd64 (A64), and 3 cycles on ia64).  gcc
tends to generate very bad code for accessing floating point values
as bits except when the integer accesses have the same width as the
floating point values, and direct accesses to bit-fields (as is common
only for long double precision) always give accesses of a mismatched
width.  Use the
expsign access method, which is good for 80-bit long doubles and
hopefully no worse for 128-bit long doubles.  Now the generated code
is less bad.  There is still unnecessary copying of the arg on amd64
and i386 and mysterious extra slowness on amd64.
This commit is contained in:
Bruce Evans 2008-02-22 11:59:05 +00:00
parent cbd2c621f8
commit f839bac29c

View File

@ -32,6 +32,11 @@ __FBSDID("$FreeBSD$");
#include "fpmath.h"
#if LDBL_MAX_EXP != 0x4000
/* We also require the usual bias, min exp and expsign packing. */
#error "Unsupported long double format"
#endif
#define BIAS (LDBL_MAX_EXP - 1)
static const float
@ -50,16 +55,19 @@ long double
rintl(long double x)
{
union IEEEl2bits u;
short sign;
uint32_t expsign;
int ex, sign;
u.e = x;
expsign = u.xbits.expsign;
ex = expsign & 0x7fff;
if (u.bits.exp >= BIAS + LDBL_MANT_DIG - 1) {
if (u.bits.exp == BIAS + LDBL_MAX_EXP)
if (ex >= BIAS + LDBL_MANT_DIG - 1) {
if (ex == BIAS + LDBL_MAX_EXP)
return (x + x); /* Inf, NaN, or unsupported format */
return (x); /* finite and already an integer */
}
sign = u.bits.sign;
sign = expsign >> 15;
/*
* The following code assumes that intermediate results are
@ -75,7 +83,7 @@ rintl(long double x)
* If the result is +-0, then it must have the same sign as x, but
* the above calculation doesn't always give this. Fix up the sign.
*/
if (u.bits.exp < BIAS && x == 0.0L)
if (ex < BIAS && x == 0.0L)
return (zero[sign]);
return (x);