1994-08-19 09:40:01 +00:00
|
|
|
/* e_powf.c -- float version of e_pow.c.
|
|
|
|
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ====================================================
|
|
|
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
|
|
|
*
|
|
|
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
|
|
|
* Permission to use, copy, modify, and distribute this
|
1995-05-30 05:51:47 +00:00
|
|
|
* software is freely granted, provided that this notice
|
1994-08-19 09:40:01 +00:00
|
|
|
* is preserved.
|
|
|
|
* ====================================================
|
|
|
|
*/
|
|
|
|
|
Use the expression (x+0.0)-(y+0.0) instead of x+y when mixing NaN arg(s).
This uses 2 tricks to improve consistency so that more serious problems
aren't hidden in simple regression tests by noise for the NaNs:
- for a signaling NaN, adding 0.0 generates the invalid exception and
converts to a quiet NaN, and doesn't have too many effects for other
types of args (it converts -0 to +0 in some rounding modes, but that
hopefully doesn't change the result after adding the NaN arg). This
avoids some inconsistencies on i386 and ia64. On these arches, the
result of an operation on 2 NaNs is apparently the largest or the
smallest of the NaNs as bits (consistently largest or smallest for
each arch, but the opposite). I forget which way the comparison
goes and if the sign bit affects it. The quiet bit is is handled
poorly by not always setting it before the comparision or ignoring
it. Thus if one of the args was originally a signaling NaN and the
other was originally a quiet NaN, then the result depends too much
on whether the signaling NaN has been quieted at this point, which
in turn depends on optimizations and promotions. E.g., passing float
signaling NaNs to double functions must quiet them on conversion;
on i387, loading a signaling NaN of type float or double (but not
long double) into a register involves a conversion, so it quiets
signaling NaNs, so if the addition has 2 register operands than it
only sees quiet NaNs, but if the addition has a memory operand then
it sees a signaling NaN iff it is in the memory operand.
- subtraction instead of addition is used to avoid a dubious optimization
in old versions of gcc. For SSE operations, mixing of NaNs apparently
always gives the target operand. This is not as good as the i387
and ia64 behaviour. It doesn't mix NaNs at all, and makes addition
not quite commutative. Old versions of gcc sometimes rewrite x+y
to y+x and thus give different results (in bits) for NaNs. gcc-3.3.3
rewrites x+y to y+x for one of pow() and powf() but not the other,
so starting from float NaN args x and y, powf(x, y) was almost always
different from pow(x, y).
These tricks won't give consistency of 2-arg float and double functions
with long double ones on amd64, since long double ones use the i387
which has different semantics from SSE.
Convert to __FBSDID().
2008-02-14 09:42:24 +00:00
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
1994-08-19 09:40:01 +00:00
|
|
|
|
|
|
|
#include "math.h"
|
|
|
|
#include "math_private.h"
|
|
|
|
|
|
|
|
static const float
|
|
|
|
bp[] = {1.0, 1.5,},
|
|
|
|
dp_h[] = { 0.0, 5.84960938e-01,}, /* 0x3f15c000 */
|
|
|
|
dp_l[] = { 0.0, 1.56322085e-06,}, /* 0x35d1cfdc */
|
|
|
|
zero = 0.0,
|
2018-07-08 16:33:58 +00:00
|
|
|
half = 0.5,
|
|
|
|
qrtr = 0.25,
|
|
|
|
thrd = 3.33333343e-01, /* 0x3eaaaaab */
|
1994-08-19 09:40:01 +00:00
|
|
|
one = 1.0,
|
|
|
|
two = 2.0,
|
|
|
|
two24 = 16777216.0, /* 0x4b800000 */
|
|
|
|
huge = 1.0e30,
|
|
|
|
tiny = 1.0e-30,
|
|
|
|
/* poly coefs for (3/2)*(log(x)-2s-2/3*s**3 */
|
|
|
|
L1 = 6.0000002384e-01, /* 0x3f19999a */
|
|
|
|
L2 = 4.2857143283e-01, /* 0x3edb6db7 */
|
|
|
|
L3 = 3.3333334327e-01, /* 0x3eaaaaab */
|
|
|
|
L4 = 2.7272811532e-01, /* 0x3e8ba305 */
|
|
|
|
L5 = 2.3066075146e-01, /* 0x3e6c3255 */
|
|
|
|
L6 = 2.0697501302e-01, /* 0x3e53f142 */
|
|
|
|
P1 = 1.6666667163e-01, /* 0x3e2aaaab */
|
|
|
|
P2 = -2.7777778450e-03, /* 0xbb360b61 */
|
|
|
|
P3 = 6.6137559770e-05, /* 0x388ab355 */
|
|
|
|
P4 = -1.6533901999e-06, /* 0xb5ddea0e */
|
|
|
|
P5 = 4.1381369442e-08, /* 0x3331bb4c */
|
|
|
|
lg2 = 6.9314718246e-01, /* 0x3f317218 */
|
|
|
|
lg2_h = 6.93145752e-01, /* 0x3f317200 */
|
|
|
|
lg2_l = 1.42860654e-06, /* 0x35bfbe8c */
|
|
|
|
ovt = 4.2995665694e-08, /* -(128-log2(ovfl+.5ulp)) */
|
|
|
|
cp = 9.6179670095e-01, /* 0x3f76384f =2/(3ln2) */
|
2008-02-14 10:23:51 +00:00
|
|
|
cp_h = 9.6191406250e-01, /* 0x3f764000 =12b cp */
|
|
|
|
cp_l = -1.1736857402e-04, /* 0xb8f623c6 =tail of cp_h */
|
1994-08-19 09:40:01 +00:00
|
|
|
ivln2 = 1.4426950216e+00, /* 0x3fb8aa3b =1/ln2 */
|
|
|
|
ivln2_h = 1.4426879883e+00, /* 0x3fb8aa00 =16b 1/ln2*/
|
|
|
|
ivln2_l = 7.0526075433e-06; /* 0x36eca570 =1/ln2 tail*/
|
|
|
|
|
2002-05-28 18:15:04 +00:00
|
|
|
float
|
|
|
|
__ieee754_powf(float x, float y)
|
1994-08-19 09:40:01 +00:00
|
|
|
{
|
|
|
|
float z,ax,z_h,z_l,p_h,p_l;
|
2004-06-01 19:33:30 +00:00
|
|
|
float y1,t1,t2,r,s,sn,t,u,v,w;
|
1994-08-19 09:40:01 +00:00
|
|
|
int32_t i,j,k,yisint,n;
|
|
|
|
int32_t hx,hy,ix,iy,is;
|
|
|
|
|
|
|
|
GET_FLOAT_WORD(hx,x);
|
|
|
|
GET_FLOAT_WORD(hy,y);
|
|
|
|
ix = hx&0x7fffffff; iy = hy&0x7fffffff;
|
|
|
|
|
|
|
|
/* y==zero: x**0 = 1 */
|
1995-05-30 05:51:47 +00:00
|
|
|
if(iy==0) return one;
|
1994-08-19 09:40:01 +00:00
|
|
|
|
2011-10-21 06:26:07 +00:00
|
|
|
/* x==1: 1**y = 1, even if y is NaN */
|
|
|
|
if (hx==0x3f800000) return one;
|
|
|
|
|
Use the expression (x+0.0)-(y+0.0) instead of x+y when mixing NaN arg(s).
This uses 2 tricks to improve consistency so that more serious problems
aren't hidden in simple regression tests by noise for the NaNs:
- for a signaling NaN, adding 0.0 generates the invalid exception and
converts to a quiet NaN, and doesn't have too many effects for other
types of args (it converts -0 to +0 in some rounding modes, but that
hopefully doesn't change the result after adding the NaN arg). This
avoids some inconsistencies on i386 and ia64. On these arches, the
result of an operation on 2 NaNs is apparently the largest or the
smallest of the NaNs as bits (consistently largest or smallest for
each arch, but the opposite). I forget which way the comparison
goes and if the sign bit affects it. The quiet bit is is handled
poorly by not always setting it before the comparision or ignoring
it. Thus if one of the args was originally a signaling NaN and the
other was originally a quiet NaN, then the result depends too much
on whether the signaling NaN has been quieted at this point, which
in turn depends on optimizations and promotions. E.g., passing float
signaling NaNs to double functions must quiet them on conversion;
on i387, loading a signaling NaN of type float or double (but not
long double) into a register involves a conversion, so it quiets
signaling NaNs, so if the addition has 2 register operands than it
only sees quiet NaNs, but if the addition has a memory operand then
it sees a signaling NaN iff it is in the memory operand.
- subtraction instead of addition is used to avoid a dubious optimization
in old versions of gcc. For SSE operations, mixing of NaNs apparently
always gives the target operand. This is not as good as the i387
and ia64 behaviour. It doesn't mix NaNs at all, and makes addition
not quite commutative. Old versions of gcc sometimes rewrite x+y
to y+x and thus give different results (in bits) for NaNs. gcc-3.3.3
rewrites x+y to y+x for one of pow() and powf() but not the other,
so starting from float NaN args x and y, powf(x, y) was almost always
different from pow(x, y).
These tricks won't give consistency of 2-arg float and double functions
with long double ones on amd64, since long double ones use the i387
which has different semantics from SSE.
Convert to __FBSDID().
2008-02-14 09:42:24 +00:00
|
|
|
/* y!=zero: result is NaN if either arg is NaN */
|
1994-08-19 09:40:01 +00:00
|
|
|
if(ix > 0x7f800000 ||
|
|
|
|
iy > 0x7f800000)
|
2018-07-17 07:42:14 +00:00
|
|
|
return nan_mix(x, y);
|
1994-08-19 09:40:01 +00:00
|
|
|
|
|
|
|
/* determine if y is an odd int when x < 0
|
|
|
|
* yisint = 0 ... y is not an integer
|
|
|
|
* yisint = 1 ... y is an odd int
|
|
|
|
* yisint = 2 ... y is an even int
|
|
|
|
*/
|
|
|
|
yisint = 0;
|
1995-05-30 05:51:47 +00:00
|
|
|
if(hx<0) {
|
1994-08-19 09:40:01 +00:00
|
|
|
if(iy>=0x4b800000) yisint = 2; /* even integer y */
|
|
|
|
else if(iy>=0x3f800000) {
|
|
|
|
k = (iy>>23)-0x7f; /* exponent */
|
|
|
|
j = iy>>(23-k);
|
|
|
|
if((j<<(23-k))==iy) yisint = 2-(j&1);
|
1995-05-30 05:51:47 +00:00
|
|
|
}
|
|
|
|
}
|
1994-08-19 09:40:01 +00:00
|
|
|
|
|
|
|
/* special value of y */
|
|
|
|
if (iy==0x7f800000) { /* y is +-inf */
|
|
|
|
if (ix==0x3f800000)
|
2011-10-21 06:26:07 +00:00
|
|
|
return one; /* (-1)**+-inf is NaN */
|
1994-08-19 09:40:01 +00:00
|
|
|
else if (ix > 0x3f800000)/* (|x|>1)**+-inf = inf,0 */
|
|
|
|
return (hy>=0)? y: zero;
|
|
|
|
else /* (|x|<1)**-,+inf = inf,0 */
|
|
|
|
return (hy<0)?-y: zero;
|
1995-05-30 05:51:47 +00:00
|
|
|
}
|
1994-08-19 09:40:01 +00:00
|
|
|
if(iy==0x3f800000) { /* y is +-1 */
|
|
|
|
if(hy<0) return one/x; else return x;
|
|
|
|
}
|
|
|
|
if(hy==0x40000000) return x*x; /* y is 2 */
|
|
|
|
if(hy==0x3f000000) { /* y is 0.5 */
|
|
|
|
if(hx>=0) /* x >= +0 */
|
1997-03-09 16:29:29 +00:00
|
|
|
return __ieee754_sqrtf(x);
|
1994-08-19 09:40:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
ax = fabsf(x);
|
|
|
|
/* special value of x */
|
|
|
|
if(ix==0x7f800000||ix==0||ix==0x3f800000){
|
|
|
|
z = ax; /*x is +-0,+-inf,+-1*/
|
|
|
|
if(hy<0) z = one/z; /* z = (1/|x|) */
|
|
|
|
if(hx<0) {
|
|
|
|
if(((ix-0x3f800000)|yisint)==0) {
|
|
|
|
z = (z-z)/(z-z); /* (-1)**non-int is NaN */
|
1995-05-30 05:51:47 +00:00
|
|
|
} else if(yisint==1)
|
1994-08-19 09:40:01 +00:00
|
|
|
z = -z; /* (x<0)**odd = -(|x|**odd) */
|
|
|
|
}
|
|
|
|
return z;
|
|
|
|
}
|
1995-05-30 05:51:47 +00:00
|
|
|
|
2004-06-01 19:33:30 +00:00
|
|
|
n = ((u_int32_t)hx>>31)-1;
|
|
|
|
|
1994-08-19 09:40:01 +00:00
|
|
|
/* (x<0)**(non-int) is NaN */
|
2004-06-01 19:33:30 +00:00
|
|
|
if((n|yisint)==0) return (x-x)/(x-x);
|
|
|
|
|
|
|
|
sn = one; /* s (sign of result -ve**odd) = -1 else = 1 */
|
|
|
|
if((n|(yisint-1))==0) sn = -one;/* (-ve)**(odd int) */
|
1994-08-19 09:40:01 +00:00
|
|
|
|
|
|
|
/* |y| is huge */
|
|
|
|
if(iy>0x4d000000) { /* if |y| > 2**27 */
|
|
|
|
/* over/underflow if x is not close to one */
|
Fix powf().
Summary:
From Steve Kargl:
Paul Zimmermann has identified a bug in Openlibm's powf(),
which is identical to FreeBSD's libm. Both derived from
fdlibm. https://github.com/JuliaMath/openlibm/issues/212.
Consider
% cat h.c
int
main(void)
{
float x, y, z;
x = 0x1.ffffecp-1F;
y = -0x1.000002p+27F;
z = 0x1.557a86p115F;
printf("%e %e %e <-- should be %e\n", x, y, powf(x,y), z);
return 0;
}
% cc -o h -fno-builtin h.c -lm && ./h
9.999994e-01 -1.342177e+08 inf <-- should be 5.540807e+34
Reviewers: manu
Subscribers: imp, andrew, emaste
Differential Revision: https://reviews.freebsd.org/D31865
2021-09-06 17:26:39 +00:00
|
|
|
if(ix<0x3f7ffff6) return (hy<0)? sn*huge*huge:sn*tiny*tiny;
|
2004-06-01 19:33:30 +00:00
|
|
|
if(ix>0x3f800007) return (hy>0)? sn*huge*huge:sn*tiny*tiny;
|
1995-05-30 05:51:47 +00:00
|
|
|
/* now |1-x| is tiny <= 2**-20, suffice to compute
|
1994-08-19 09:40:01 +00:00
|
|
|
log(x) by x-x^2/2+x^3/3-x^4/4 */
|
2002-06-17 15:28:59 +00:00
|
|
|
t = ax-1; /* t has 20 trailing zeros */
|
2018-07-08 16:33:58 +00:00
|
|
|
w = (t*t)*(half-t*(thrd-t*qrtr));
|
1994-08-19 09:40:01 +00:00
|
|
|
u = ivln2_h*t; /* ivln2_h has 16 sig. bits */
|
|
|
|
v = t*ivln2_l-w*ivln2;
|
|
|
|
t1 = u+v;
|
|
|
|
GET_FLOAT_WORD(is,t1);
|
|
|
|
SET_FLOAT_WORD(t1,is&0xfffff000);
|
|
|
|
t2 = v-(t1-u);
|
|
|
|
} else {
|
|
|
|
float s2,s_h,s_l,t_h,t_l;
|
|
|
|
n = 0;
|
|
|
|
/* take care subnormal number */
|
|
|
|
if(ix<0x00800000)
|
|
|
|
{ax *= two24; n -= 24; GET_FLOAT_WORD(ix,ax); }
|
|
|
|
n += ((ix)>>23)-0x7f;
|
|
|
|
j = ix&0x007fffff;
|
|
|
|
/* determine interval */
|
|
|
|
ix = j|0x3f800000; /* normalize ix */
|
|
|
|
if(j<=0x1cc471) k=0; /* |x|<sqrt(3/2) */
|
|
|
|
else if(j<0x5db3d7) k=1; /* |x|<sqrt(3) */
|
|
|
|
else {k=0;n+=1;ix -= 0x00800000;}
|
|
|
|
SET_FLOAT_WORD(ax,ix);
|
|
|
|
|
|
|
|
/* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
|
|
|
|
u = ax-bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
|
|
|
|
v = one/(ax+bp[k]);
|
|
|
|
s = u*v;
|
|
|
|
s_h = s;
|
|
|
|
GET_FLOAT_WORD(is,s_h);
|
|
|
|
SET_FLOAT_WORD(s_h,is&0xfffff000);
|
|
|
|
/* t_h=ax+bp[k] High */
|
2004-06-01 18:08:39 +00:00
|
|
|
is = ((ix>>1)&0xfffff000)|0x20000000;
|
|
|
|
SET_FLOAT_WORD(t_h,is+0x00400000+(k<<21));
|
1994-08-19 09:40:01 +00:00
|
|
|
t_l = ax - (t_h-bp[k]);
|
|
|
|
s_l = v*((u-s_h*t_h)-s_h*t_l);
|
|
|
|
/* compute log(ax) */
|
|
|
|
s2 = s*s;
|
|
|
|
r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
|
|
|
|
r += s_l*(s_h+s);
|
|
|
|
s2 = s_h*s_h;
|
2018-07-08 16:33:58 +00:00
|
|
|
t_h = 3+s2+r;
|
1994-08-19 09:40:01 +00:00
|
|
|
GET_FLOAT_WORD(is,t_h);
|
|
|
|
SET_FLOAT_WORD(t_h,is&0xfffff000);
|
2018-07-08 16:33:58 +00:00
|
|
|
t_l = r-((t_h-3)-s2);
|
1994-08-19 09:40:01 +00:00
|
|
|
/* u+v = s*(1+...) */
|
|
|
|
u = s_h*t_h;
|
|
|
|
v = s_l*t_h+t_l*s;
|
|
|
|
/* 2/(3log2)*(s+...) */
|
|
|
|
p_h = u+v;
|
|
|
|
GET_FLOAT_WORD(is,p_h);
|
|
|
|
SET_FLOAT_WORD(p_h,is&0xfffff000);
|
|
|
|
p_l = v-(p_h-u);
|
|
|
|
z_h = cp_h*p_h; /* cp_h+cp_l = 2/(3*log2) */
|
|
|
|
z_l = cp_l*p_h+p_l*cp+dp_l[k];
|
|
|
|
/* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
|
2018-07-08 16:33:58 +00:00
|
|
|
t = n;
|
1994-08-19 09:40:01 +00:00
|
|
|
t1 = (((z_h+z_l)+dp_h[k])+t);
|
|
|
|
GET_FLOAT_WORD(is,t1);
|
|
|
|
SET_FLOAT_WORD(t1,is&0xfffff000);
|
|
|
|
t2 = z_l-(((t1-t)-dp_h[k])-z_h);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
|
|
|
|
GET_FLOAT_WORD(is,y);
|
|
|
|
SET_FLOAT_WORD(y1,is&0xfffff000);
|
|
|
|
p_l = (y-y1)*t1+y*t2;
|
|
|
|
p_h = y1*t1;
|
|
|
|
z = p_l+p_h;
|
|
|
|
GET_FLOAT_WORD(j,z);
|
|
|
|
if (j>0x43000000) /* if z > 128 */
|
2004-06-01 19:33:30 +00:00
|
|
|
return sn*huge*huge; /* overflow */
|
1994-08-19 09:40:01 +00:00
|
|
|
else if (j==0x43000000) { /* if z == 128 */
|
2004-06-01 19:33:30 +00:00
|
|
|
if(p_l+ovt>z-p_h) return sn*huge*huge; /* overflow */
|
1994-08-19 09:40:01 +00:00
|
|
|
}
|
|
|
|
else if ((j&0x7fffffff)>0x43160000) /* z <= -150 */
|
2004-06-01 19:33:30 +00:00
|
|
|
return sn*tiny*tiny; /* underflow */
|
1994-08-19 09:40:01 +00:00
|
|
|
else if (j==0xc3160000){ /* z == -150 */
|
2004-06-01 19:33:30 +00:00
|
|
|
if(p_l<=z-p_h) return sn*tiny*tiny; /* underflow */
|
1994-08-19 09:40:01 +00:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* compute 2**(p_h+p_l)
|
|
|
|
*/
|
|
|
|
i = j&0x7fffffff;
|
|
|
|
k = (i>>23)-0x7f;
|
|
|
|
n = 0;
|
|
|
|
if(i>0x3f000000) { /* if |z| > 0.5, set n = [z+0.5] */
|
|
|
|
n = j+(0x00800000>>(k+1));
|
|
|
|
k = ((n&0x7fffffff)>>23)-0x7f; /* new k for n */
|
|
|
|
SET_FLOAT_WORD(t,n&~(0x007fffff>>k));
|
|
|
|
n = ((n&0x007fffff)|0x00800000)>>(23-k);
|
|
|
|
if(j<0) n = -n;
|
|
|
|
p_h -= t;
|
1995-05-30 05:51:47 +00:00
|
|
|
}
|
1994-08-19 09:40:01 +00:00
|
|
|
t = p_l+p_h;
|
|
|
|
GET_FLOAT_WORD(is,t);
|
2004-06-01 19:03:31 +00:00
|
|
|
SET_FLOAT_WORD(t,is&0xffff8000);
|
1994-08-19 09:40:01 +00:00
|
|
|
u = t*lg2_h;
|
|
|
|
v = (p_l-(t-p_h))*lg2+t*lg2_l;
|
|
|
|
z = u+v;
|
|
|
|
w = v-(z-u);
|
|
|
|
t = z*z;
|
|
|
|
t1 = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
|
|
|
|
r = (z*t1)/(t1-two)-(w+z*w);
|
|
|
|
z = one-(r-z);
|
|
|
|
GET_FLOAT_WORD(j,z);
|
|
|
|
j += (n<<23);
|
|
|
|
if((j>>23)<=0) z = scalbnf(z,n); /* subnormal output */
|
|
|
|
else SET_FLOAT_WORD(z,j);
|
2004-06-01 19:33:30 +00:00
|
|
|
return sn*z;
|
1994-08-19 09:40:01 +00:00
|
|
|
}
|