From 59aad933ab2d68f9c1b68caf2344f1e81f0a13f4 Mon Sep 17 00:00:00 2001 From: Bruce Evans Date: Mon, 28 Nov 2005 04:58:57 +0000 Subject: [PATCH] Use only double precision for "kernel" cosf and sinf (except for returning float). The functions are renamed from __kernel_{cos,sin}f() to __kernel_{cos,sin}df() so that misuses of them will cause link errors and not crashes. This version is an almost-routine translation with no special optimizations for accuracy or efficiency. The not-quite-routine part is that in __kernel_cosf(), regenerating the minimax polynomial with double precision coefficients gives a coefficient for the x**2 term that is not quite -0.5, so the literal 0.5 in the code and the related `hz' variable need to be modified; also, the special code for reducing the error in 1.0-x**2*0.5 is no longer needed, so it is convenient to adjust all the logic for the x**2 term a little. Note that without extra precision, it would be very bad to use a coefficient other than -0.5 for the x**2 term -- the old version depends on multiplication by -0.5 being infinitely precise so as not to need even more special code for reducing the error in 1-x**2*0.5. This gives an unimportant increase in accuracy, from ~0.8 to ~0.501 ulps. Almost all of the error is from the final rounding step, since the choice of the minimax polynomials so that their contribution to the error is a bit less than 0.5 ulps just happens to give contributions that are significantly less (~.001 ulps). On Athlons, for uniformly distributed args in [-2pi, 2pi], this gives overall speed increases in the 10-20% range, despite giving a speed decrease of typically 19% (from 31 cycles up to 37) for sinf() on args in [-pi/4, pi/4]. 
--- lib/msun/src/e_lgammaf_r.c | 12 ++++++------ lib/msun/src/k_cosf.c | 23 +++++++++++------------ lib/msun/src/k_sinf.c | 24 +++++++++++------------- lib/msun/src/math_private.h | 4 ++-- lib/msun/src/s_cosf.c | 26 +++++++------------------- lib/msun/src/s_sinf.c | 26 +++++++------------------- 6 files changed, 44 insertions(+), 71 deletions(-) diff --git a/lib/msun/src/e_lgammaf_r.c b/lib/msun/src/e_lgammaf_r.c index 1c12eec42735..5968b83c7142 100644 --- a/lib/msun/src/e_lgammaf_r.c +++ b/lib/msun/src/e_lgammaf_r.c @@ -98,7 +98,7 @@ static const float zero= 0.0000000000e+00; GET_FLOAT_WORD(ix,x); ix &= 0x7fffffff; - if(ix<0x3e800000) return __kernel_sinf(pi*x,zero,0); + if(ix<0x3e800000) return __kernel_sindf(pi*x); y = -x; /* x is assume negative */ /* @@ -122,14 +122,14 @@ static const float zero= 0.0000000000e+00; } } switch (n) { - case 0: y = __kernel_sinf(pi*y,zero,0); break; + case 0: y = __kernel_sindf(pi*y); break; case 1: - case 2: y = __kernel_cosf(pi*((float)0.5-y),zero); break; + case 2: y = __kernel_cosdf(pi*((float)0.5-y)); break; case 3: - case 4: y = __kernel_sinf(pi*(one-y),zero,0); break; + case 4: y = __kernel_sindf(pi*(one-y)); break; case 5: - case 6: y = -__kernel_cosf(pi*(y-(float)1.5),zero); break; - default: y = __kernel_sinf(pi*(y-(float)2.0),zero,0); break; + case 6: y = -__kernel_cosdf(pi*(y-(float)1.5)); break; + default: y = __kernel_sindf(pi*(y-(float)2.0)); break; } return -y; } diff --git a/lib/msun/src/k_cosf.c b/lib/msun/src/k_cosf.c index 4e04fd800755..a3bf520af0f7 100644 --- a/lib/msun/src/k_cosf.c +++ b/lib/msun/src/k_cosf.c @@ -14,7 +14,7 @@ * ==================================================== */ -#ifndef INLINE_KERNEL_COSF +#ifndef INLINE_KERNEL_COSDF #ifndef lint static char rcsid[] = "$FreeBSD$"; #endif @@ -23,24 +23,23 @@ static char rcsid[] = "$FreeBSD$"; #include "math.h" #include "math_private.h" -/* |cos(x) - c(x)| < 2**-33.1 (~[-9.39e-11, 1.083e-10]). 
*/ -static const float +/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */ +static const double one = 1.0, -C1 = 0xaaaaa5.0p-28, /* 0.041666645557 */ -C2 = -0xb60615.0p-33, /* -0.0013887310633 */ -C3 = 0xccf47d.0p-39; /* 0.000024432542887 */ +C0 = -0x1ffffffd0c5e81.0p-54, /* -0.499999997251031003120 */ +C1 = 0x155553e1053a42.0p-57, /* 0.0416666233237390631894 */ +C2 = -0x16c087e80f1e27.0p-62, /* -0.00138867637746099294692 */ +C3 = 0x199342e0ee5069.0p-68; /* 0.0000243904487962774090654 */ -#ifdef INLINE_KERNEL_COSF +#ifdef INLINE_KERNEL_COSDF extern inline #endif float -__kernel_cosf(float x, float y) +__kernel_cosdf(double x) { - float hz,z,r,w; + double z,r; z = x*x; r = z*(C1+z*(C2+z*C3)); - hz = (float)0.5*z; - w = one-hz; - return w + (((one-w)-hz) + (z*r-x*y)); + return (one+z*C0) + z*r; } diff --git a/lib/msun/src/k_sinf.c b/lib/msun/src/k_sinf.c index ad8d9de4ed7a..7009f821de6e 100644 --- a/lib/msun/src/k_sinf.c +++ b/lib/msun/src/k_sinf.c @@ -14,7 +14,7 @@ * ==================================================== */ -#ifndef INLINE_KERNEL_SINF +#ifndef INLINE_KERNEL_SINDF #ifndef lint static char rcsid[] = "$FreeBSD$"; #endif @@ -23,25 +23,23 @@ static char rcsid[] = "$FreeBSD$"; #include "math.h" #include "math_private.h" -/* |sin(x)/x - s(x)| < 2**-32.5 (~[-1.57e-10, 1.572e-10]). */ -static const float -half = 0.5, -S1 = -0xaaaaab.0p-26, /* -0.16666667163 */ -S2 = 0x8888bb.0p-30, /* 0.0083333803341 */ -S3 = -0xd02de1.0p-36, /* -0.00019853517006 */ -S4 = 0xbe6dbe.0p-42; /* 0.0000028376084629 */ +/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). 
*/ +static const double +S1 = -0x15555554cbac77.0p-55, /* -0.166666666416265235595 */ +S2 = 0x111110896efbb2.0p-59, /* 0.0083333293858894631756 */ +S3 = -0x1a00f9e2cae774.0p-65, /* -0.000198393348360966317347 */ +S4 = 0x16cd878c3b46a7.0p-71; /* 0.0000027183114939898219064 */ -#ifdef INLINE_KERNEL_SINF +#ifdef INLINE_KERNEL_SINDF extern inline #endif float -__kernel_sinf(float x, float y, int iy) +__kernel_sindf(double x) { - float z,r,v; + double z,r,v; z = x*x; v = z*x; r = S2+z*(S3+z*S4); - if(iy==0) return x+v*(S1+z*r); - else return x-((z*(half*y-v*r)-y)-v*S1); + return x+v*(S1+z*r); } diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h index 257ff63fa2a4..9b38480d788d 100644 --- a/lib/msun/src/math_private.h +++ b/lib/msun/src/math_private.h @@ -264,8 +264,8 @@ int __kernel_rem_pio2(double*,double*,int,int,int,const int*); /* float versions of fdlibm kernel functions */ int __ieee754_rem_pio2f(float,float*); -float __kernel_sinf(float,float,int); -float __kernel_cosf(float,float); +float __kernel_sindf(double); +float __kernel_cosdf(double); float __kernel_tandf(double,int); int __kernel_rem_pio2f(float*,float*,int,int,int,const int*); diff --git a/lib/msun/src/s_cosf.c b/lib/msun/src/s_cosf.c index ced11eb79b27..aa90ea258c23 100644 --- a/lib/msun/src/s_cosf.c +++ b/lib/msun/src/s_cosf.c @@ -18,8 +18,8 @@ static char rcsid[] = "$FreeBSD$"; #endif #include "math.h" -#define INLINE_KERNEL_COSF -#define INLINE_KERNEL_SINF +#define INLINE_KERNEL_COSDF +#define INLINE_KERNEL_SINDF #include "math_private.h" #include "k_cosf.c" #include "k_sinf.c" @@ -31,18 +31,6 @@ c2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */ c3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */ c4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */ -static inline float -__kernel_cosdf(double x) -{ - return __kernel_cosf((float)x, x - (float)x); -} - -static inline float -__kernel_sindf(double x) -{ - return __kernel_sinf((float)x, x - (float)x, 1); -} - float cosf(float x) { @@ -55,7 +43,7 
@@ cosf(float x) if(ix <= 0x3f490fda) { /* |x| ~<= pi/4 */ if(ix<0x39800000) /* |x| < 2**-12 */ if(((int)x)==0) return 1.0; /* 1 with inexact if x != 0 */ - return __kernel_cosf(x,0.0); + return __kernel_cosdf(x); } if(ix<=0x407b53d1) { /* |x| <= ~5*pi/4 */ if(ix<=0x4016cbe3) /* |x| <= ~3pi/4 */ @@ -77,11 +65,11 @@ cosf(float x) else { n = __ieee754_rem_pio2f(x,y); switch(n&3) { - case 0: return __kernel_cosf(y[0],y[1]); - case 1: return -__kernel_sinf(y[0],y[1],1); - case 2: return -__kernel_cosf(y[0],y[1]); + case 0: return __kernel_cosdf((double)y[0]+y[1]); + case 1: return -__kernel_sindf((double)y[0]+y[1]); + case 2: return -__kernel_cosdf((double)y[0]+y[1]); default: - return __kernel_sinf(y[0],y[1],1); + return __kernel_sindf((double)y[0]+y[1]); } } } diff --git a/lib/msun/src/s_sinf.c b/lib/msun/src/s_sinf.c index 1e4270e1688d..4986d9a27c51 100644 --- a/lib/msun/src/s_sinf.c +++ b/lib/msun/src/s_sinf.c @@ -18,8 +18,8 @@ static char rcsid[] = "$FreeBSD$"; #endif #include "math.h" -#define INLINE_KERNEL_COSF -#define INLINE_KERNEL_SINF +#define INLINE_KERNEL_COSDF +#define INLINE_KERNEL_SINDF #include "math_private.h" #include "k_cosf.c" #include "k_sinf.c" @@ -31,18 +31,6 @@ s2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */ s3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */ s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */ -static inline float -__kernel_cosdf(double x) -{ - return __kernel_cosf((float)x, x - (float)x); -} - -static inline float -__kernel_sindf(double x) -{ - return __kernel_sinf((float)x, x - (float)x, 1); -} - float sinf(float x) { @@ -55,7 +43,7 @@ sinf(float x) if(ix <= 0x3f490fda) { /* |x| ~<= pi/4 */ if(ix<0x39800000) /* |x| < 2**-12 */ if(((int)x)==0) return x; /* x with inexact if x != 0 */ - return __kernel_sinf(x,0.0,0); + return __kernel_sindf(x); } if(ix<=0x407b53d1) { /* |x| <= ~5*pi/4 */ if(ix<=0x4016cbe3) { /* |x| <= ~3pi/4 */ @@ -83,11 +71,11 @@ sinf(float x) else { n = __ieee754_rem_pio2f(x,y); switch(n&3) { - case 0: return 
__kernel_sinf(y[0],y[1],1); - case 1: return __kernel_cosf(y[0],y[1]); - case 2: return -__kernel_sinf(y[0],y[1],1); + case 0: return __kernel_sindf((double)y[0]+y[1]); + case 1: return __kernel_cosdf((double)y[0]+y[1]); + case 2: return -__kernel_sindf((double)y[0]+y[1]); default: - return -__kernel_cosf(y[0],y[1]); + return -__kernel_cosdf((double)y[0]+y[1]); } } }