Use only double precision for "kernel" cosf and sinf (except for
returning float). The functions are renamed from __kernel_{cos,sin}f() to __kernel_{cos,sin}df() so that misuses of them will cause link errors and not crashes. This version is an almost-routine translation with no special optimizations for accuracy or efficiency. The not-quite-routine part is that in __kernel_cosf(), regenerating the minimax polynomial with double precision coefficients gives a coefficient for the x**2 term that is not quite -0.5, so the literal 0.5 in the code and the related `hz' variable need to be modified; also, the special code for reducing the error in 1.0-x**2*0.5 is no longer needed, so it is convenient to adjust all the logic for the x**2 term a little. Note that without extra precision, it would be very bad to use a coefficient other than -0.5 for the x**2 term -- the old version depends on multiplication by -0.5 being infinitely precise so as not to need even more special code for reducing the error in 1-x**2*0.5. This gives an unimportant increase in accuracy, from ~0.8 to ~0.501 ulps. Almost all of the error is from the final rounding step, since the choice of the minimax polynomials so that their contribution to the error is a bit less than 0.5 ulps just happens to give contributions that are significantly less (~0.001 ulps). On Athlons, for uniformly distributed args in [-2pi, 2pi], this gives overall speed increases in the 10-20% range, despite giving a speed decrease of typically 19% (from 31 cycles up to 37) for sinf() on args in [-pi/4, pi/4].
This commit is contained in:
parent
75b4a96462
commit
8fdb019b17
@ -98,7 +98,7 @@ static const float zero= 0.0000000000e+00;
|
||||
GET_FLOAT_WORD(ix,x);
|
||||
ix &= 0x7fffffff;
|
||||
|
||||
if(ix<0x3e800000) return __kernel_sinf(pi*x,zero,0);
|
||||
if(ix<0x3e800000) return __kernel_sindf(pi*x);
|
||||
y = -x; /* x is assume negative */
|
||||
|
||||
/*
|
||||
@ -122,14 +122,14 @@ static const float zero= 0.0000000000e+00;
|
||||
}
|
||||
}
|
||||
switch (n) {
|
||||
case 0: y = __kernel_sinf(pi*y,zero,0); break;
|
||||
case 0: y = __kernel_sindf(pi*y); break;
|
||||
case 1:
|
||||
case 2: y = __kernel_cosf(pi*((float)0.5-y),zero); break;
|
||||
case 2: y = __kernel_cosdf(pi*((float)0.5-y)); break;
|
||||
case 3:
|
||||
case 4: y = __kernel_sinf(pi*(one-y),zero,0); break;
|
||||
case 4: y = __kernel_sindf(pi*(one-y)); break;
|
||||
case 5:
|
||||
case 6: y = -__kernel_cosf(pi*(y-(float)1.5),zero); break;
|
||||
default: y = __kernel_sinf(pi*(y-(float)2.0),zero,0); break;
|
||||
case 6: y = -__kernel_cosdf(pi*(y-(float)1.5)); break;
|
||||
default: y = __kernel_sindf(pi*(y-(float)2.0)); break;
|
||||
}
|
||||
return -y;
|
||||
}
|
||||
|
@ -14,7 +14,7 @@
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#ifndef INLINE_KERNEL_COSF
|
||||
#ifndef INLINE_KERNEL_COSDF
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$FreeBSD$";
|
||||
#endif
|
||||
@ -23,24 +23,23 @@ static char rcsid[] = "$FreeBSD$";
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
|
||||
/* |cos(x) - c(x)| < 2**-33.1 (~[-9.39e-11, 1.083e-10]). */
|
||||
static const float
|
||||
/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */
|
||||
static const double
|
||||
one = 1.0,
|
||||
C1 = 0xaaaaa5.0p-28, /* 0.041666645557 */
|
||||
C2 = -0xb60615.0p-33, /* -0.0013887310633 */
|
||||
C3 = 0xccf47d.0p-39; /* 0.000024432542887 */
|
||||
C0 = -0x1ffffffd0c5e81.0p-54, /* -0.499999997251031003120 */
|
||||
C1 = 0x155553e1053a42.0p-57, /* 0.0416666233237390631894 */
|
||||
C2 = -0x16c087e80f1e27.0p-62, /* -0.00138867637746099294692 */
|
||||
C3 = 0x199342e0ee5069.0p-68; /* 0.0000243904487962774090654 */
|
||||
|
||||
#ifdef INLINE_KERNEL_COSF
|
||||
#ifdef INLINE_KERNEL_COSDF
|
||||
extern inline
|
||||
#endif
|
||||
float
|
||||
__kernel_cosf(float x, float y)
|
||||
__kernel_cosdf(double x)
|
||||
{
|
||||
float hz,z,r,w;
|
||||
double z,r;
|
||||
|
||||
z = x*x;
|
||||
r = z*(C1+z*(C2+z*C3));
|
||||
hz = (float)0.5*z;
|
||||
w = one-hz;
|
||||
return w + (((one-w)-hz) + (z*r-x*y));
|
||||
return (one+z*C0) + z*r;
|
||||
}
|
||||
|
@ -14,7 +14,7 @@
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#ifndef INLINE_KERNEL_SINF
|
||||
#ifndef INLINE_KERNEL_SINDF
|
||||
#ifndef lint
|
||||
static char rcsid[] = "$FreeBSD$";
|
||||
#endif
|
||||
@ -23,25 +23,23 @@ static char rcsid[] = "$FreeBSD$";
|
||||
#include "math.h"
|
||||
#include "math_private.h"
|
||||
|
||||
/* |sin(x)/x - s(x)| < 2**-32.5 (~[-1.57e-10, 1.572e-10]). */
|
||||
static const float
|
||||
half = 0.5,
|
||||
S1 = -0xaaaaab.0p-26, /* -0.16666667163 */
|
||||
S2 = 0x8888bb.0p-30, /* 0.0083333803341 */
|
||||
S3 = -0xd02de1.0p-36, /* -0.00019853517006 */
|
||||
S4 = 0xbe6dbe.0p-42; /* 0.0000028376084629 */
|
||||
/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). */
|
||||
static const double
|
||||
S1 = -0x15555554cbac77.0p-55, /* -0.166666666416265235595 */
|
||||
S2 = 0x111110896efbb2.0p-59, /* 0.0083333293858894631756 */
|
||||
S3 = -0x1a00f9e2cae774.0p-65, /* -0.000198393348360966317347 */
|
||||
S4 = 0x16cd878c3b46a7.0p-71; /* 0.0000027183114939898219064 */
|
||||
|
||||
#ifdef INLINE_KERNEL_SINF
|
||||
#ifdef INLINE_KERNEL_SINDF
|
||||
extern inline
|
||||
#endif
|
||||
float
|
||||
__kernel_sinf(float x, float y, int iy)
|
||||
__kernel_sindf(double x)
|
||||
{
|
||||
float z,r,v;
|
||||
double z,r,v;
|
||||
|
||||
z = x*x;
|
||||
v = z*x;
|
||||
r = S2+z*(S3+z*S4);
|
||||
if(iy==0) return x+v*(S1+z*r);
|
||||
else return x-((z*(half*y-v*r)-y)-v*S1);
|
||||
return x+v*(S1+z*r);
|
||||
}
|
||||
|
@ -264,8 +264,8 @@ int __kernel_rem_pio2(double*,double*,int,int,int,const int*);
|
||||
|
||||
/* float versions of fdlibm kernel functions */
|
||||
int __ieee754_rem_pio2f(float,float*);
|
||||
float __kernel_sinf(float,float,int);
|
||||
float __kernel_cosf(float,float);
|
||||
float __kernel_sindf(double);
|
||||
float __kernel_cosdf(double);
|
||||
float __kernel_tandf(double,int);
|
||||
int __kernel_rem_pio2f(float*,float*,int,int,int,const int*);
|
||||
|
||||
|
@ -18,8 +18,8 @@ static char rcsid[] = "$FreeBSD$";
|
||||
#endif
|
||||
|
||||
#include "math.h"
|
||||
#define INLINE_KERNEL_COSF
|
||||
#define INLINE_KERNEL_SINF
|
||||
#define INLINE_KERNEL_COSDF
|
||||
#define INLINE_KERNEL_SINDF
|
||||
#include "math_private.h"
|
||||
#include "k_cosf.c"
|
||||
#include "k_sinf.c"
|
||||
@ -31,18 +31,6 @@ c2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
|
||||
c3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
|
||||
c4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
|
||||
|
||||
static inline float
|
||||
__kernel_cosdf(double x)
|
||||
{
|
||||
return __kernel_cosf((float)x, x - (float)x);
|
||||
}
|
||||
|
||||
static inline float
|
||||
__kernel_sindf(double x)
|
||||
{
|
||||
return __kernel_sinf((float)x, x - (float)x, 1);
|
||||
}
|
||||
|
||||
float
|
||||
cosf(float x)
|
||||
{
|
||||
@ -55,7 +43,7 @@ cosf(float x)
|
||||
if(ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
|
||||
if(ix<0x39800000) /* |x| < 2**-12 */
|
||||
if(((int)x)==0) return 1.0; /* 1 with inexact if x != 0 */
|
||||
return __kernel_cosf(x,0.0);
|
||||
return __kernel_cosdf(x);
|
||||
}
|
||||
if(ix<=0x407b53d1) { /* |x| <= ~5*pi/4 */
|
||||
if(ix<=0x4016cbe3) /* |x| <= ~3pi/4 */
|
||||
@ -77,11 +65,11 @@ cosf(float x)
|
||||
else {
|
||||
n = __ieee754_rem_pio2f(x,y);
|
||||
switch(n&3) {
|
||||
case 0: return __kernel_cosf(y[0],y[1]);
|
||||
case 1: return -__kernel_sinf(y[0],y[1],1);
|
||||
case 2: return -__kernel_cosf(y[0],y[1]);
|
||||
case 0: return __kernel_cosdf((double)y[0]+y[1]);
|
||||
case 1: return -__kernel_sindf((double)y[0]+y[1]);
|
||||
case 2: return -__kernel_cosdf((double)y[0]+y[1]);
|
||||
default:
|
||||
return __kernel_sinf(y[0],y[1],1);
|
||||
return __kernel_sindf((double)y[0]+y[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -18,8 +18,8 @@ static char rcsid[] = "$FreeBSD$";
|
||||
#endif
|
||||
|
||||
#include "math.h"
|
||||
#define INLINE_KERNEL_COSF
|
||||
#define INLINE_KERNEL_SINF
|
||||
#define INLINE_KERNEL_COSDF
|
||||
#define INLINE_KERNEL_SINDF
|
||||
#include "math_private.h"
|
||||
#include "k_cosf.c"
|
||||
#include "k_sinf.c"
|
||||
@ -31,18 +31,6 @@ s2pio2 = 2*M_PI_2, /* 0x400921FB, 0x54442D18 */
|
||||
s3pio2 = 3*M_PI_2, /* 0x4012D97C, 0x7F3321D2 */
|
||||
s4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
|
||||
|
||||
static inline float
|
||||
__kernel_cosdf(double x)
|
||||
{
|
||||
return __kernel_cosf((float)x, x - (float)x);
|
||||
}
|
||||
|
||||
static inline float
|
||||
__kernel_sindf(double x)
|
||||
{
|
||||
return __kernel_sinf((float)x, x - (float)x, 1);
|
||||
}
|
||||
|
||||
float
|
||||
sinf(float x)
|
||||
{
|
||||
@ -55,7 +43,7 @@ sinf(float x)
|
||||
if(ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
|
||||
if(ix<0x39800000) /* |x| < 2**-12 */
|
||||
if(((int)x)==0) return x; /* x with inexact if x != 0 */
|
||||
return __kernel_sinf(x,0.0,0);
|
||||
return __kernel_sindf(x);
|
||||
}
|
||||
if(ix<=0x407b53d1) { /* |x| <= ~5*pi/4 */
|
||||
if(ix<=0x4016cbe3) { /* |x| <= ~3pi/4 */
|
||||
@ -83,11 +71,11 @@ sinf(float x)
|
||||
else {
|
||||
n = __ieee754_rem_pio2f(x,y);
|
||||
switch(n&3) {
|
||||
case 0: return __kernel_sinf(y[0],y[1],1);
|
||||
case 1: return __kernel_cosf(y[0],y[1]);
|
||||
case 2: return -__kernel_sinf(y[0],y[1],1);
|
||||
case 0: return __kernel_sindf((double)y[0]+y[1]);
|
||||
case 1: return __kernel_cosdf((double)y[0]+y[1]);
|
||||
case 2: return -__kernel_sindf((double)y[0]+y[1]);
|
||||
default:
|
||||
return -__kernel_cosf(y[0],y[1]);
|
||||
return -__kernel_cosdf((double)y[0]+y[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user