k_tanf.c but with different details. The polynomial is odd with degree 13 for tanf() and odd with degree 9 for sinf(), so the details are not very different for sinf() -- the term with the x**11 and x**13 coefficients goes away, and (mysteriously) it helps to do the evaluation of w = z*z early although moving it later was a key optimization for tanf(). The details are different but simpler for cosf() because the polynomial is even and of lower degree. On Athlons, for uniformly distributed args in [-2pi, 2pi], this gives an optimization of about 4 cycles (10%) in most cases (13% for sinf() on AXP, but 0% for cosf() with gcc-3.3 -O1 on AXP). The best case (sinf() with gcc-3.4 -O1 -fcaller-saves on A64) now takes 33-39 cycles (was 37-45 cycles). Hardware sinf takes 74-129 cycles. Despite being fine-tuned for Athlons, the optimization is even larger on some other arches (about 15% on ia64 (pluto2) and 20% on alpha (beast) with gcc -O2 -fomit-frame-pointer).
48 lines
1.2 KiB
C
48 lines
1.2 KiB
C
/* k_sinf.c -- float version of k_sin.c
 * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
 * Optimized by Bruce D. Evans.
 */

/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/*
 * Revision-control identification string for this file.
 *
 * It is compiled out when INLINE_KERNEL_SINDF is defined and when the
 * file is processed by lint.
 * NOTE(review): INLINE_KERNEL_SINDF presumably means this file is being
 * #included into another translation unit, where a second rcsid would
 * clash -- confirm against the including files.
 */
#ifndef INLINE_KERNEL_SINDF
#ifndef lint
static char rcsid[] = "$FreeBSD$";
#endif /* !lint */
#endif /* !INLINE_KERNEL_SINDF */
|
|
|
|
#include "math.h"
|
|
#include "math_private.h"
|
|
|
|
/*
 * Coefficients of the polynomial s(x) used by __kernel_sindf() to
 * approximate sin(x)/x:
 *
 *	|sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]).
 *
 * With z = x*x, S1 multiplies z, S2 multiplies z**2, S3 multiplies z**3
 * and S4 multiplies z**4, so in sin(x) ~ x*s(x) they are the x**3, x**5,
 * x**7 and x**9 coefficients.  The values are written as hexadecimal
 * floating constants so that they are exact; the trailing comments give
 * their decimal values.
 */
static const double
S1 = -0x15555554cbac77.0p-55,	/* -0.166666666416265235595 */
S2 =  0x111110896efbb2.0p-59,	/*  0.0083333293858894631756 */
S3 = -0x1a00f9e2cae774.0p-65,	/* -0.000198393348360966317347 */
S4 =  0x16cd878c3b46a7.0p-71;	/*  0.0000027183114939898219064 */
|
|
|
|
#ifdef INLINE_KERNEL_SINDF
|
|
extern inline
|
|
#endif
|
|
float
|
|
__kernel_sindf(double x)
|
|
{
|
|
double r, s, w, z;
|
|
|
|
/* Try to optimize for parallel evaluation as in k_tanf.c. */
|
|
z = x*x;
|
|
w = z*z;
|
|
r = S3+z*S4;
|
|
s = z*x;
|
|
return (x + s*(S1+z*S2)) + s*w*r;
|
|
}
|