From f77d7dfd70ab217884f2b4f8904f2c9a605ecd48 Mon Sep 17 00:00:00 2001 From: bde Date: Mon, 25 Feb 2008 22:19:17 +0000 Subject: [PATCH] Inline __ieee754_rem_pio2f(). On amd64 (A64) and i386 (A64), this gives an average speedup of about 12 cycles or 17% for 9pi/4 < |x| <= 2**19pi/2 and a smaller speedup for larger x, and a small slowdown for |x| <= 9pi/4 (only 1-2 cycles average, but that is 4%). Inlining this is less likely to bust caches than inlining the float version since it is much smaller (about 220 bytes text and rodata) and has many fewer branches. However, the float version was already large due to its manual inlining of the branches and also the polynomial evaluations. --- lib/msun/src/e_rem_pio2f.c | 3 +++ lib/msun/src/s_cosf.c | 4 ++++ lib/msun/src/s_sinf.c | 4 ++++ lib/msun/src/s_tanf.c | 4 ++++ 4 files changed, 15 insertions(+) diff --git a/lib/msun/src/e_rem_pio2f.c b/lib/msun/src/e_rem_pio2f.c index ac197ba1ec69..2cdf78a05ab0 100644 --- a/lib/msun/src/e_rem_pio2f.c +++ b/lib/msun/src/e_rem_pio2f.c @@ -41,6 +41,9 @@ invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ pio2_1t = 6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */ +#ifdef INLINE_REM_PIO2F +extern inline +#endif int __ieee754_rem_pio2f(float x, double *y) { diff --git a/lib/msun/src/s_cosf.c b/lib/msun/src/s_cosf.c index 3a3b8918cae8..b701fd27ac57 100644 --- a/lib/msun/src/s_cosf.c +++ b/lib/msun/src/s_cosf.c @@ -17,10 +17,14 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <float.h> + #include "math.h" #define INLINE_KERNEL_COSDF #define INLINE_KERNEL_SINDF +#define INLINE_REM_PIO2F #include "math_private.h" +#include "e_rem_pio2f.c" #include "k_cosf.c" #include "k_sinf.c" diff --git a/lib/msun/src/s_sinf.c b/lib/msun/src/s_sinf.c index 69fb3842f11b..41b5dc11827a 100644 --- a/lib/msun/src/s_sinf.c +++ b/lib/msun/src/s_sinf.c @@ -17,10 +17,14 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <float.h> + #include "math.h" #define 
INLINE_KERNEL_COSDF #define INLINE_KERNEL_SINDF +#define INLINE_REM_PIO2F #include "math_private.h" +#include "e_rem_pio2f.c" #include "k_cosf.c" #include "k_sinf.c" diff --git a/lib/msun/src/s_tanf.c b/lib/msun/src/s_tanf.c index 3d8ee47ebc4e..4fe8c17c0acd 100644 --- a/lib/msun/src/s_tanf.c +++ b/lib/msun/src/s_tanf.c @@ -17,9 +17,13 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <float.h> + #include "math.h" #define INLINE_KERNEL_TANDF +#define INLINE_REM_PIO2F #include "math_private.h" +#include "e_rem_pio2f.c" #include "k_tanf.c" /* Small multiples of pi/2 rounded to double precision. */