From 046e2d5db1e8afd2d09ea28e5d2a7550535d4b77 Mon Sep 17 00:00:00 2001 From: Steve Kargl Date: Fri, 5 Nov 2021 04:04:01 +0200 Subject: [PATCH] Implementations of cexpl() The change implements cexpl() for both ld80 and ld128 architectures. Testing was done on x86_64 and aarch64 systems. Along the way sincos[fl]() use an optimization that reduces the argument to being done one rather than twice. This optimization actually pointed to a bug in the ld128 version of sincosl(), which is now fixed. In addition, the minmax polynomial coefficients for sincosl() have been updated. A concise log of the file-by-file changes follows. * include/complex.h: . Add a prototype for cexpl(). * lib/msun/Makefile: . Add s_cexpl.c to the build. . Setup a link for cexpl.3 to cexp.3. * lib/msun/Symbol.map: . Expose cexpl symbol in libm shared library. * lib/msun/ld128/s_cexpl.c: * Implementation of cexpl() for 128-bit long double architectures. Tested on an aarch64 system. * lib/msun/ld80/s_cexpl.c: * Implementation of cexpl() for Intel 80-bit long double. * lib/msun/man/cexp.3: . Document cexpl(). * lib/msun/man/complex.3: . Add a BUGS section about cpow[fl]. * lib/msun/src/s_cexp.c: . Include float.h for weak references on 53-bit long double targets. . Use sincos() to reduce argument reduction cost. * lib/msun/src/s_cexpf.c: . Use sincosf() to reduce argument reduction cost. * lib/msun/src/k_sincosl.h: . Catch up with the new minmax polynomial coefficients for the kernel for the 128-bit cosl() implementation. . BUG FIX: *cs was used where *sn should have been. This means that sinl() was no computed correctly when iy != 0. * lib/msun/src/s_cosl.c: . Include fpmath.h to get access to IEEEl2bits. . Replace M_PI_4 with pio4, a 64-bit or 113-bit approximation for pi / 4. PR: 216862 MFC after: 1 week --- include/complex.h | 2 + lib/msun/Makefile | 4 +- lib/msun/Symbol.map | 1 + lib/msun/ld128/s_cexpl.c | 94 ++++++++++++++++++++++++++++++++++ lib/msun/ld80/s_cexpl.c | 107 +++++++++++++++++++++++++++++++++++++++ lib/msun/man/cexp.3 | 17 ++++--- lib/msun/man/complex.3 | 8 ++- lib/msun/src/k_sincosl.h | 29 ++++++----- lib/msun/src/s_cexp.c | 16 ++++-- lib/msun/src/s_cexpf.c | 11 ++-- lib/msun/src/s_cosl.c | 7 ++- 11 files changed, 265 insertions(+), 31 deletions(-) create mode 100644 lib/msun/ld128/s_cexpl.c create mode 100644 lib/msun/ld80/s_cexpl.c diff --git a/include/complex.h b/include/complex.h index 892bc55e5145..c31c15d9da4b 100644 --- a/include/complex.h +++ b/include/complex.h @@ -98,6 +98,8 @@ double complex ccosh(double complex); float complex ccoshf(float complex); double complex cexp(double complex); float complex cexpf(float complex); +long double complex + cexpl(long double complex); double cimag(double complex) __pure2; float cimagf(float complex) __pure2; long double cimagl(long double complex) __pure2; diff --git a/lib/msun/Makefile b/lib/msun/Makefile index 1d94e371e61f..d7c0e2f88358 100644 --- a/lib/msun/Makefile +++ b/lib/msun/Makefile @@ -117,7 +117,7 @@ COMMON_SRCS+= catrigl.c \ e_lgammal.c e_lgammal_r.c e_powl.c \ e_remainderl.c e_sinhl.c e_sqrtl.c \ invtrig.c k_cosl.c k_sinl.c k_tanl.c \ - s_asinhl.c s_atanl.c s_cbrtl.c s_ceill.c \ + s_asinhl.c s_atanl.c s_cbrtl.c s_ceill.c s_cexpl.c \ s_clogl.c s_cosl.c s_cospil.c s_cprojl.c \ s_csqrtl.c s_erfl.c s_exp2l.c s_expl.c s_floorl.c s_fmal.c \ s_fmaxl.c s_fminl.c s_frexpl.c s_logbl.c s_logl.c s_nanl.c \ @@ -189,7 +189,7 @@ MLINKS+=ccos.3 ccosf.3 ccos.3 csin.3 ccos.3 csinf.3 ccos.3 ctan.3 ccos.3 ctanf.3 MLINKS+=ccosh.3 ccoshf.3 ccosh.3 csinh.3 ccosh.3 csinhf.3 \ ccosh.3 ctanh.3 ccosh.3 ctanhf.3 MLINKS+=ceil.3 ceilf.3 ceil.3 ceill.3 -MLINKS+=cexp.3 cexpf.3 +MLINKS+=cexp.3 cexpf.3 cexp.3 cexpl.3 MLINKS+=cimag.3 cimagf.3 cimag.3 cimagl.3 \ cimag.3 conj.3 cimag.3 conjf.3 cimag.3 conjl.3 \ cimag.3 cproj.3 cimag.3 cprojf.3 cimag.3 cprojl.3 \ diff --git a/lib/msun/Symbol.map b/lib/msun/Symbol.map index 7229e7ef31fd..8650d56eb9d5 100644 --- a/lib/msun/Symbol.map +++ b/lib/msun/Symbol.map @@ -307,6 +307,7 @@ FBSD_1.5 { /* First added in 14.0-CURRENT */ FBSD_1.7 { + cexpl; cospi; cospif; cospil; diff --git a/lib/msun/ld128/s_cexpl.c b/lib/msun/ld128/s_cexpl.c new file mode 100644 index 000000000000..24f5e3792115 --- /dev/null +++ b/lib/msun/ld128/s_cexpl.c @@ -0,0 +1,94 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Steven G. Kargl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include "fpmath.h" +#include "math_private.h" +#include "k_expl.h" + +/* XXX cexpl() should be converted to use bits likeo src/s_cexp.c. */ + +static const long double +cexp_ovfl = 2.27892930024498818830197576893019292e+04L, +exp_ovfl = 1.13565234062941439494919310779707649e+04L; + +long double complex +cexpl(long double complex z) +{ + long double c, exp_x, s, x, y; + + x = creall(z); + y = cimagl(z); + + /* cexp(x + I 0) = exp(x) + I 0 */ + if (y == 0) + return (CMPLXL(expl(x), y)); + /* cexp(0 + I y) = cos(y) + I sin(y) */ + if (x == 0) { + sincosl(y, &s, &c); + return (CMPLXL(c, s)); + } + + if (!isfinite(y)) { + if (isfinite(x) || isnan(x)) { + /* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */ + return (CMPLXL(y - y, y - y)); + } else if (isinf(x) && copysignl(1.L, x) < 0) { + /* cexp(-Inf +- I Inf|NaN) = 0 + I 0 */ + return (CMPLXL(0.0, 0.0)); + } else { + /* cexp(+Inf +- I Inf|NaN) = Inf + I NaN */ + return (CMPLXL(x, y - y)); + } + } + + if (x > exp_ovfl && x < cexp_ovfl) { + /* + * x is between exp_ovfl and cexp_ovfl, so we must scale to + * avoid overflow in exp(x). + */ + return (__ldexp_cexpl(z, 0)); + } else { + /* + * Cases covered here: + * - x < exp_ovfl and exp(x) won't overflow (common case) + * - x > cexp_ovfl, so exp(x) * s overflows for all s > 0 + * - x = +-Inf (generated by exp()) + * - x = NaN (spurious inexact exception from y) + */ + exp_x = expl(x); + sincosl(y, &s, &c); + return (CMPLXL(exp_x * c, exp_x * s)); + } +} diff --git a/lib/msun/ld80/s_cexpl.c b/lib/msun/ld80/s_cexpl.c new file mode 100644 index 000000000000..5cc35f6d2514 --- /dev/null +++ b/lib/msun/ld80/s_cexpl.c @@ -0,0 +1,107 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2011 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * src/s_cexp.c converted to long double complex by Steven G. Kargl + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#ifdef __i386__ +#include +#endif + +#include "fpmath.h" +#include "math.h" +#include "math_private.h" +#include "k_expl.h" + +long double complex +cexpl (long double complex z) +{ + long double c, exp_x, s, x, y; + uint64_t lx, ly; + uint16_t hx, hy; + + ENTERI(); + + x = creall(z); + y = cimagl(z); + + EXTRACT_LDBL80_WORDS(hy, ly, y); + hy &= 0x7fff; + + /* cexp(x + I 0) = exp(x) + I 0 */ + if ((hy | ly) == 0) + RETURNI(CMPLXL(expl(x), y)); + EXTRACT_LDBL80_WORDS(hx, lx, x); + /* cexp(0 + I y) = cos(y) + I sin(y) */ + if (((hx & 0x7fff) | lx) == 0) { + sincosl(y, &s, &c); + RETURNI(CMPLXL(c, s)); + } + + if (hy >= 0x7fff) { + if ((hx & 0x7fff) < 0x7fff || ((hx & 0x7fff) == 0x7fff && + (lx & 0x7fffffffffffffffULL) != 0)) { + /* cexp(finite|NaN +- I Inf|NaN) = NaN + I NaN */ + RETURNI(CMPLXL(y - y, y - y)); + } else if (hx & 0x8000) { + /* cexp(-Inf +- I Inf|NaN) = 0 + I 0 */ + RETURNI(CMPLXL(0.0, 0.0)); + } else { + /* cexp(+Inf +- I Inf|NaN) = Inf + I NaN */ + RETURNI(CMPLXL(x, y - y)); + } + } + + /* + * exp_ovfl = 11356.5234062941439497 + * cexp_ovfl = 22755.3287906024445633 + */ + if ((hx == 0x400c && lx > 0xb17217f7d1cf79acULL) || + (hx == 0x400d && lx < 0xb1c6a8573de9768cULL)) { + /* + * x is between exp_ovfl and cexp_ovfl, so we must scale to + * avoid overflow in exp(x). + */ + RETURNI(__ldexp_cexpl(z, 0)); + } else { + /* + * Cases covered here: + * - x < exp_ovfl and exp(x) won't overflow (common case) + * - x > cexp_ovfl, so exp(x) * s overflows for all s > 0 + * - x = +-Inf (generated by exp()) + * - x = NaN (spurious inexact exception from y) + */ + exp_x = expl(x); + sincosl(y, &s, &c); + RETURNI(CMPLXL(exp_x * c, exp_x * s)); + } +} diff --git a/lib/msun/man/cexp.3 b/lib/msun/man/cexp.3 index 776e6cee823e..27ab3e9c2098 100644 --- a/lib/msun/man/cexp.3 +++ b/lib/msun/man/cexp.3 @@ -24,12 +24,13 @@ .\" .\" $FreeBSD$ .\" -.Dd March 6, 2011 +.Dd November 3, 2021 .Dt CEXP 3 .Os .Sh NAME .Nm cexp , -.Nm cexpf +.Nm cexpf , +.Nm cexpl .Nd complex exponential functions .Sh LIBRARY .Lb libm @@ -39,11 +40,14 @@ .Fn cexp "double complex z" .Ft float complex .Fn cexpf "float complex z" +.Ft long double complex +.Fn cexpl "long double complex z" .Sh DESCRIPTION The -.Fn cexp +.Fn cexp , +.Fn cexpf , and -.Fn cexpf +.Fn cexpl functions compute the complex exponential of .Fa z , also known as @@ -106,8 +110,9 @@ is not finite, the sign of the result is indeterminate. .Xr math 3 .Sh STANDARDS The -.Fn cexp +.Fn cexp , +.Fn cexpf , and -.Fn cexpf +.Fn cexpl functions conform to .St -isoC-99 . diff --git a/lib/msun/man/complex.3 b/lib/msun/man/complex.3 index f1acfbe6da74..8cc0b7f97c52 100644 --- a/lib/msun/man/complex.3 +++ b/lib/msun/man/complex.3 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd June 19, 2018 +.Dd November 3, 2021 .Dt COMPLEX 3 .Os .Sh NAME @@ -121,3 +121,9 @@ The .In complex.h functions described here conform to .St -isoC-99 . +.Sh BUGS +The power functions, +.Fn cpowf, cpow , +and +.Fn cpowl , +are implemented, but the code was neither reviewed nor tested. diff --git a/lib/msun/src/k_sincosl.h b/lib/msun/src/k_sincosl.h index 4d4dc69f7820..6425f14a1ea0 100644 --- a/lib/msun/src/k_sincosl.h +++ b/lib/msun/src/k_sincosl.h @@ -76,13 +76,6 @@ __kernel_sincosl(long double x, long double y, int iy, long double *sn, #elif LDBL_MANT_DIG == 113 /* ld128 version of k_sincosl.c. */ static const long double -C1 = 0.04166666666666666666666666666666658424671L, -C2 = -0.001388888888888888888888888888863490893732L, -C3 = 0.00002480158730158730158730158600795304914210L, -C4 = -0.2755731922398589065255474947078934284324e-6L, -C5 = 0.2087675698786809897659225313136400793948e-8L, -C6 = -0.1147074559772972315817149986812031204775e-10L, -C7 = 0.4779477332386808976875457937252120293400e-13L, S1 = -0.16666666666666666666666666666666666606732416116558L, S2 = 0.0083333333333333333333333333333331135404851288270047L, S3 = -0.00019841269841269841269841269839935785325638310428717L, @@ -93,15 +86,25 @@ S7 = -0.76471637318198151807063387954939213287488216303768e-12L, S8 = 0.28114572543451292625024967174638477283187397621303e-14L; static const double -C8 = -0.1561920696721507929516718307820958119868e-15, -C9 = 0.4110317413744594971475941557607804508039e-18, -C10 = -0.8896592467191938803288521958313920156409e-21, -C11 = 0.1601061435794535138244346256065192782581e-23, S9 = -0.82206352458348947812512122163446202498005154296863e-17, S10 = 0.19572940011906109418080609928334380560135358385256e-19, S11 = -0.38680813379701966970673724299207480965452616911420e-22, S12 = 0.64038150078671872796678569586315881020659912139412e-25; +static const long double +C1 = 4.16666666666666666666666666666666667e-02L, +C2 = -1.38888888888888888888888888888888834e-03L, +C3 = 2.48015873015873015873015873015446795e-05L, +C4 = -2.75573192239858906525573190949988493e-07L, +C5 = 2.08767569878680989792098886701451072e-09L, +C6 = -1.14707455977297247136657111139971865e-11L, +C7 = 4.77947733238738518870113294139830239e-14L, +C8 = -1.56192069685858079920640872925306403e-16L, +C9 = 4.11031762320473354032038893429515732e-19L, +C10= -8.89679121027589608738005163931958096e-22L, +C11= 1.61171797801314301767074036661901531e-24L, +C12= -2.46748624357670948912574279501044295e-27L; + static inline void __kernel_sincosl(long double x, long double y, int iy, long double *sn, long double *cs) @@ -120,12 +123,12 @@ __kernel_sincosl(long double x, long double y, int iy, long double *sn, if (iy == 0) *sn = x + v * (S1 + z * r); else - *cs = x - ((z * (y / 2 - v * r) - y) - v * S1); + *sn = x - ((z * (y / 2 - v * r) - y) - v * S1); hz = z / 2; w = 1 - hz; r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * (C6 + - z * (C7 + z * (C8 + z * (C9 + z * (C10 + z * C11)))))))))); + z * (C7 + z * (C8 + z * (C9 + z * (C10 + z * (C11+z*C12))))))))))); *cs = w + (((1 - w) - hz) + (z * r - x * y)); } diff --git a/lib/msun/src/s_cexp.c b/lib/msun/src/s_cexp.c index 2ef8ba1972ca..a1f853eca4cc 100644 --- a/lib/msun/src/s_cexp.c +++ b/lib/msun/src/s_cexp.c @@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include "math_private.h" @@ -41,7 +42,7 @@ cexp_ovfl = 0x4096b8e4; /* (MAX_EXP - MIN_DENORM_EXP) * ln2 */ double complex cexp(double complex z) { - double x, y, exp_x; + double c, exp_x, s, x, y; uint32_t hx, hy, lx, ly; x = creal(z); @@ -55,8 +56,10 @@ cexp(double complex z) return (CMPLX(exp(x), y)); EXTRACT_WORDS(hx, lx, x); /* cexp(0 + I y) = cos(y) + I sin(y) */ - if (((hx & 0x7fffffff) | lx) == 0) - return (CMPLX(cos(y), sin(y))); + if (((hx & 0x7fffffff) | lx) == 0) { + sincos(y, &s, &c); + return (CMPLX(c, s)); + } if (hy >= 0x7ff00000) { if (lx != 0 || (hx & 0x7fffffff) != 0x7ff00000) { @@ -86,6 +89,11 @@ cexp(double complex z) * - x = NaN (spurious inexact exception from y) */ exp_x = exp(x); - return (CMPLX(exp_x * cos(y), exp_x * sin(y))); + sincos(y, &s, &c); + return (CMPLX(exp_x * c, exp_x * s)); } } + +#if (LDBL_MANT_DIG == 53) +__weak_reference(cexp, cexpl); +#endif diff --git a/lib/msun/src/s_cexpf.c b/lib/msun/src/s_cexpf.c index b815c99af89f..d905b74ff4bd 100644 --- a/lib/msun/src/s_cexpf.c +++ b/lib/msun/src/s_cexpf.c @@ -41,7 +41,7 @@ cexp_ovfl = 0x43400074; /* (MAX_EXP - MIN_DENORM_EXP) * ln2 */ float complex cexpf(float complex z) { - float x, y, exp_x; + float c, exp_x, s, x, y; uint32_t hx, hy; x = crealf(z); @@ -55,8 +55,10 @@ cexpf(float complex z) return (CMPLXF(expf(x), y)); GET_FLOAT_WORD(hx, x); /* cexp(0 + I y) = cos(y) + I sin(y) */ - if ((hx & 0x7fffffff) == 0) - return (CMPLXF(cosf(y), sinf(y))); + if ((hx & 0x7fffffff) == 0) { + sincosf(y, &s, &c); + return (CMPLXF(c, s)); + } if (hy >= 0x7f800000) { if ((hx & 0x7fffffff) != 0x7f800000) { @@ -86,6 +88,7 @@ cexpf(float complex z) * - x = NaN (spurious inexact exception from y) */ exp_x = expf(x); - return (CMPLXF(exp_x * cosf(y), exp_x * sinf(y))); + sincosf(y, &s, &c); + return (CMPLXF(exp_x * c, exp_x * s)); } } diff --git a/lib/msun/src/s_cosl.c b/lib/msun/src/s_cosl.c index 46a2e8620a22..3d066483f227 100644 --- a/lib/msun/src/s_cosl.c +++ b/lib/msun/src/s_cosl.c @@ -39,12 +39,17 @@ __FBSDID("$FreeBSD$"); #include #endif +#include "fpmath.h" #include "math.h" #include "math_private.h" #if LDBL_MANT_DIG == 64 #include "../ld80/e_rem_pio2l.h" +static const union IEEEl2bits +pio4u = LD80C(0xc90fdaa22168c235, -00001, 7.85398163397448309628e-01L); +#define pio4 (pio4u.e) #elif LDBL_MANT_DIG == 113 #include "../ld128/e_rem_pio2l.h" +long double pio4 = 7.85398163397448309615660845819875721e-1L; #else #error "Unsupported long double format" #endif @@ -71,7 +76,7 @@ cosl(long double x) ENTERI(); /* Optimize the case where x is already within range. */ - if (z.e < M_PI_4) + if (z.e < pio4) RETURNI(__kernel_cosl(z.e, 0)); e0 = __ieee754_rem_pio2l(x, y);