Add fma() and fmaf(), which implement a fused multiply-add operation.

2005-01-22 09:53:18 +00:00 · 2005-01-22 09:53:18 +00:00 · d5580d091a
commit d5580d091a
parent c81d2c74ac
8 changed files with 412 additions and 3 deletions
--- a/lib/msun/Makefile
+++ b/lib/msun/Makefile
@ -41,6 +41,7 @@
 #					    default standard
 #

+# XXX MD crud should be in separate makefiles
 .if ${MACHINE_ARCH} == "alpha"
 ARCH_SRCS = s_copysign.S s_copysignf.S
 # XXX Comment from NetBSD/Alpha:
@ -50,6 +51,8 @@ ARCH_SRCS = s_copysign.S s_copysignf.S
 #CFLAGS += -mtrap-precision=i -mfp-trap-mode=su
 .elif ${MACHINE_ARCH} == "amd64"
 ARCH_SRCS = e_sqrt.S s_lrint.S s_llrint.S
+.elif ${MACHINE_ARCH} == "ia64"
+ARCH_SRCS = s_fma.S s_fmaf.S
 .elif ${MACHINE_ARCH} == "i386"
 ARCH_SUBDIR= i387
 ARCH_SRCS = e_acos.S e_asin.S e_atan2.S e_exp.S e_fmod.S e_log.S e_log10.S \
@ -88,7 +91,8 @@ COMMON_SRCS= b_exp.c b_log.c b_tgamma.c \
 	s_ceil.c s_ceilf.c s_ceill.c \
 	s_copysign.c s_copysignf.c s_cos.c s_cosf.c s_erf.c s_erff.c \
 	s_expm1.c s_expm1f.c s_fabsf.c s_fdim.c s_finite.c s_finitef.c \
-	s_floor.c s_floorf.c s_floorl.c s_fmax.c s_fmaxf.c s_fmaxl.c s_fmin.c \
+	s_floor.c s_floorf.c s_floorl.c s_fma.c s_fmaf.c \
+	s_fmax.c s_fmaxf.c s_fmaxl.c s_fmin.c \
 	s_fminf.c s_fminl.c s_frexp.c s_frexpf.c s_ilogb.c s_ilogbf.c \
 	s_ilogbl.c s_isfinite.c s_isnan.c s_isnormal.c s_ldexpf.c \
 	s_lib_version.c s_llrint.c s_llrintf.c s_llround.c s_llroundf.c \
@ -137,7 +141,8 @@ INCS=	fenv.h math.h

 MAN=	acos.3 acosh.3 asin.3 asinh.3 atan.3 atan2.3 atanh.3 ceil.3 \
 	cimag.3 cos.3 cosh.3 erf.3 exp.3 fabs.3 fdim.3 feclearexcept.3 \
-	fegetenv.3 fegetround.3 fenv.3 floor.3 fmax.3 fmod.3 hypot.3 ieee.3 \
+	fegetenv.3 fegetround.3 fenv.3 floor.3 \
+	fma.3 fmax.3 fmod.3 hypot.3 ieee.3 \
 	ieee_test.3 j0.3 lgamma.3 lrint.3 lround.3 math.3 rint.3 round.3 \
 	signbit.3 sin.3 sinh.3 sqrt.3 tan.3 tanh.3 trunc.3

@ -167,6 +172,7 @@ MLINKS+=fegetenv.3 feholdexcept.3 fegetenv.3 fesetenv.3 \
 	fegetenv.3 feupdateenv.3
 MLINKS+=fegetround.3 fesetround.3
 MLINKS+=floor.3 floorf.3 floor.3 floorl.3
+MLINKS+=fma.3 fmaf.3
 MLINKS+=fmax.3 fmaxf.3 fmax.3 fmaxl.3 \
 	fmax.3 fmin.3 fmax.3 fminf.3 fmax.3 fminl.3
 MLINKS+=fmod.3 fmodf.3
--- a/lib/msun/ia64/s_fma.S
+++ b/lib/msun/ia64/s_fma.S
@ -0,0 +1,34 @@
+/*-
+ * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$")
+
+ENTRY(fma, 3)
+{
+	fma.d		f8 = f8, f9, f10
+	br.ret.sptk	b0
+}
--- a/lib/msun/ia64/s_fmaf.S
+++ b/lib/msun/ia64/s_fmaf.S
@ -0,0 +1,34 @@
+/*-
+ * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$")
+
+ENTRY(fmaf, 3)
+{
+	fma.s		f8 = f8, f9, f10
+	br.ret.sptk	b0
+}
--- a/lib/msun/man/fma.3
+++ b/lib/msun/man/fma.3
@ -0,0 +1,105 @@
+.\" Copyright (c) 2005 David Schultz <das@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd January 22, 2005
+.Dt FMA 3
+.Os
+.Sh NAME
+.Nm fma ,
+.Nm fmaf
+.Nd fused multiply-add
+.Sh LIBRARY
+.Lb libm
+.Sh SYNOPSIS
+.In math.h
+.Ft double
+.Fn fma "double x" "double y" "double z"
+.Ft float
+.Fn fmaf "float x" "float y" "float z"
+.Sh DESCRIPTION
+The
+.Fn fma
+and
+.Fn fmaf
+functions return
+.No "(x * y) + z" ,
+computed with only one rounding error.
+Using the ordinary multiplication and addition operators, by contrast,
+results in two roundings: one for the intermediate product and one for
+the final result.
+.Pp
+For instance, the expression
+.No "1.2e100 * 2.0e208 - 1.4e308"
+produces \*(If due to overflow in the intermediate product, whereas
+.No "fma(1.2e100, 2.0e208, -1.4e308)"
+returns approximately 1.0e308.
+.Pp
+The fused multiply-add operation is often used to improve the
+accuracy of calculations such as dot products.
+It may also be used to improve performance on machines that implement
+it natively.
+The macros
+.Dv FP_FAST_FMA
+and
+.Dv FP_FAST_FMAF
+may be defined in
+.In math.h
+to indicate that
+.Fn fma
+and
+.Fn fmaf
+(respectively) have comparable or faster speed than a multiply
+operation followed by an add operation.
+.Sh IMPLEMENTATION NOTES
+In general,
+.Fn fma
+and
+.Fn fmaf
+will behave as one would expect if
+.No "x * y + z"
+were computed with unbounded precision and range,
+then rounded to the precision of the return type.
+However, on some platforms, if
+.Fa z
+is \*(Na, these functions may not raise an exception even
+when the computation of
+.No "x * y"
+would have otherwise generated an invalid exception.
+.Sh SEE ALSO
+.Xr fenv 3 ,
+.Xr math 3
+.Sh STANDARDS
+The
+.Fn fma
+and
+.Fn fmaf
+functions conform to
+.St -isoC-99 .
+A fused multiply-add operation with virtually identical
+characteristics appears in IEEE draft standard 754R.
+.Sh HISTORY
+These routines first appeared in
+.Fx 5.4 .
--- a/lib/msun/man/math.3
+++ b/lib/msun/man/math.3
@ -107,7 +107,7 @@ expm1	exp(x)\-1	1
 fabs	absolute value	0
 fdim	positive difference	1
 floor	integer no greater than	0
-.\" fma	multiply-add	???
+fma	multiply-add	1
 fmax	maximum function	0
 fmin	minimum function	0
 fmod	remainder function	???
--- a/lib/msun/src/math.h
+++ b/lib/msun/src/math.h
@ -68,6 +68,11 @@ extern const union __nan_un {
 #define	MATH_ERREXCEPT	2
 #define	math_errhandling	MATH_ERREXCEPT

+#ifdef	__ia64__
+#define	FP_FAST_FMA
+#endif
+#define	FP_FAST_FMAF
+
 /* Symbolic constants to classify floating point numbers. */
 #define	FP_INFINITE	0x01
 #define	FP_NAN		0x02
--- a/lib/msun/src/s_fma.c
+++ b/lib/msun/src/s_fma.c
@ -0,0 +1,178 @@
+/*-
+ * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <fenv.h>
+#include <float.h>
+#include <math.h>
+
+/*
+ * Fused multiply-add: Compute x * y + z with a single rounding error.
+ *
+ * We use scaling to avoid overflow/underflow, along with the
+ * canonical precision-doubling technique adapted from:
+ *
+ *	Dekker, T.  A Floating-Point Technique for Extending the
+ *	Available Precision.  Numer. Math. 18, 224-242 (1971).
+ *
+ * This algorithm is sensitive to the rounding precision.  FPUs such
+ * as the i387 must be set in double-precision mode if variables are
+ * to be stored in FP registers in order to avoid incorrect results.
+ * This is the default on FreeBSD, but not on many other systems.
+ *
+ * Tests on an Itanium 2 indicate that the hardware's FMA instruction
+ * is almost twice as fast as this implementation.  The hardware
+ * instruction should be used on platforms that support it.
+ *
+ * XXX May incur an absolute error of 0x1p-1074 for subnormal results
+ *     due to double rounding induced by the final scaling operation.
+ *
+ * XXX On machines supporting quad precision, we should use that, but
+ *     see the caveat in s_fmaf.c.
+ */
+double
+fma(double x, double y, double z)
+{
+	static const double split = 0x1p27 + 1.0;
+	double xs, ys, zs;
+	double c, cc, hx, hy, p, q, tx, ty;
+	double r, rr, s;
+	int oround;
+	int ex, ey, ez;
+	int spread;
+
+	if (x == 0.0 || y == 0.0)
+		return (z);
+	if (z == 0.0)
+		return (x * y);
+
+	/* Results of frexp() are undefined for these cases. */
+	if (!isfinite(x) || !isfinite(y) || !isfinite(z))
+		return (x * y + z);
+
+	xs = frexp(x, &ex);
+	ys = frexp(y, &ey);
+	zs = frexp(z, &ez);
+	oround = fegetround();
+	spread = ex + ey - ez;
+
+	/*
+	 * If x * y and z are many orders of magnitude apart, the scaling
+	 * will overflow, so we handle these cases specially.  Rounding
+	 * modes other than FE_TONEAREST are painful.
+	 */
+	if (spread > DBL_MANT_DIG * 2) {
+		fenv_t env;
+		feraiseexcept(FE_INEXACT);
+		switch(oround) {
+		case FE_TONEAREST:
+			return (x * y);
+		case FE_TOWARDZERO:
+			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
+				return (x * y);
+			feholdexcept(&env);
+			r = x * y;
+			if (!fetestexcept(FE_INEXACT))
+				r = nextafter(r, 0);
+			feupdateenv(&env);
+			return (r);
+		case FE_DOWNWARD:
+			if (z > 0.0)
+				return (x * y);
+			feholdexcept(&env);
+			r = x * y;
+			if (!fetestexcept(FE_INEXACT))
+				r = nextafter(r, -INFINITY);
+			feupdateenv(&env);
+			return (r);
+		default:	/* FE_UPWARD */
+			if (z < 0.0)
+				return (x * y);
+			feholdexcept(&env);
+			r = x * y;
+			if (!fetestexcept(FE_INEXACT))
+				r = nextafter(r, INFINITY);
+			feupdateenv(&env);
+			return (r);
+		}
+	}
+	if (spread < -DBL_MANT_DIG) {
+		feraiseexcept(FE_INEXACT);
+		if (!isnormal(z))
+			feraiseexcept(FE_UNDERFLOW);
+		switch (oround) {
+		case FE_TONEAREST:
+			return (z);
+		case FE_TOWARDZERO:
+			if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
+				return (z);
+			else
+				return (nextafter(z, 0));
+		case FE_DOWNWARD:
+			if (x > 0.0 ^ y < 0.0)
+				return (z);
+			else
+				return (nextafter(z, -INFINITY));
+		default:	/* FE_UPWARD */
+			if (x > 0.0 ^ y < 0.0)
+				return (nextafter(z, INFINITY));
+			else
+				return (z);
+		}
+	}
+
+	/*
+	 * Use Dekker's algorithm to perform the multiplication and
+	 * subsequent addition in twice the machine precision.
+	 * Arrange so that x * y = c + cc, and x * y + z = r + rr.
+	 */
+	fesetround(FE_TONEAREST);
+
+	p = xs * split;
+	hx = xs - p;
+	hx += p;
+	tx = xs - hx;
+
+	p = ys * split;
+	hy = ys - p;
+	hy += p;
+	ty = ys - hy;
+
+	p = hx * hy;
+	q = hx * ty + tx * hy;
+	c = p + q;
+	cc = p - c + q + tx * ty;
+
+	zs = ldexp(zs, -spread);
+	r = c + zs;
+	s = r - c;
+	rr = (c - (r - s)) + (zs - s) + cc;
+
+	fesetround(oround);
+	return (ldexp(r + rr, ex + ey));
+}
--- a/lib/msun/src/s_fmaf.c
+++ b/lib/msun/src/s_fmaf.c
@ -0,0 +1,47 @@
+/*-
+ * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Fused multiply-add: Compute x * y + z with a single rounding error.
+ *
+ * A double has more than twice as much precision than a float, so
+ * direct double-precision arithmetic suffices.
+ *
+ * XXX We are relying on the compiler to convert from double to float
+ *     using the current rounding mode and with the appropriate
+ *     side-effects.  But on at least one platform (gcc 3.4.2/sparc64),
+ *     this appears to be too much to ask for.  The precision
+ *     reduction should be done manually.
+ */
+float
+fmaf(float x, float y, float z)
+{
+
+	return ((double)x * y + z);
+}