From ce9e405fdbd5a1b8c6ddd721f6cd1e7f95ab9d6e Mon Sep 17 00:00:00 2001
From: bde <bde@FreeBSD.org>
Date: Mon, 25 Feb 2008 11:43:20 +0000
Subject: [PATCH] Fix some off-by-1 errors.

e_rem_pio2.c:
Float and double precision didn't work because init_jk[] was 1 too small.
It needs to be 2 larger than you might expect, and 1 larger than it was
for these precisions, since its test for recomputing needs a margin of
47 bits (almost 2 24-bit units).

init_jk[] seems to be barely enough for extended and quad precisions.
This hasn't been completely verified.  Callers now get about 24 bits
of extra precision for float, and about 19 for double, but only about
8 for extended and quad.  8 is not enough for callers that want to
produce extra-precision results, but current callers have rounding
errors of at least 0.8 ulps, so another 1/2**8 ulps of error from the
reduction won't affect them much.

Add a comment about some of the magic for init_jk[].

e_rem_pio2.c:
Double precision worked in practice because of a compensating off-by-1
error here.  Extended precision was asked for, and it executed exactly
the same code as the unbroken double precision.

e_rem_pio2f.c:
Float precision worked in practice because of a compensating off-by-1
error here.  Double precision was asked for, and was almost needed,
since the cosf() and sinf() callers want to produce extra-precision
results, at least internally so that their error is only 0.5009 ulps.
However, the extra precision provided by unbroken float precision is
enough, and the double-precision code has extra overheads, so the
off-by-1 error cost about 5% in efficiency on amd64 and i386.
---
 lib/msun/src/e_rem_pio2.c  |  2 +-
 lib/msun/src/e_rem_pio2f.c |  4 ++--
 lib/msun/src/k_rem_pio2.c  | 11 ++++++++---
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/lib/msun/src/e_rem_pio2.c b/lib/msun/src/e_rem_pio2.c
index 96fc5c48361e..faf3a4659086 100644
--- a/lib/msun/src/e_rem_pio2.c
+++ b/lib/msun/src/e_rem_pio2.c
@@ -182,7 +182,7 @@ __ieee754_rem_pio2(double x, double *y)
 	tx[2] = z;
 	nx = 3;
 	while(tx[nx-1]==zero) nx--;	/* skip zero term */
-	n  =  __kernel_rem_pio2(tx,y,e0,nx,2);
+	n  =  __kernel_rem_pio2(tx,y,e0,nx,1);
 	if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
 	return n;
 }
diff --git a/lib/msun/src/e_rem_pio2f.c b/lib/msun/src/e_rem_pio2f.c
index 60f8256eed42..2cc466964d93 100644
--- a/lib/msun/src/e_rem_pio2f.c
+++ b/lib/msun/src/e_rem_pio2f.c
@@ -45,7 +45,7 @@ int
 __ieee754_rem_pio2f(float x, float *y)
 {
 	double w,r,fn;
-	double tx[1],ty[2];
+	double tx[1],ty[1];
 	float z;
 	int32_t e0,n,ix,hx;
 
@@ -77,7 +77,7 @@ __ieee754_rem_pio2f(float x, float *y)
 	e0 = (ix>>23)-150;		/* e0 = ilogb(|x|)-23; */
 	SET_FLOAT_WORD(z, ix - ((int32_t)(e0<<23)));
 	tx[0] = z;
-	n  =  __kernel_rem_pio2(tx,ty,e0,1,1);
+	n  =  __kernel_rem_pio2(tx,ty,e0,1,0);
 	y[0] = ty[0];
 	y[1] = ty[0] - y[0];
 	if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
diff --git a/lib/msun/src/k_rem_pio2.c b/lib/msun/src/k_rem_pio2.c
index 8abe6f8eadab..a2ffca60767a 100644
--- a/lib/msun/src/k_rem_pio2.c
+++ b/lib/msun/src/k_rem_pio2.c
@@ -78,8 +78,13 @@ __FBSDID("$FreeBSD$");
  * Here is the description of some local variables:
  *
  * 	jk	jk+1 is the initial number of terms of ipio2[] needed
- *		in the computation. The recommended value is 2,3,4,
- *		6 for single, double, extended,and quad.
+ *		in the computation. The minimum and recommended value
+ *		for jk is 3,4,4,6 for single, double, extended, and quad.
+ *		jk+1 must be 2 larger than you might expect so that our
+ *		recomputation test works. (Up to 24 bits in the integer
+ *		part (the 24 bits of it that we compute) and 23 bits in
+ *		the fraction part may be lost to cancelation before we
+ *		recompute.)
  *
  * 	jz	local integer variable indicating the number of 
  *		terms of ipio2[] used. 
@@ -129,7 +134,7 @@ __FBSDID("$FreeBSD$");
 #include "math.h"
 #include "math_private.h"
 
-static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
+static const int init_jk[] = {3,4,4,6}; /* initial value for jk */
 
 /*
  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi