Oops, the last-minute optimization in rev.1.8 wasn't a good idea. The

17+17+24 bit pi/2 must only be used when subtraction of the first 2 terms in it from the arg is exact. This happens iff the the arg in bits is one of the 2**17[-1] values on each side of (float)(pi/2). Revert to the algorithm in rev.1.7 and only fix its threshold for using the 3-term pi/2. Use the threshold that maximizes the number of values for which the 3-term pi/2 is used, subject to not changing the algorithm for comparing with the threshold. The 3-term pi/2 ends up being used for about half of its usable range (about 64K values on each side).
2005-10-09 04:29:08 +00:00 · 2005-10-09 04:29:08 +00:00 · 485c06b5bb
commit 485c06b5bb
parent 63c5e35623
1 changed files with 18 additions and 7 deletions
--- a/lib/msun/src/e_rem_pio2f.c
+++ b/lib/msun/src/e_rem_pio2f.c
@ -98,16 +98,27 @@ pio2_3t =  6.1232342629e-17; /* 0x248d3132 */
 	if(ix<=0x3f490fd8)   /* |x| ~<= pi/4 , no need for reduction */
 	    {y[0] = x; y[1] = 0; return 0;}
 	if(ix<0x4016cbe4) {  /* |x| < 3pi/4, special case with n=+-1 */
-	    /* 17+17+24 bit pi has sufficient precision and best efficiency */
 	    if(hx>0) {
-		z = (x - pio2_1) - pio2_2;
-		y[0] = z - pio2_2t;
-		y[1] = (z-y[0])-pio2_2t;
+		z = x - pio2_1;
+		if((ix&0xfffe0000)!=0x3fc80000) { /* 17+24 bit pi OK */
+		    y[0] = z - pio2_1t;
+		    y[1] = (z-y[0])-pio2_1t;
+		} else {		/* near pi/2, use 17+17+24 bit pi */
+		    z -= pio2_2;
+		    y[0] = z - pio2_2t;
+		    y[1] = (z-y[0])-pio2_2t;
+		}
 		return 1;
 	    } else {	/* negative x */
-		z = (x + pio2_1) + pio2_2;
-		y[0] = z + pio2_2t;
-		y[1] = (z-y[0])+pio2_2t;
+		z = x + pio2_1;
+		if((ix&0xfffe0000)!=0x3fc80000) { /* 17+24 bit pi OK */
+		    y[0] = z + pio2_1t;
+		    y[1] = (z-y[0])+pio2_1t;
+		} else {		/* near pi/2, use 17+17+24 bit pi */
+		    z += pio2_2;
+		    y[0] = z + pio2_2t;
+		    y[1] = (z-y[0])+pio2_2t;
+		}
 		return -1;
 	    }
 	}