Use double precision to simplify and optimize a long division.

On Athlons, this gives a speedup of 10-20% for tanf() on uniformly distributed args in [-2Pi, 2Pi]. (It only directly applies to 43% of the args and gives a 16-20% speedup for these (more for AXP than A64), which gives an overall speedup of 10-12%, which is all that it should; however, it gives an overall speedup of 17-20% with gcc-3.3 on AXP-A64 by mysteriously affecting cases where it isn't executed.) I originally intended to use double precision for all internals of float trig functions and will probably still do this, but benchmarking showed that converting to double precision and back is a pessimization in cases where a simple float-precision calculation works, so it may be optimal to switch precisions only when using extra precision is much simpler.
2005-11-21 00:38:21 +00:00 · 2005-11-21 00:38:21 +00:00 · d96648954f
commit d96648954f
parent 01155bb235
1 changed files with 1 additions and 15 deletions
--- a/lib/msun/src/k_tanf.c
+++ b/lib/msun/src/k_tanf.c
@@ -63,19 +63,5 @@ __kernel_tanf(float x, float y, int iy)
 	    return (float)(1-((hx>>30)&2))*(v-(float)2.0*(x-(w*w/(w+v)-r)));
 	}
 	if(iy==1) return w;
-	else {		/* if allow error up to 2 ulp,
-			   simply return -1.0/(x+r) here */
-     /*  compute -1.0/(x+r) accurately */
-	    float a,t;
-	    int32_t i;
-	    z  = w;
-	    GET_FLOAT_WORD(i,z);
-	    SET_FLOAT_WORD(z,i&0xfffff000);
-	    v  = r-(z - x); 	/* z+v = r+x */
-	    t = a  = -(float)1.0/w;	/* a = -1.0/w */
-	    GET_FLOAT_WORD(i,t);
-	    SET_FLOAT_WORD(t,i&0xfffff000);
-	    s  = (float)1.0+t*z;
-	    return t+a*(s+t*v);
-	}
+	else return -1.0/((double)x+r);
 }