The value small=2**-(p+3), where p is the precision, can be determine from

lgamma(x) = -log(x) - log(1+x) + x*(1-g) + x**2*P(x) with g = 0.57... being the Euler constant and P(x) a polynomial. Substitution of small into the RHS shows that the last 3 terms are negligible in comparison to the leading term. The choice of 3 may be conservative. The value large=2**(p+3) is detemined from Stirling's approximation lgamma(x) = x*(log(x)-1) - log(x)/2 + log(2*pi)/2 + P(1/x)/x Again, substitution of large into the RHS reveals the last 3 terms are negligible in comparison to the leading term. Move the x=+-0 special case into the |x|<small block. In the ld80 and ld128 implementaion, use fdlibm compatible comparisons involving ix, lx, and llx. This replaces several floating point comparisons (some involving fabsl()) and also fixes the special cases x=1 and x=2. While here . Remove unnecessary parentheses. . Fix/improve comments due to the above changes. . Fix nearby whitespace. * src/e_lgamma_r.c: . Sort declaration. . Remove unneeded explicit cast for type conversion. . Replace a double literal constant by an integer literal constant. * src/e_lgammaf_r.c: . Sort declaration. * ld128/e_lgammal_r.c: . Replace a long double literal constant by a double literal constant. * ld80/e_lgammal_r.c: . Remove unused '#include float.h' . Replace a long double literal constant by a double literal constant. Requested by: bde
2014-10-09 22:39:52 +00:00 · 2014-10-09 22:39:52 +00:00 · a4e4b355f4
commit a4e4b355f4
parent a0a9e1b57c
4 changed files with 128 additions and 120 deletions
--- a/lib/msun/ld128/e_lgammal_r.c
+++ b/lib/msun/ld128/e_lgammal_r.c
@ -206,13 +206,13 @@ sin_pil(long double x)
 	    n--;
 	}
 	n &= 7;
-	y = y - z + n * 0.25L;
+	y = y - z + n * 0.25;

 	switch (n) {
 	    case 0:   y =  __kernel_sinl(pi*y,zero,0); break;
 	    case 1:
 	    case 2:   y =  __kernel_cosl(pi*(0.5-y),zero); break;
-	    case 3: 
+	    case 3:
 	    case 4:   y =  __kernel_sinl(pi*(one-y),zero,0); break;
 	    case 5:
 	    case 6:   y = -__kernel_cosl(pi*(y-1.5),zero); break;
@ -221,41 +221,33 @@ sin_pil(long double x)
 	return -y;
 }

-
 long double
 lgammal_r(long double x, int *signgamp)
 {
 	long double nadj,p,p1,p2,p3,q,r,t,w,y,z;
 	uint64_t llx,lx;
 	int i;
-	uint16_t hx;
+	uint16_t hx,ix;

-	EXTRACT_LDBL128_WORDS(hx, lx, llx, x);
+	EXTRACT_LDBL128_WORDS(hx,lx,llx,x);

-	if((hx & 0x7fff) == 0x7fff) {	/* erfl(nan)=nan */
-		i = (hx>>15)<<1;
-		return (1-i)+one/x;	/* erfl(+-inf)=+-1 */
-	}
-
-    /* purge off +-inf, NaN, +-0, tiny and negative arguments */
+    /* purge +-Inf and NaNs */
 	*signgamp = 1;
-	if((hx & 0x7fff) == 0x7fff)	/* x is +-Inf or NaN */
-		return x*x;
-	if((hx==0||hx==0x8000)&&lx==0) {
-	    if (hx&0x8000)
-		*signgamp = -1;
-	    return one/vzero;
+	ix = hx&0x7fff;
+	if(ix==0x7fff) return x*x;
+
+   /* purge +-0 and tiny arguments */
+	*signgamp = 1-2*(hx>>15);
+	if(ix<0x3fff-116) {		/* |x|<2**-(p+3), return -log(|x|) */
+	    if((ix|lx|llx)==0)
+		return one/vzero;
+	    return -logl(fabsl(x));
 	}

-   /* purge off tiny and negative arguments */
-	if(fabsl(x)<0x1p-119L) {
-	    if(hx&0x8000) {
-	        *signgamp = -1;
-	        return -logl(-x);
-	    } else return -logl(x);
-	}
+    /* purge negative integers and start evaluation for other x < 0 */
 	if(hx&0x8000) {
-	    if(fabsl(x)>=0x1p112)
+	    *signgamp = 1;
+	    if(ix>=0x3fff+112) 		/* |x|>=2**(p-1), must be -integer */
 		return one/vzero;
 	    t = sin_pil(x);
 	    if(t==zero) return one/vzero;
@ -264,17 +256,19 @@ lgammal_r(long double x, int *signgamp)
 	    x = -x;
 	}

-	if(x == 1 || x ==2) r = 0;
-	else if(x<2) {
-	    if(x<=0.8999996185302734) {
+    /* purge 1 and 2 */
+	if((ix==0x3fff || ix==0x4000) && (lx|llx)==0) r = 0;
+    /* for x < 2.0 */
+	else if(ix<0x4000) {
+	    if(x<=8.9999961853027344e-01) {
 		r = -logl(x);
-		if(x>=0.7315998077392578) {y = 1-x; i= 0;}
-		else if(x>=0.2316399812698364) {y= x-(tc-1); i=1;}
+		if(x>=7.3159980773925781e-01) {y = 1-x; i= 0;}
+		else if(x>=2.3163998126983643e-01) {y= x-(tc-1); i=1;}
 	  	else {y = x; i=2;}
 	    } else {
-	  	r = 0;
-	        if(x>=1.7316312789916992) {y=2-x;i=0;}
-	        else if(x>=1.2316322326660156) {y=x-tc;i=1;}
+		r = 0;
+	        if(x>=1.7316312789916992e+00) {y=2-x;i=0;}
+	        else if(x>=1.2316322326660156e+00) {y=x-tc;i=1;}
 		else {y=x-1;i=2;}
 	    }
 	    switch(i) {
@ -285,23 +279,24 @@ lgammal_r(long double x, int *signgamp)
 		p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*(a11+z*(a13+z*(a15+
 		    z*(a17+z*(a19+z*(a21+z*a23)))))))))));
 		p  = y*p1+p2;
-		r  += (p-y/2); break;
+		r  += p-y/2; break;
 	      case 1:
 		p = t0+y*t1+tt+y*y*(t2+y*(t3+y*(t4+y*(t5+y*(t6+y*(t7+y*(t8+
 		    y*(t9+y*(t10+y*(t11+y*(t12+y*(t13+y*(t14+y*(t15+y*(t16+
 		    y*(t17+y*(t18+y*(t19+y*(t20+y*(t21+y*(t22+y*(t23+
 		    y*(t24+y*(t25+y*(t26+y*(t27+y*(t28+y*(t29+y*(t30+
 		    y*(t31+y*t32))))))))))))))))))))))))))))));
-		r += (tf + p); break;
+		r += tf + p; break;
 	      case 2:
 		p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*(u5+y*(u6+y*(u7+
 		    y*(u8+y*(u9+y*u10))))))))));
 		p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*(v5+y*(v6+y*(v7+
 		    y*(v8+y*(v9+y*(v10+y*v11))))))))));
-		r += (-y/2 + p1/p2);
+		r += p1/p2-y/2;
 	    }
 	}
-	else if(x<8) {
+    /* x < 8.0 */
+	else if(ix<0x4002) {
 	    i = x;
 	    y = x-i;
 	    p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*(s6+y*(s7+y*(s8+
@ -318,7 +313,8 @@ lgammal_r(long double x, int *signgamp)
 	    case 3: z *= (y+2);		/* FALLTHRU */
 		    r += logl(z); break;
 	    }
-	} else if (x < 0x1p119L) {
+    /* 8.0 <= x < 2**(p+3) */
+	} else if (ix<0x3fff+116) {
 	    t = logl(x);
 	    z = one/x;
 	    y = z*z;
@ -326,6 +322,7 @@ lgammal_r(long double x, int *signgamp)
 		y*(w9+y*(w10+y*(w11+y*(w12+y*(w13+y*(w14+y*(w15+y*(w16+
 		y*(w17+y*w18)))))))))))))))));
 	    r = (x-half)*(t-one)+w;
+    /* 2**(p+3) <= x <= inf */
 	} else 
 	    r =  x*(logl(x)-1);
 	if(hx&0x8000) r = nadj - r;
--- a/lib/msun/ld80/e_lgammal_r.c
+++ b/lib/msun/ld80/e_lgammal_r.c
@ -14,12 +14,11 @@
 __FBSDID("$FreeBSD$");

 /*
- * See s_lgamma_r.c for complete comments.
+ * See e_lgamma_r.c for complete comments.
 *
 * Converted to long double by Steven G. Kargl.
 */

-#include <float.h>
 #ifdef __i386__
 #include <ieeefp.h>
 #endif
@ -219,8 +218,8 @@ sin_pil(long double x)

 	y = -x;

-	vz = y+0x1p63L;
-	z = vz-0x1p63L;
+	vz = y+0x1p63;
+	z = vz-0x1p63;
 	if (z == y)
 	    return zero;

@ -243,7 +242,7 @@ sin_pil(long double x)
 	    case 5:
 	    case 6:   y = -__kernel_cosl(pi*(y-1.5),zero); break;
 	    default:  y =  __kernel_sinl(pi*(y-2.0),zero,0); break;
-	}
+	    }
 	return -y;
 }

@ -253,31 +252,29 @@ lgammal_r(long double x, int *signgamp)
 	long double nadj,p,p1,p2,p3,q,r,t,w,y,z;
 	uint64_t lx;
 	int i;
-	uint16_t hx;
+	uint16_t hx,ix;

 	EXTRACT_LDBL80_WORDS(hx,lx,x);

-    /* purge off +-inf, NaN, +-0 */
+    /* purge +-Inf and NaNs */
 	*signgamp = 1;
-	if((hx & 0x7fff) == 0x7fff)	/* x is +-Inf or NaN */
-		return x*x;
-	if((hx==0||hx==0x8000)&&lx==0) {
-	    if (hx&0x8000)
-		*signgamp = -1;
-	    return one/vzero;
-	}
+	ix = hx&0x7fff;
+	if(ix==0x7fff) return x*x;

 	ENTERI();

-    /* purge off tiny and negative arguments */
-	if(fabsl(x)<0x1p-70L) {		/* |x|<2**-70, return -log(|x|) */
-	    if(hx&0x8000) {
-	        *signgamp = -1;
-	        RETURNI(-logl(-x));
-	    } else RETURNI(-logl(x));
+    /* purge +-0 and tiny arguments */
+	*signgamp = 1-2*(hx>>15);
+	if(ix<0x3fff-67) {		/* |x|<2**-(p+3), return -log(|x|) */
+	    if((ix|lx)==0)
+		RETURNI(one/vzero);
+	    RETURNI(-logl(fabsl(x)));
 	}
+
+    /* purge negative integers and start evaluation for other x < 0 */
 	if(hx&0x8000) {
-	    if(fabsl(x)>=0x1p63) 	/* |x|>=2**(p-1), must be -integer */
+	    *signgamp = 1;
+	    if(ix>=0x3fff+63) 		/* |x|>=2**(p-1), must be -integer */
 		RETURNI(one/vzero);
 	    t = sin_pil(x);
 	    if(t==zero) RETURNI(one/vzero); /* -integer */
@ -286,19 +283,30 @@ lgammal_r(long double x, int *signgamp)
 	    x = -x;
 	}

-    /* purge off 1 and 2 */
-	if(x == 1 || x == 2) r = 0;
+    /* purge 1 and 2 */
+	if((ix==0x3fff || ix==0x4000) && lx==0x8000000000000000ULL) r = 0;
    /* for x < 2.0 */
-	else if(x<2) {
-	    if(x<=0.8999996185302734) {	/* lgamma(x) = lgamma(x+1)-log(x) */
-		r = - logl(x);
-		if(x>=0.7315998077392578) {y = 1-x; i= 0;}
-		else if(x>=0.2316399812698364) {y= x-(tc-1); i=1;}
-	  	else {y = x; i=2;}
+	else if(ix<0x4000) {
+    /*
+     * XXX Supposedly, one can use the following information to replace the
+     * XXX FP rational expressions.  A similar approach is appropriate
+     * XXX for ld128, but one (may need?) needs to consider llx, too.
+     *
+     * 8.9999961853027344e-01 3ffe e666600000000000
+     * 7.3159980773925781e-01 3ffe bb4a200000000000
+     * 2.3163998126983643e-01 3ffc ed33080000000000
+     * 1.7316312789916992e+00 3fff dda6180000000000
+     * 1.2316322326660156e+00 3fff 9da6200000000000
+     */
+	    if(x<8.9999961853027344e-01) {
+		r = -logl(x);
+		if(x>=7.3159980773925781e-01) {y = 1-x; i= 0;}
+		else if(x>=2.3163998126983643e-01) {y= x-(tc-1); i=1;}
+		else {y = x; i=2;}
 	    } else {
 		r = 0;
-	        if(x>=1.7316312789916992) {y=2-x;i=0;}
-	        else if(x>=1.2316322326660156) {y=x-tc;i=1;}
+		if(x>=1.7316312789916992e+00) {y=2-x;i=0;}
+		else if(x>=1.2316322326660156e+00) {y=x-tc;i=1;}
 		else {y=x-1;i=2;}
 	    }
 	    switch(i) {
@ -307,19 +315,20 @@ lgammal_r(long double x, int *signgamp)
 		p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*(a10+z*a12)))));
 		p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*(a11+z*a13))))));
 		p  = y*p1+p2;
-		r  += (p-y/2); break;
+		r  += p-y/2; break;
 	      case 1:
 		p = t0+y*t1+tt+y*y*(t2+y*(t3+y*(t4+y*(t5+y*(t6+y*(t7+y*(t8+
 		    y*(t9+y*(t10+y*(t11+y*(t12+y*(t13+y*(t14+y*(t15+y*(t16+
 		    y*(t17+y*t18))))))))))))))));
-		r += (tf + p); break;
+		r += tf + p; break;
 	      case 2:
 		p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*(u5+y*u6))))));
 		p2 = 1+y*(v1+y*(v2+y*(v3+y*(v4+y*(v5+y*v6)))));
-		r += (-y/2 + p1/p2);
+		r += p1/p2-y/2;
 	    }
 	}
-	else if(x<8) {
+    /* x < 8.0 */
+	else if(ix<0x4002) {
 	    i = x;
 	    y = x-i;
 	    p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
@ -334,15 +343,15 @@ lgammal_r(long double x, int *signgamp)
 	    case 3: z *= (y+2);		/* FALLTHRU */
 		    r += logl(z); break;
 	    }
-    /* 8.0 <= x < 2**70 */
-	} else if (x < 0x1p70L) {
+    /* 8.0 <= x < 2**(p+3) */
+	} else if (ix<0x3fff+67) {
 	    t = logl(x);
 	    z = one/x;
 	    y = z*z;
 	    w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*(w6+y*(w7+y*w8)))))));
 	    r = (x-half)*(t-one)+w;
+    /* 2**(p+3) <= x <= inf */
 	} else 
-    /* 2**70 <= x <= inf */
 	    r =  x*(logl(x)-1);
 	if(hx&0x8000) r = nadj - r;
 	RETURNI(r);
--- a/lib/msun/src/e_lgamma_r.c
+++ b/lib/msun/src/e_lgamma_r.c
@ -201,28 +201,28 @@ sin_pi(double x)
 double
 __ieee754_lgamma_r(double x, int *signgamp)
 {
-	double t,y,z,nadj,p,p1,p2,p3,q,r,w;
+	double nadj,p,p1,p2,p3,q,r,t,w,y,z;
 	int32_t hx;
 	int i,ix,lx;

 	EXTRACT_WORDS(hx,lx,x);

-    /* purge off +-inf, NaN, +-0, tiny and negative arguments */
+    /* purge +-Inf and NaNs */
 	*signgamp = 1;
 	ix = hx&0x7fffffff;
 	if(ix>=0x7ff00000) return x*x;
-	if((ix|lx)==0) {
-	   if(hx<0)
-		*signgamp = -1;
-	    return one/vzero;
-	}
-	if(ix<0x3b900000) {	/* |x|<2**-70, return -log(|x|) */
-	    if(hx<0) {
-	        *signgamp = -1;
-	        return -__ieee754_log(-x);
-	    } else return -__ieee754_log(x);
+
+    /* purge +-0 and tiny arguments */
+	*signgamp = 1-2*((uint32_t)hx>>31);
+	if(ix<0x3c700000) {	/* |x|<2**-56, return -log(|x|) */
+	    if((ix|lx)==0)
+	        return one/vzero;
+	    return -__ieee754_log(fabs(x));
 	}
+
+    /* purge negative integers and start evaluation for other x < 0 */
 	if(hx<0) {
+	    *signgamp = 1;
 	    if(ix>=0x43300000) 	/* |x|>=2**52, must be -integer */
 		return one/vzero;
 	    t = sin_pi(x);
@ -232,7 +232,7 @@ __ieee754_lgamma_r(double x, int *signgamp)
 	    x = -x;
 	}

-    /* purge off 1 and 2 */
+    /* purge 1 and 2 */
 	if((((ix-0x3ff00000)|lx)==0)||(((ix-0x40000000)|lx)==0)) r = 0;
    /* for x < 2.0 */
 	else if(ix<0x40000000) {
@ -253,7 +253,7 @@ __ieee754_lgamma_r(double x, int *signgamp)
 		p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
 		p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
 		p  = y*p1+p2;
-		r  += (p-y/2); break;
+		r  += p-y/2; break;
 	      case 1:
 		z = y*y;
 		w = z*y;
@ -261,19 +261,20 @@ __ieee754_lgamma_r(double x, int *signgamp)
 		p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
 		p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
 		p  = z*p1-(tt-w*(p2+y*p3));
-		r += (tf + p); break;
+		r += tf + p; break;
 	      case 2:
 		p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
 		p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
-		r += (-0.5*y + p1/p2);
+		r += p1/p2-y/2;
 	    }
 	}
-	else if(ix<0x40200000) { 			/* x < 8.0 */
-	    i = (int)x;
-	    y = x-(double)i;
+    /* x < 8.0 */
+	else if(ix<0x40200000) {
+	    i = x;
+	    y = x-i;
 	    p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
 	    q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
-	    r = half*y+p/q;
+	    r = y/2+p/q;
 	    z = one;	/* lgamma(1+s) = log(s) + lgamma(s) */
 	    switch(i) {
 	    case 7: z *= (y+6);		/* FALLTHRU */
@ -283,15 +284,15 @@ __ieee754_lgamma_r(double x, int *signgamp)
 	    case 3: z *= (y+2);		/* FALLTHRU */
 		    r += __ieee754_log(z); break;
 	    }
-    /* 8.0 <= x < 2**58 */
-	} else if (ix < 0x43900000) {
+    /* 8.0 <= x < 2**56 */
+	} else if (ix < 0x43700000) {
 	    t = __ieee754_log(x);
 	    z = one/x;
 	    y = z*z;
 	    w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
 	    r = (x-half)*(t-one)+w;
 	} else
-    /* 2**58 <= x <= inf */
+    /* 2**56 <= x <= inf */
 	    r =  x*(__ieee754_log(x)-one);
 	if(hx<0) r = nadj - r;
 	return r;
--- a/lib/msun/src/e_lgammaf_r.c
+++ b/lib/msun/src/e_lgammaf_r.c
@ -122,29 +122,29 @@ sin_pif(float x)
 float
 __ieee754_lgammaf_r(float x, int *signgamp)
 {
-	float t,y,z,nadj,p,p1,p2,p3,q,r,w;
+	float nadj,p,p1,p2,p3,q,r,t,w,y,z;
 	int32_t hx;
 	int i,ix;

 	GET_FLOAT_WORD(hx,x);

-    /* purge off +-inf, NaN, +-0, tiny and negative arguments */
+    /* purge +-Inf and NaNs */
 	*signgamp = 1;
 	ix = hx&0x7fffffff;
 	if(ix>=0x7f800000) return x*x;
-	if(ix==0) {
-	    if(hx<0)
-		*signgamp = -1;
-	    return one/vzero;
-	}
-	if(ix<0x35000000) {	/* |x|<2**-21, return -log(|x|) */
-	    if(hx<0) {
-	        *signgamp = -1;
-	        return -__ieee754_logf(-x);
-	    } else return -__ieee754_logf(x);
+
+    /* purge +-0 and tiny arguments */
+	*signgamp = 1-2*((uint32_t)hx>>31);
+	if(ix<0x32000000) {		/* |x|<2**-27, return -log(|x|) */
+	    if(ix==0)
+	        return one/vzero;
+	    return -__ieee754_logf(fabsf(x));
 	}
+
+    /* purge negative integers and start evaluation for other x < 0 */
 	if(hx<0) {
-	    if(ix>=0x4b000000) 	/* |x|>=2**23, must be -integer */
+	    *signgamp = 1;
+	    if(ix>=0x4b000000) 		/* |x|>=2**23, must be -integer */
 		return one/vzero;
 	    t = sin_pif(x);
 	    if(t==zero) return one/vzero; /* -integer */
@ -153,7 +153,7 @@ __ieee754_lgammaf_r(float x, int *signgamp)
 	    x = -x;
 	}

-    /* purge off 1 and 2 */
+    /* purge 1 and 2 */
 	if (ix==0x3f800000||ix==0x40000000) r = 0;
    /* for x < 2.0 */
 	else if(ix<0x40000000) {
@ -174,17 +174,18 @@ __ieee754_lgammaf_r(float x, int *signgamp)
 		p1 = a0+z*(a2+z*a4);
 		p2 = z*(a1+z*(a3+z*a5));
 		p  = y*p1+p2;
-		r  += (p-y/2); break;
+		r  += p-y/2; break;
 	      case 1:
 		p = t0+y*t1+y*y*(t2+y*(t3+y*(t4+y*(t5+y*(t6+y*t7)))));
-		r += (tf + p); break;
+		r += tf + p; break;
 	      case 2:
 		p1 = y*(u0+y*(u1+y*u2));
 		p2 = one+y*(v1+y*(v2+y*v3));
-		r += (p1/p2-y/2);
+		r += p1/p2-y/2;
 	    }
 	}
-	else if(ix<0x41000000) { 			/* x < 8.0 */
+    /* x < 8.0 */
+	else if(ix<0x41000000) {
 	    i = x;
 	    y = x-i;
 	    p = y*(s0+y*(s1+y*(s2+y*s3)));
@ -199,15 +200,15 @@ __ieee754_lgammaf_r(float x, int *signgamp)
 	    case 3: z *= (y+2);		/* FALLTHRU */
 		    r += __ieee754_logf(z); break;
 	    }
-    /* 8.0 <= x < 2**24 */
-	} else if (ix < 0x4b800000) {
+    /* 8.0 <= x < 2**27 */
+	} else if (ix < 0x4d000000) {
 	    t = __ieee754_logf(x);
 	    z = one/x;
 	    y = z*z;
 	    w = w0+z*(w1+y*w2);
 	    r = (x-half)*(t-one)+w;
 	} else
-    /* 2**24 <= x <= inf */
+    /* 2**27 <= x <= inf */
 	    r =  x*(__ieee754_logf(x)-one);
 	if(hx<0) r = nadj - r;
 	return r;