Make /bin/expr support 64bit numeric range and range checks by default,

again. This brings back the behaviour of expr in FreeBSD-4, which had been reverted due to an assumed incompatibility with POSIX.1 for FreeBSD-5. This issue has been discussed in the freebsd-standards list, and the consensus was that POSIX.1 is in fact not violated by this extension, since it affects only cases of POSIX undefined behaviour (overflow of signed long). Other operating systems did upgrade their versions of expr to support 64bit range, after it had been initially brought to FreeBSD. They have used it for a decade without problems, meanwhile. The -e option is retained, but it will only select less strict checking of numeric parameters (leading white-space, leading "+" are allowed and skipped, an empty string is considered to represent 0 in numeric context.) The call of check_utility_compat() as a means of establishing backwards compatibility with FreeBSD-4 is considered obsolete, but preserved in this commit. It is expected to be removed in a later revision of this file. Reviewed by: bde, das, jilles MFC after: 2 months (those parts that do not violate POLA)
svn path=/head/; revision=223881
2011-07-09 12:05:53 +00:00 · 2011-07-09 12:05:53 +00:00 · fa717604a4 · 2020-12-20 02:59:44 +00:00
commit fa717604a4
parent 55f0bfbadc
2 changed files with 154 additions and 142 deletions
--- a/bin/expr/expr.1
+++ b/bin/expr/expr.1
@ -50,25 +50,25 @@ and writes the result on standard output.
 All operators and operands must be passed as separate arguments.
 Several of the operators have special meaning to command interpreters
 and must therefore be quoted appropriately.
-All integer operands are interpreted in base 10.
-.Pp
-Arithmetic operations are performed using signed integer math.
-If the
-.Fl e
-flag is specified, arithmetic uses the C
-.Vt intmax_t
-data type (the largest integral type available), and
+All integer operands are interpreted in base 10 and must consist of only
+an optional leading minus sign followed by one or more digits (unless
+less strict parsing has been enabled for backwards compatibility with
+prior versions of
 .Nm
-will detect arithmetic overflow and return an error indication.
-If a numeric operand is specified which is so large as to overflow
-conversion to an integer, it is parsed as a string instead.
-If
+in
+.Fx ) .
+.Pp
+Arithmetic operations are performed using signed integer math with a
+range according to the C
+.Vt intmax_t
+data type (the largest signed integral type available).
+All conversions and operations are checked for overflow.
+Overflow results in program termination with an error message on stderr
+and with an error status.
+.Pp
+The
 .Fl e
-is not specified, arithmetic operations and parsing of integer
-arguments will overflow silently according to the rules of the C
-standard, using the
-.Vt long
-data type.
+option enables backwards compatible behaviour as detailed below.
 .Pp
 Operators are listed below in order of increasing precedence; all
 are left-associative.
@ -82,7 +82,9 @@ Return the evaluation of
 .Ar expr1
 if it is neither an empty string nor zero;
 otherwise, returns the evaluation of
-.Ar expr2 .
+.Ar expr2
+if it is not an empty string;
+otherwise, returns zero.
 .It Ar expr1 Li & Ar expr2
 Return the evaluation of
 .Ar expr1
@ -163,25 +165,26 @@ function (with a
 .Fa utility
 argument of
 .Dq Li expr )
-is used to determine whether compatibility mode should be enabled.
+is used to determine whether backwards compatibility mode should be enabled.
 This feature is intended for use as a transition and debugging aid, when
 .Nm
 is used in complex scripts which cannot easily be recast to avoid the
 non-portable usage.
-Enabling compatibility mode
-also implicitly enables the
+Enabling backwards compatibility mode also implicitly enables the
 .Fl e
 option, since this matches the historic behavior of
 .Nm
 in
-.Fx .
+.Fx . This option makes number parsing less strict and permits leading
+white space and an optional leading plus sign. In addition, empty operands
+have an implied value of zero in numeric context.
 For historical reasons, defining the environment variable
 .Ev EXPR_COMPAT
-also enables compatibility mode.
+also enables backwards compatibility mode.
 .Sh ENVIRONMENT
 .Bl -tag -width ".Ev EXPR_COMPAT"
 .It Ev EXPR_COMPAT
-If set, enables compatibility mode.
+If set, enables backwards compatibility mode.
 .El
 .Sh EXIT STATUS
 The
@ -270,8 +273,37 @@ expands to the required number.
 The
 .Nm
 utility conforms to
-.St -p1003.1-2001 ,
-provided that compatibility mode is not enabled.
+.St -p1003.1-2008 ,
+provided that backwards compatibility mode is not enabled.
+.Pp
+Backwards compatibility mode performs less strict checks of numeric arguments:
+.Bl -bullet
+.It
+An empty operand string is interpreted as 0.
+.El
+.Bl -bullet
+.It
+Leading white space and/or a plus sign before an otherwise valid positive
+numeric operand are allowed and will be ignored.
+.El
+.Pp
+The extended arithmetic range and overflow checks do not conflict with
+POSIX's requirement that arithmetic be done using signed longs, since
+they only make a difference to the result in cases where using signed
+longs would give undefined behavior.
+.Pp
+According to the
+.Tn POSIX
+standard, the use of string arguments
+.Va length ,
+.Va substr ,
+.Va index ,
+or
+.Va match
+produces undefined results. In this version of
+.Nm ,
+these arguments are treated just as their respective string values.
+.Pp
 The
 .Fl e
 flag is an extension.
--- a/bin/expr/expr.y
+++ b/bin/expr/expr.y
@ -42,13 +42,15 @@ struct val {

 struct val *result;

+void		assert_to_integer(struct val *);
 int		chk_div(intmax_t, intmax_t);
 int		chk_minus(intmax_t, intmax_t, intmax_t);
 int		chk_plus(intmax_t, intmax_t, intmax_t);
 int		chk_times(intmax_t, intmax_t, intmax_t);
 void		free_value(struct val *);
-int		is_zero_or_null(struct val *);
+int		is_integer(const char *);
 int		isstring(struct val *);
+int		is_zero_or_null(struct val *);
 struct val	*make_integer(intmax_t);
 struct val	*make_str(const char *);
 struct val	*op_and(struct val *, struct val *);
@ -65,13 +67,13 @@ struct val	*op_or(struct val *, struct val *);
 struct val	*op_plus(struct val *, struct val *);
 struct val	*op_rem(struct val *, struct val *);
 struct val	*op_times(struct val *, struct val *);
-intmax_t	to_integer(struct val *);
+int		to_integer(struct val *);
 void		to_string(struct val *);
 int		yyerror(const char *);
 int		yylex(void);
 int		yyparse(void);

-static int	eflag;
+static int	nonposix;
 char **av;
 %}

@ -134,37 +136,16 @@ struct val *
 make_str(const char *s)
 {
 	struct val *vp;
-	char *ep;

 	vp = (struct val *) malloc (sizeof (*vp));
 	if (vp == NULL || ((vp->u.s = strdup (s)) == NULL)) {
 		errx(ERR_EXIT, "malloc() failed");
 	}

-	/*
-	 * Previously we tried to scan the string to see if it ``looked like''
-	 * an integer (erroneously, as it happened).  Let strtoimax() do the
-	 * dirty work.  We could cache the value, except that we are using
-	 * a union and need to preserve the original string form until we
-	 * are certain that it is not needed.
-	 *
-	 * IEEE Std.1003.1-2001 says:
-	 * /integer/ An argument consisting only of an (optional) unary minus  
-	 *	     followed by digits.          
-	 *
-	 * This means that arguments which consist of digits followed by
-	 * non-digits MUST NOT be considered integers.  strtoimax() will
-	 * figure this out for us.
-	 */
-	if (eflag)
-		(void)strtoimax(s, &ep, 10);
-	else
-		(void)strtol(s, &ep, 10);
-
-	if (*ep != '\0')
-		vp->type = string;
-	else	
+	if (is_integer(s))
 		vp->type = numeric_string;
+	else
+		vp->type = string;

 	return vp;
 }
@ -178,31 +159,33 @@ free_value(struct val *vp)
 }


-intmax_t
+int
 to_integer(struct val *vp)
 {
 	intmax_t i;

-	if (vp->type == integer)
-		return 1;
-
-	if (vp->type == string)
-		return 0;
-
-	/* vp->type == numeric_string, make it numeric */
-	errno = 0;
-	if (eflag) {
+	/* we can only convert numeric_string to integer, here */
+	if (vp->type == numeric_string) {
+		errno = 0;
 		i  = strtoimax(vp->u.s, (char **)NULL, 10);
-		if (errno == ERANGE)
-			err(ERR_EXIT, NULL);
-	} else {
-		i = strtol(vp->u.s, (char **)NULL, 10);
+		/* just keep as numeric_string, if the conversion fails */
+		if (errno != ERANGE) {
+			free (vp->u.s);
+			vp->u.i = i;
+			vp->type = integer;
+		}
 	}
+	return (vp->type == integer);
+}

-	free (vp->u.s);
-	vp->u.i = i;
-	vp->type = integer;
-	return 1;
+
+void
+assert_to_integer(struct val *vp)
+{
+	if (vp->type == string)
+		errx(ERR_EXIT, "not a decimal number: '%s'", vp->u.s);
+	if (!to_integer(vp))
+		errx(ERR_EXIT, "operand too large: '%s'", vp->u.s);
 }

 void
@ -229,6 +212,25 @@ to_string(struct val *vp)
 }


+int
+is_integer(const char *s)
+{
+	if (nonposix) {
+		if (*s == '\0')
+			return (1);
+		while (isspace((unsigned char)*s))
+			s++;
+	}
+	if (*s == '-' || (nonposix && *s == '+'))
+		s++;
+	if (*s == '\0')
+		return (0);
+	while (isdigit((unsigned char)*s))
+		s++;
+	return (*s == '\0');
+}
+
+
 int
 isstring(struct val *vp)
 {
@ -282,12 +284,12 @@ main(int argc, char *argv[])
 	if (getenv("EXPR_COMPAT") != NULL
 	    || check_utility_compat("expr")) {
 		av = argv + 1;
-		eflag = 1;
+		nonposix = 1;
 	} else {
 		while ((c = getopt(argc, argv, "e")) != -1)
 			switch (c) {
 			case 'e':
-				eflag = 1;
+				nonposix = 1;
 				break;

 			default:
@ -318,15 +320,17 @@ yyerror(const char *s __unused)
 struct val *
 op_or(struct val *a, struct val *b)
 {
-	if (is_zero_or_null (a)) {
-		free_value (a);
-		return (b);
-	} else {
-		free_value (b);
+	if (!is_zero_or_null(a)) {
+		free_value(b);
 		return (a);
 	}
+	free_value(a);
+	if (!is_zero_or_null(b))
+		return (b);
+	free_value(b);
+	return (make_integer((intmax_t)0));
 }
-		
+
 struct val *
 op_and(struct val *a, struct val *b)
 {
@ -350,8 +354,8 @@ op_eq(struct val *a, struct val *b)
 		to_string (b);	
 		r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) == 0));
 	} else {
-		(void)to_integer(a);
-		(void)to_integer(b);
+		assert_to_integer(a);
+		assert_to_integer(b);
 		r = make_integer ((intmax_t)(a->u.i == b->u.i));
 	}

@ -370,8 +374,8 @@ op_gt(struct val *a, struct val *b)
 		to_string (b);
 		r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) > 0));
 	} else {
-		(void)to_integer(a);
-		(void)to_integer(b);
+		assert_to_integer(a);
+		assert_to_integer(b);
 		r = make_integer ((intmax_t)(a->u.i > b->u.i));
 	}

@ -390,8 +394,8 @@ op_lt(struct val *a, struct val *b)
 		to_string (b);
 		r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) < 0));
 	} else {
-		(void)to_integer(a);
-		(void)to_integer(b);
+		assert_to_integer(a);
+		assert_to_integer(b);
 		r = make_integer ((intmax_t)(a->u.i < b->u.i));
 	}

@ -410,8 +414,8 @@ op_ge(struct val *a, struct val *b)
 		to_string (b);
 		r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) >= 0));
 	} else {
-		(void)to_integer(a);
-		(void)to_integer(b);
+		assert_to_integer(a);
+		assert_to_integer(b);
 		r = make_integer ((intmax_t)(a->u.i >= b->u.i));
 	}

@ -430,8 +434,8 @@ op_le(struct val *a, struct val *b)
 		to_string (b);
 		r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) <= 0));
 	} else {
-		(void)to_integer(a);
-		(void)to_integer(b);
+		assert_to_integer(a);
+		assert_to_integer(b);
 		r = make_integer ((intmax_t)(a->u.i <= b->u.i));
 	}

@ -450,8 +454,8 @@ op_ne(struct val *a, struct val *b)
 		to_string (b);
 		r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) != 0));
 	} else {
-		(void)to_integer(a);
-		(void)to_integer(b);
+		assert_to_integer(a);
+		assert_to_integer(b);
 		r = make_integer ((intmax_t)(a->u.i != b->u.i));
 	}

@ -479,17 +483,13 @@ op_plus(struct val *a, struct val *b)
 {
 	struct val *r;

-	if (!to_integer(a) || !to_integer(b)) {
-		errx(ERR_EXIT, "non-numeric argument");
-	}
+	assert_to_integer(a);
+	assert_to_integer(b);

-	if (eflag) {
-		r = make_integer(a->u.i + b->u.i);
-		if (chk_plus(a->u.i, b->u.i, r->u.i)) {
-			errx(ERR_EXIT, "overflow");
-		}
-	} else
-		r = make_integer((long)a->u.i + (long)b->u.i);
+	r = make_integer(a->u.i + b->u.i);
+	if (chk_plus(a->u.i, b->u.i, r->u.i)) {
+		errx(ERR_EXIT, "overflow");
+	}

 	free_value (a);
 	free_value (b);
@ -516,17 +516,13 @@ op_minus(struct val *a, struct val *b)
 {
 	struct val *r;

-	if (!to_integer(a) || !to_integer(b)) {
-		errx(ERR_EXIT, "non-numeric argument");
-	}
+	assert_to_integer(a);
+	assert_to_integer(b);

-	if (eflag) {
-		r = make_integer(a->u.i - b->u.i);
-		if (chk_minus(a->u.i, b->u.i, r->u.i)) {
-			errx(ERR_EXIT, "overflow");
-		}
-	} else
-		r = make_integer((long)a->u.i - (long)b->u.i);
+	r = make_integer(a->u.i - b->u.i);
+	if (chk_minus(a->u.i, b->u.i, r->u.i)) {
+		errx(ERR_EXIT, "overflow");
+	}

 	free_value (a);
 	free_value (b);
@ -550,17 +546,13 @@ op_times(struct val *a, struct val *b)
 {
 	struct val *r;

-	if (!to_integer(a) || !to_integer(b)) {
-		errx(ERR_EXIT, "non-numeric argument");
-	}
+	assert_to_integer(a);
+	assert_to_integer(b);

-	if (eflag) {
-		r = make_integer(a->u.i * b->u.i);
-		if (chk_times(a->u.i, b->u.i, r->u.i)) {
-			errx(ERR_EXIT, "overflow");
-		}
-	} else
-		r = make_integer((long)a->u.i * (long)b->u.i);
+	r = make_integer(a->u.i * b->u.i);
+	if (chk_times(a->u.i, b->u.i, r->u.i)) {
+		errx(ERR_EXIT, "overflow");
+	}

 	free_value (a);
 	free_value (b);
@ -583,21 +575,16 @@ op_div(struct val *a, struct val *b)
 {
 	struct val *r;

-	if (!to_integer(a) || !to_integer(b)) {
-		errx(ERR_EXIT, "non-numeric argument");
-	}
+	assert_to_integer(a);
+	assert_to_integer(b);

 	if (b->u.i == 0) {
 		errx(ERR_EXIT, "division by zero");
 	}
-
-	if (eflag) {
-		r = make_integer(a->u.i / b->u.i);
-		if (chk_div(a->u.i, b->u.i)) {
-			errx(ERR_EXIT, "overflow");
-		}
-	} else
-		r = make_integer((long)a->u.i / (long)b->u.i);
+	if (chk_div(a->u.i, b->u.i)) {
+		errx(ERR_EXIT, "overflow");
+	}
+	r = make_integer(a->u.i / b->u.i);

 	free_value (a);
 	free_value (b);
@ -609,19 +596,12 @@ op_rem(struct val *a, struct val *b)
 {
 	struct val *r;

-	if (!to_integer(a) || !to_integer(b)) {
-		errx(ERR_EXIT, "non-numeric argument");
-	}
-
+	assert_to_integer(a);
+	assert_to_integer(b);
 	if (b->u.i == 0) {
 		errx(ERR_EXIT, "division by zero");
 	}
-
-	if (eflag)
-		r = make_integer(a->u.i % b->u.i);
-	        /* chk_rem necessary ??? */
-	else
-		r = make_integer((long)a->u.i % (long)b->u.i);
+	r = make_integer(a->u.i % b->u.i);

 	free_value (a);
 	free_value (b);