Make expr POSIX-compliant, and fix some bugs. Specifically:

- expr must conform to the Utility Syntax Guidelines, so use
getopt() to eat the (non-existent) options.

- Use the Standard type intmax_t for arithmetic.

- If an argument cannot be *completely* converted to an integer, then
it is a string.

Additionally make some style cleanups near the modified lines.  This
utility is still not completely style-compliant.
This commit is contained in:
wollman 2002-03-22 20:18:26 +00:00
parent 7a9725738b
commit 4e608fc32e
2 changed files with 214 additions and 103 deletions

View File

@ -30,7 +30,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd July 3, 1993
.Dd March 22, 2002
.Dt EXPR 1
.Os
.Sh NAME
@ -38,6 +38,7 @@
.Nd evaluate expression
.Sh SYNOPSIS
.Nm
.Op Fl \&-
.Ar expression
.Sh DESCRIPTION
The
@ -46,12 +47,21 @@ utility evaluates
.Ar expression
and writes the result on standard output.
.Pp
All operators are separate arguments to the
.Nm
utility.
Characters special to the command interpreter must be escaped.
All operators and operands must be passed as separate arguments.
Several of the operators have special meaning to command interpreters
and must therefore be quoted appropriately.
.Pp
Operators are listed below in order of increasing precedence.
Arithmetic operations are performed using signed integer math,
in the largest integral type available in the C language. The
.Nm
utility will detect arithmetic overflow and division by zero, and
returns with an exit status of 2 in those cases. If a numeric operand
is specified which is so large as to overflow conversion to an integer,
it is parsed as a string instead. All numeric operands are interpreted
in base 10.
.Pp
Operators are listed below in order of increasing precedence; all
are left-associative.
Operators with equal precedence are grouped within { } symbols.
.Bl -tag -width indent
.It Ar expr1 Li | Ar expr2
@ -82,8 +92,9 @@ operator matches
.Ar expr1
against
.Ar expr2 ,
which must be a regular expression. The regular expression is anchored
to the beginning of the string with an implicit
which must be a basic regular expression.
The regular expression is anchored
to the beginning of the string with an implicit
.Dq ^ .
.Pp
If the match succeeds and the pattern contains at least one regular
@ -99,19 +110,89 @@ otherwise 0.
.El
.Pp
Parentheses are used for grouping in the usual manner.
.Pp
This version of
.Nm
adheres to the
.Tn POSIX
Utility Syntax Guidelines, which require that a leading argument beginning
with a minus sign be considered an option to the program.
The standard
.Ql \&--
syntax may be used to prevent this interpretation.
However, many historic implementations of
.Nm ,
including the one in previous versions of
.Fx ,
will not permit this syntax.
See the examples below for portable ways to guarantee the correct
interpretation.
.Pp
The
.Nm
utility makes no lexical distinction between arguments which may be
operators and arguments which may be operands.
An operand which is lexically identical to an operator will be considered a
syntax error.
See the examples below for a work-around.
.Pp
The syntax of the
.Nm
command in general is historic and inconvenient.
New applications are advised to use shell arithmetic rather than
.Nm .
.Sh EXAMPLES
.Bl -enum
.It
The following example adds one to the variable a.
.Dl a=`expr $a + 1`
The following example (in
.Xr sh 1
syntax) adds one to the variable
.Va a .
.Dl a=$(expr $a + 1)
.Li
This will fail if the value if
.Va a
is a negative number.
To protect negative values of
.Va a
from being interpreted as options to the
.Nm
command, one might rearrange the expression:
.Dl a=$(expr 1 + $a)
.Li
More generally, parenthesize possibly-negative values:
.Dl a=$(expr \e( $a \e) + 1)
.It
The following example returns the filename portion of a pathname stored
in variable a. The // characters act to eliminate ambiguity with the
division operator.
.Dl expr "//$a" Li : '.*/\e(.*\e)'
The following example prints the filename portion of a pathname stored
in variable
.Va a .
Since
.Va a
might represent the path
.Pa / ,
it is necessary to prevent it from being interpreted as the division operator.
The
.Li //
characters resolve this ambiguity.
.Dl expr \*q//$a\*q \&: '.*/\e(.*\e)'
.It
The following example returns the number of characters in variable a.
.Dl expr $a Li : '.*'
The following examples output the number of characters in variable
.Va a .
Again, if
.Va a
might begin with a hyphen, it is necessary to prevent it from being
interpreted as an option to
.Nm .
If the
.Nm
command conforms to
.St -p1003.1-2001 ,
this is simple:
.Dl expr -- \*q$a\*q \&: \*q.*\*q
.Li
For portability to older systems, however, a more complicated command
is required:
.Dl expr \e( \*qX$a\*q \&: \*q.*\*q \e) - 1
.El
.Sh DIAGNOSTICS
The
@ -132,4 +213,4 @@ the expression is invalid.
The
.Nm
utility conforms to
.St -p1003.2 .
.St -p1003.1-2001 .

View File

@ -8,16 +8,25 @@
*/
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <regex.h>
#include <inttypes.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
#include <unistd.h>
/*
* POSIX specifies a specific error code for syntax errors. We exit
* with this code for all errors.
*/
#define ERR_EXIT 2
enum valtype {
integer, numeric_string, string
} ;
@ -26,20 +35,20 @@ struct val {
enum valtype type;
union {
char *s;
quad_t i;
intmax_t i;
} u;
} ;
struct val *result;
int chk_div(quad_t, quad_t);
int chk_minus(quad_t, quad_t, quad_t);
int chk_plus(quad_t, quad_t, quad_t);
int chk_times(quad_t, quad_t, quad_t);
int chk_div(intmax_t, intmax_t);
int chk_minus(intmax_t, intmax_t, intmax_t);
int chk_plus(intmax_t, intmax_t, intmax_t);
int chk_times(intmax_t, intmax_t, intmax_t);
void free_value(struct val *);
int is_zero_or_null(struct val *);
int isstring(struct val *);
struct val *make_integer(quad_t);
struct val *make_integer(intmax_t);
struct val *make_str(const char *);
struct val *op_and(struct val *, struct val *);
struct val *op_colon(struct val *, struct val *);
@ -55,7 +64,7 @@ struct val *op_or(struct val *, struct val *);
struct val *op_plus(struct val *, struct val *);
struct val *op_rem(struct val *, struct val *);
struct val *op_times(struct val *, struct val *);
quad_t to_integer(struct val *);
intmax_t to_integer(struct val *);
void to_string(struct val *);
int yyerror(const char *);
int yylex(void);
@ -105,13 +114,13 @@ expr: TOKEN
%%
struct val *
make_integer(quad_t i)
make_integer(intmax_t i)
{
struct val *vp;
vp = (struct val *) malloc (sizeof (*vp));
if (vp == NULL) {
errx (2, "malloc() failed");
errx(ERR_EXIT, "malloc() failed");
}
vp->type = integer;
@ -123,26 +132,34 @@ struct val *
make_str(const char *s)
{
struct val *vp;
size_t i;
int isint;
char *ep;
vp = (struct val *) malloc (sizeof (*vp));
if (vp == NULL || ((vp->u.s = strdup (s)) == NULL)) {
errx (2, "malloc() failed");
errx(ERR_EXIT, "malloc() failed");
}
for(i = 1, isint = isdigit(s[0]) || s[0] == '-';
isint && i < strlen(s);
i++)
{
if(!isdigit(s[i]))
isint = 0;
}
/*
* Previously we tried to scan the string to see if it ``looked like''
* an integer (erroneously, as it happened). Let strtoimax() do the
* dirty work. We could cache the value, except that we are using
* a union and need to preserve the original string form until we
* are certain that it is not needed.
*
* IEEE Std.1003.1-2001 says:
* /integer/ An argument consisting only of an (optional) unary minus
* followed by digits.
*
* This means that arguments which consist of digits followed by
* non-digits MUST NOT be considered integers. strtoimax() will
* figure this out for us.
*/
(void)strtoimax(s, &ep, 10);
if (isint)
vp->type = numeric_string;
else
if (*ep != '\0')
vp->type = string;
else
vp->type = numeric_string;
return vp;
}
@ -156,10 +173,10 @@ free_value(struct val *vp)
}
quad_t
intmax_t
to_integer(struct val *vp)
{
quad_t i;
intmax_t i;
if (vp->type == integer)
return 1;
@ -169,10 +186,10 @@ to_integer(struct val *vp)
/* vp->type == numeric_string, make it numeric */
errno = 0;
i = strtoq(vp->u.s, (char**)NULL, 10);
if (errno != 0) {
errx (2, "overflow");
}
i = strtoimax(vp->u.s, (char **)NULL, 10);
if (errno == ERANGE)
err(ERR_EXIT, NULL);
free (vp->u.s);
vp->u.i = i;
vp->type = integer;
@ -187,12 +204,17 @@ to_string(struct val *vp)
if (vp->type == string || vp->type == numeric_string)
return;
tmp = malloc ((size_t)25);
if (tmp == NULL) {
errx (2, "malloc() failed");
}
/*
* log_10(x) ~= 0.3 * log_2(x). Rounding up gives the number
* of digits; add one each for the sign and terminating null
* character, respectively.
*/
#define NDIGITS(x) (3 * (sizeof(x) * CHAR_BIT) / 10 + 1 + 1 + 1)
tmp = malloc(NDIGITS(vp->u.i));
if (tmp == NULL)
errx(ERR_EXIT, "malloc() failed");
sprintf (tmp, "%lld", (long long)vp->u.i);
sprintf(tmp, "%jd", vp->u.i);
vp->type = string;
vp->u.s = tmp;
}
@ -243,26 +265,34 @@ is_zero_or_null(struct val *vp)
}
int
main(int argc __unused, char *argv[])
main(int argc, char *argv[])
{
int c;
setlocale (LC_ALL, "");
while ((c = getopt(argc, argv, "")) != -1)
switch (c) {
default:
fprintf(stderr, "usage: expr [--] expression\n");
exit(ERR_EXIT);
}
av = argv + 1;
av = argv + optind;
yyparse ();
yyparse();
if (result->type == integer)
printf ("%lld\n", (long long)result->u.i);
printf("%jd\n", result->u.i);
else
printf ("%s\n", result->u.s);
printf("%s\n", result->u.s);
return (is_zero_or_null (result));
return (is_zero_or_null(result));
}
int
yyerror(const char *s __unused)
{
errx (2, "syntax error");
errx(ERR_EXIT, "syntax error");
}
@ -284,7 +314,7 @@ op_and(struct val *a, struct val *b)
if (is_zero_or_null (a) || is_zero_or_null (b)) {
free_value (a);
free_value (b);
return (make_integer ((quad_t)0));
return (make_integer ((intmax_t)0));
} else {
free_value (b);
return (a);
@ -299,11 +329,11 @@ op_eq(struct val *a, struct val *b)
if (isstring (a) || isstring (b)) {
to_string (a);
to_string (b);
r = make_integer ((quad_t)(strcoll (a->u.s, b->u.s) == 0));
r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) == 0));
} else {
(void)to_integer(a);
(void)to_integer(b);
r = make_integer ((quad_t)(a->u.i == b->u.i));
r = make_integer ((intmax_t)(a->u.i == b->u.i));
}
free_value (a);
@ -319,11 +349,11 @@ op_gt(struct val *a, struct val *b)
if (isstring (a) || isstring (b)) {
to_string (a);
to_string (b);
r = make_integer ((quad_t)(strcoll (a->u.s, b->u.s) > 0));
r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) > 0));
} else {
(void)to_integer(a);
(void)to_integer(b);
r = make_integer ((quad_t)(a->u.i > b->u.i));
r = make_integer ((intmax_t)(a->u.i > b->u.i));
}
free_value (a);
@ -339,11 +369,11 @@ op_lt(struct val *a, struct val *b)
if (isstring (a) || isstring (b)) {
to_string (a);
to_string (b);
r = make_integer ((quad_t)(strcoll (a->u.s, b->u.s) < 0));
r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) < 0));
} else {
(void)to_integer(a);
(void)to_integer(b);
r = make_integer ((quad_t)(a->u.i < b->u.i));
r = make_integer ((intmax_t)(a->u.i < b->u.i));
}
free_value (a);
@ -359,11 +389,11 @@ op_ge(struct val *a, struct val *b)
if (isstring (a) || isstring (b)) {
to_string (a);
to_string (b);
r = make_integer ((quad_t)(strcoll (a->u.s, b->u.s) >= 0));
r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) >= 0));
} else {
(void)to_integer(a);
(void)to_integer(b);
r = make_integer ((quad_t)(a->u.i >= b->u.i));
r = make_integer ((intmax_t)(a->u.i >= b->u.i));
}
free_value (a);
@ -379,11 +409,11 @@ op_le(struct val *a, struct val *b)
if (isstring (a) || isstring (b)) {
to_string (a);
to_string (b);
r = make_integer ((quad_t)(strcoll (a->u.s, b->u.s) <= 0));
r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) <= 0));
} else {
(void)to_integer(a);
(void)to_integer(b);
r = make_integer ((quad_t)(a->u.i <= b->u.i));
r = make_integer ((intmax_t)(a->u.i <= b->u.i));
}
free_value (a);
@ -399,11 +429,11 @@ op_ne(struct val *a, struct val *b)
if (isstring (a) || isstring (b)) {
to_string (a);
to_string (b);
r = make_integer ((quad_t)(strcoll (a->u.s, b->u.s) != 0));
r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) != 0));
} else {
(void)to_integer(a);
(void)to_integer(b);
r = make_integer ((quad_t)(a->u.i != b->u.i));
r = make_integer ((intmax_t)(a->u.i != b->u.i));
}
free_value (a);
@ -412,7 +442,7 @@ op_ne(struct val *a, struct val *b)
}
int
chk_plus(quad_t a, quad_t b, quad_t r)
chk_plus(intmax_t a, intmax_t b, intmax_t r)
{
/* sum of two positive numbers must be positive */
if (a > 0 && b > 0 && r <= 0)
@ -430,12 +460,12 @@ op_plus(struct val *a, struct val *b)
struct val *r;
if (!to_integer (a) || !to_integer (b)) {
errx (2, "non-numeric argument");
errx(ERR_EXIT, "non-numeric argument");
}
r = make_integer (/*(quad_t)*/(a->u.i + b->u.i));
r = make_integer (/*(intmax_t)*/(a->u.i + b->u.i));
if (chk_plus (a->u.i, b->u.i, r->u.i)) {
errx (2, "overflow");
errx(ERR_EXIT, "overflow");
}
free_value (a);
free_value (b);
@ -443,16 +473,16 @@ op_plus(struct val *a, struct val *b)
}
int
chk_minus(quad_t a, quad_t b, quad_t r)
chk_minus(intmax_t a, intmax_t b, intmax_t r)
{
/* special case subtraction of QUAD_MIN */
if (b == QUAD_MIN) {
/* special case subtraction of INTMAX_MIN */
if (b == INTMAX_MIN) {
if (a >= 0)
return 1;
else
return 0;
}
/* this is allowed for b != QUAD_MIN */
/* this is allowed for b != INTMAX_MIN */
return chk_plus (a, -b, r);
}
@ -462,12 +492,12 @@ op_minus(struct val *a, struct val *b)
struct val *r;
if (!to_integer (a) || !to_integer (b)) {
errx (2, "non-numeric argument");
errx(ERR_EXIT, "non-numeric argument");
}
r = make_integer (/*(quad_t)*/(a->u.i - b->u.i));
r = make_integer (/*(intmax_t)*/(a->u.i - b->u.i));
if (chk_minus (a->u.i, b->u.i, r->u.i)) {
errx (2, "overflow");
errx(ERR_EXIT, "overflow");
}
free_value (a);
free_value (b);
@ -475,7 +505,7 @@ op_minus(struct val *a, struct val *b)
}
int
chk_times(quad_t a, quad_t b, quad_t r)
chk_times(intmax_t a, intmax_t b, intmax_t r)
{
/* special case: first operand is 0, no overflow possible */
if (a == 0)
@ -492,12 +522,12 @@ op_times(struct val *a, struct val *b)
struct val *r;
if (!to_integer (a) || !to_integer (b)) {
errx (2, "non-numeric argument");
errx(ERR_EXIT, "non-numeric argument");
}
r = make_integer (/*(quad_t)*/(a->u.i * b->u.i));
r = make_integer (/*(intmax_t)*/(a->u.i * b->u.i));
if (chk_times (a->u.i, b->u.i, r->u.i)) {
errx (2, "overflow");
errx(ERR_EXIT, "overflow");
}
free_value (a);
free_value (b);
@ -505,11 +535,11 @@ op_times(struct val *a, struct val *b)
}
int
chk_div(quad_t a, quad_t b)
chk_div(intmax_t a, intmax_t b)
{
/* div by zero has been taken care of before */
/* only QUAD_MIN / -1 causes overflow */
if (a == QUAD_MIN && b == -1)
/* only INTMAX_MIN / -1 causes overflow */
if (a == INTMAX_MIN && b == -1)
return 1;
/* everything else is OK */
return 0;
@ -521,16 +551,16 @@ op_div(struct val *a, struct val *b)
struct val *r;
if (!to_integer (a) || !to_integer (b)) {
errx (2, "non-numeric argument");
errx(ERR_EXIT, "non-numeric argument");
}
if (b->u.i == 0) {
errx (2, "division by zero");
errx(ERR_EXIT, "division by zero");
}
r = make_integer (/*(quad_t)*/(a->u.i / b->u.i));
r = make_integer (/*(intmax_t)*/(a->u.i / b->u.i));
if (chk_div (a->u.i, b->u.i)) {
errx (2, "overflow");
errx(ERR_EXIT, "overflow");
}
free_value (a);
free_value (b);
@ -543,14 +573,14 @@ op_rem(struct val *a, struct val *b)
struct val *r;
if (!to_integer (a) || !to_integer (b)) {
errx (2, "non-numeric argument");
errx(ERR_EXIT, "non-numeric argument");
}
if (b->u.i == 0) {
errx (2, "division by zero");
errx(ERR_EXIT, "division by zero");
}
r = make_integer (/*(quad_t)*/(a->u.i % b->u.i));
r = make_integer (/*(intmax_t)*/(a->u.i % b->u.i));
/* chk_rem necessary ??? */
free_value (a);
free_value (b);
@ -573,7 +603,7 @@ op_colon(struct val *a, struct val *b)
/* compile regular expression */
if ((eval = regcomp (&rp, b->u.s, 0)) != 0) {
regerror (eval, &rp, errbuf, sizeof(errbuf));
errx (2, "%s", errbuf);
errx(ERR_EXIT, "%s", errbuf);
}
/* compare string against pattern */
@ -584,11 +614,11 @@ op_colon(struct val *a, struct val *b)
v = make_str (a->u.s + rm[1].rm_so);
} else {
v = make_integer ((quad_t)(rm[0].rm_eo - rm[0].rm_so));
v = make_integer ((intmax_t)(rm[0].rm_eo - rm[0].rm_so));
}
} else {
if (rp.re_nsub == 0) {
v = make_integer ((quad_t)0);
v = make_integer ((intmax_t)0);
} else {
v = make_str ("");
}