diff --git a/usr.bin/tr/extern.h b/usr.bin/tr/extern.h index a5ed577c09a6..f84d382f014f 100644 --- a/usr.bin/tr/extern.h +++ b/usr.bin/tr/extern.h @@ -40,7 +40,7 @@ typedef struct { enum { STRING1, STRING2 } which; - enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, + enum { EOS, INFINITE, NORMAL, SEQUENCE, SET, SET_UPPER, SET_LOWER } state; int cnt; /* character count */ int lastch; /* last character */ diff --git a/usr.bin/tr/str.c b/usr.bin/tr/str.c index f8a71378d5fa..7d91e7bc1754 100644 --- a/usr.bin/tr/str.c +++ b/usr.bin/tr/str.c @@ -92,13 +92,6 @@ next(s) if (s->str[0] == '-' && genrange(s)) return (next(s)); return (1); - case RANGE: - if (s->cnt-- == 0) { - s->state = NORMAL; - return (next(s)); - } - ++s->lastch; - return (1); case SEQUENCE: if (s->cnt-- == 0) { s->state = NORMAL; @@ -204,7 +197,7 @@ genclass(s) errx(1, "unknown class %s", s->str); if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL) - err(1, "malloc"); + err(1, "genclass() malloc"); bzero(p, (NCHARS + 1) * sizeof(int)); for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt) if ((func)(cnt)) @@ -282,16 +275,28 @@ genrange(s) { int stopval; char *savestart; + int n, cnt, *p; savestart = s->str; stopval = *++s->str == '\\' ? backslash(s) : (u_char)*s->str++; - if (stopval < (u_char)s->lastch) { + if (charcoll((const void *)&stopval, (const void *)&(s->lastch)) < 0) { s->str = savestart; return (0); } - s->cnt = stopval - s->lastch + 1; - s->state = RANGE; - --s->lastch; + if ((s->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL) + err(1, "genrange() malloc"); + bzero(p, (NCHARS + 1) * sizeof(int)); + for (cnt = 0; cnt < NCHARS; ++cnt) + if (charcoll((const void *)&cnt, (const void *)&(s->lastch)) >= 0 && + charcoll((const void *)&cnt, (const void *)&stopval) <= 0) + *p++ = cnt; + *p = OOBCH; + n = p - s->set; + + s->cnt = 0; + s->state = SET; + if (n > 1) + mergesort(s->set, n, sizeof(*(s->set)), charcoll); return (1); } diff --git a/usr.bin/tr/tr.1 b/usr.bin/tr/tr.1 index 77f2fea05640..9188685ab3f7 100644 --- a/usr.bin/tr/tr.1 +++ b/usr.bin/tr/tr.1 @@ -162,7 +162,9 @@ values. .Pp A backslash followed by any other character maps to that character. .It c-c -Represents the range of characters between the range endpoints, inclusively. +Represents the range of characters between the range endpoints, inclusive, +in ascending order, +as defined by the collation sequence. .It [:class:] Represents all characters belonging to the defined character class. Class names are: @@ -199,7 +201,8 @@ Class names are: With the exception of the ``upper'' and ``lower'' classes, characters in the classes are in unspecified order. In the ``upper'' and ``lower'' classes, characters are entered in -ascending order. +ascending order, +as defined by the collation sequence. .Pp For specific information as to which .Tn ASCII