POSIX requires that a 'c-c' range conform to the collating sequence and be in collation order

This commit is contained in:
Andrey A. Chernov 2003-08-03 03:51:27 +00:00
parent 00611f0457
commit a508a04d43
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=118372
3 changed files with 23 additions and 15 deletions

View File

@ -40,7 +40,7 @@
typedef struct {
enum { STRING1, STRING2 } which;
enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE,
enum { EOS, INFINITE, NORMAL, SEQUENCE,
SET, SET_UPPER, SET_LOWER } state;
int cnt; /* character count */
int lastch; /* last character */

View File

@ -92,13 +92,6 @@ next(s)
if (s->str[0] == '-' && genrange(s))
return (next(s));
return (1);
case RANGE:
if (s->cnt-- == 0) {
s->state = NORMAL;
return (next(s));
}
++s->lastch;
return (1);
case SEQUENCE:
if (s->cnt-- == 0) {
s->state = NORMAL;
@ -204,7 +197,7 @@ genclass(s)
errx(1, "unknown class %s", s->str);
if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
err(1, "malloc");
err(1, "genclass() malloc");
bzero(p, (NCHARS + 1) * sizeof(int));
for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
if ((func)(cnt))
@ -282,16 +275,28 @@ genrange(s)
{
int stopval;
char *savestart;
int n, cnt, *p;
savestart = s->str;
stopval = *++s->str == '\\' ? backslash(s) : (u_char)*s->str++;
if (stopval < (u_char)s->lastch) {
if (charcoll((const void *)&stopval, (const void *)&(s->lastch)) < 0) {
s->str = savestart;
return (0);
}
s->cnt = stopval - s->lastch + 1;
s->state = RANGE;
--s->lastch;
if ((s->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
err(1, "genrange() malloc");
bzero(p, (NCHARS + 1) * sizeof(int));
for (cnt = 0; cnt < NCHARS; ++cnt)
if (charcoll((const void *)&cnt, (const void *)&(s->lastch)) >= 0 &&
charcoll((const void *)&cnt, (const void *)&stopval) <= 0)
*p++ = cnt;
*p = OOBCH;
n = p - s->set;
s->cnt = 0;
s->state = SET;
if (n > 1)
mergesort(s->set, n, sizeof(*(s->set)), charcoll);
return (1);
}

View File

@ -162,7 +162,9 @@ values.
.Pp
A backslash followed by any other character maps to that character.
.It c-c
Represents the range of characters between the range endpoints, inclusively.
Represents the range of characters between the range endpoints, inclusive,
in ascending order,
as defined by the collation sequence.
.It [:class:]
Represents all characters belonging to the defined character class.
Class names are:
@ -199,7 +201,8 @@ Class names are:
With the exception of the ``upper'' and ``lower'' classes, characters
in the classes are in unspecified order.
In the ``upper'' and ``lower'' classes, characters are entered in
ascending order.
ascending order,
as defined by the collation sequence.
.Pp
For specific information as to which
.Tn ASCII