This patch addresses two problems.

The 1st one is relatively minor: according to our own manpage, the upper
and lower classes must be sorted, but currently they are not.

2nd one is serious:
	tr '[:lower:]' '[:upper:]'
	(and vice versa) currently works only if the upper and lower classes
	have exactly the same number of elements. When that is not true, as in
	many ISO8859-x locales which have a larger number of lowercase letters,
	tr may do nasty things.

	See this page
	http://www.opengroup.org/onlinepubs/007908799/xcu/tr.html
	for a detailed description of the desired tr behaviour in such cases.
This commit is contained in:
Andrey A. Chernov 2003-08-03 02:23:39 +00:00
parent 0d5a03997a
commit 00611f0457
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=118371
3 changed files with 71 additions and 21 deletions

View File

@ -40,7 +40,8 @@
typedef struct {
enum { STRING1, STRING2 } which;
enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state;
enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE,
SET, SET_UPPER, SET_LOWER } state;
int cnt; /* character count */
int lastch; /* last character */
int equiv[NCHARS]; /* equivalence set */
@ -49,3 +50,5 @@ typedef struct {
} STR;
int next(STR *);
int charcoll(const void *, const void *);

View File

@ -106,6 +106,8 @@ next(s)
}
return (1);
case SET:
case SET_UPPER:
case SET_LOWER:
if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
s->state = NORMAL;
return (next(s));
@ -194,7 +196,7 @@ genclass(s)
{
int cnt, (*func)(int);
CLASS *cp, tmp;
int *p;
int *p, n;
tmp.name = s->str;
if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
@ -208,10 +210,18 @@ genclass(s)
if ((func)(cnt))
*p++ = cnt;
*p = OOBCH;
n = p - cp->set;
s->cnt = 0;
s->state = SET;
s->set = cp->set;
if (strcmp(s->str, "upper") == 0)
s->state = SET_UPPER;
else if (strcmp(s->str, "lower") == 0) {
s->state = SET_LOWER;
} else
s->state = SET;
if ((s->state == SET_LOWER || s->state == SET_UPPER) && n > 1)
mergesort(s->set, n, sizeof(*(s->set)), charcoll);
}
static int

View File

@ -101,7 +101,6 @@ static int string1[NCHARS] = {
STR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
STR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
static int charcoll(const void *, const void *);
static void setup(int *, char *, STR *, int, int);
static void usage(void);
@ -224,20 +223,55 @@ main(int argc, char **argv)
if (!next(&s2))
errx(1, "empty string2");
ch = s2.lastch;
/* If string2 runs out of characters, use the last one specified. */
if (sflag)
while (next(&s1)) {
string1[s1.lastch] = ch = s2.lastch;
string2[ch] = 1;
(void)next(&s2);
}
else
while (next(&s1)) {
string1[s1.lastch] = ch = s2.lastch;
(void)next(&s2);
}
/*
* For -s result will contain only those characters defined
* as the second characters in each of the toupper or tolower
* pairs.
*/
/* If string2 runs out of characters, use the last one specified. */
while (next(&s1)) {
again:
if (s1.state == SET_LOWER &&
s2.state == SET_UPPER &&
s1.cnt == 1 && s2.cnt == 1) {
do {
string1[s1.lastch] = ch = toupper(s1.lastch);
if (sflag && isupper(ch))
string2[ch] = 1;
if (!next(&s1))
goto endloop;
} while (s1.state == SET_LOWER && s1.cnt > 1);
/* skip upper set */
do {
if (!next(&s2))
break;
} while (s2.state == SET_UPPER && s2.cnt > 1);
goto again;
} else if (s1.state == SET_UPPER &&
s2.state == SET_LOWER &&
s1.cnt == 1 && s2.cnt == 1) {
do {
string1[s1.lastch] = ch = tolower(s1.lastch);
if (sflag && islower(ch))
string2[ch] = 1;
if (!next(&s1))
goto endloop;
} while (s1.state == SET_UPPER && s1.cnt > 1);
/* skip lower set */
do {
if (!next(&s2))
break;
} while (s2.state == SET_LOWER && s2.cnt > 1);
goto again;
} else {
string1[s1.lastch] = s2.lastch;
if (sflag)
string2[s2.lastch] = 1;
}
(void)next(&s2);
}
endloop:
if (cflag || Cflag) {
s2.str = argv[1];
s2.state = NORMAL;
@ -294,15 +328,18 @@ setup(int *string, char *arg, STR *str, int cflag, int Cflag)
string[cnt] = !string[cnt] && ISCHAR(cnt);
}
static int
int
charcoll(const void *a, const void *b)
{
char sa[2], sb[2];
static char sa[2], sb[2];
int r;
sa[0] = *(const int *)a;
sb[0] = *(const int *)b;
sa[1] = sb[1] = '\0';
return (strcoll(sa, sb));
r = strcoll(sa, sb);
if (r == 0)
r = *(const int *)a - *(const int *)b;
return (r);
}
static void