Implement support for equivalence classes ([=e=]) when the mapping is one-to-one (SUSv3)
This commit is contained in:
Tim J. Robbins 2002-06-14 07:37:08 +00:00
parent fa3b8ffb32
commit 85f6c317ea
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=98210
3 changed files with 45 additions and 14 deletions

View File

@ -216,14 +216,13 @@ c_class(a, b)
return (strcmp(((const CLASS *)a)->name, ((const CLASS *)b)->name));
}
/*
* English doesn't have any equivalence classes, so for now
* we just syntax check and grab the character.
*/
static void
genequiv(s)
STR *s;
{
int i, p, pri;
char src[2], dst[3];
if (*s->str == '\\') {
s->equiv[0] = backslash(s);
if (*s->str != '=')
@ -233,6 +232,28 @@ genequiv(s)
if (s->str[1] != '=')
errx(1, "misplaced equivalence equals sign");
}
/*
* Calculate the set of all characters in the same equivalence class
* as the specified character (they will have the same primary
* collation weights).
* XXX Knows too much about how strxfrm() is implemented. Assumes
* it fills the string with primary collation weight bytes. Only one-
* to-one mappings are supported.
*/
src[0] = s->equiv[0];
src[1] = '\0';
if (strxfrm(dst, src, sizeof(dst)) == 1) {
pri = (unsigned char)*dst;
for (p = 1, i = 1; i < NCHARS; i++) {
*src = i;
if (strxfrm(dst, src, sizeof(dst)) == 1 && pri &&
pri == (unsigned char)*dst)
s->equiv[p++] = i;
}
s->equiv[p] = OOBCH;
}
s->str += 2;
s->cnt = 0;
s->state = SET;

View File

@ -200,15 +200,9 @@ in these classes, see
.Xr ctype 3
and related manual pages.
.It [=equiv=]
Represents all characters or collating (sorting) elements belonging to
the same equivalence class as
.Ar equiv .
If
there is a secondary ordering within the equivalence class, the characters
are ordered in ascending sequence.
Otherwise, they are ordered after their encoded values.
An example of an equivalence class might be ``c'' and ``ch'' in Spanish;
English has no equivalence classes.
Represents all characters belonging to the same equivalence class as
.Ar equiv ,
ordered by their encoded values.
.It [#*n]
Represents
.Ar n
@ -228,6 +222,17 @@ If
has a leading zero, it is interpreted as an octal value, otherwise,
it's interpreted as a decimal value.
.El
.Sh ENVIRONMENT
The
.Ev LANG ,
.Ev LC_ALL ,
.Ev LC_CTYPE
and
.Ev LC_COLLATE
environment variables affect the execution of
.Nm
as described in
.Xr environ 7 .
.Sh DIAGNOSTICS
.Ex -std
.Sh EXAMPLES
@ -245,6 +250,11 @@ Translate the contents of file1 to upper-case.
Strip out non-printable characters from file1.
.Pp
.D1 Li "tr -cd \*q[:print:]\*q < file1"
.Pp
Remove diacritical marks from all accented variants of the letter
.Sq e :
.Pp
.Dl "tr \*q[=e=]\*q \*qe\*q"
.Sh COMPATIBILITY
System V has historically implemented character ranges using the syntax
``[c-c]'' instead of the ``c-c'' used by historic

View File

@ -105,7 +105,7 @@ main(argc, argv)
int ch, cnt, lastch, *p;
int cflag, dflag, sflag, isstring2;
(void) setlocale(LC_CTYPE, "");
(void)setlocale(LC_ALL, "");
cflag = dflag = sflag = 0;
while ((ch = getopt(argc, argv, "cdsu")) != -1)