Add a function to iterate over all characters in a particular character
class. This is necessary in order to implement tr(1) efficiently in
multibyte locales, since the brute force method of finding all characters
in a class is infeasible with a 32-bit (or wider) wchar_t.
This commit is contained in:
Tim J. Robbins 2004-07-08 06:43:37 +00:00
parent 72c94845c6
commit ee446de0b1
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=131787
4 changed files with 151 additions and 2 deletions

View File

@ -85,6 +85,7 @@ wint_t iswnumber(wint_t);
wint_t iswphonogram(wint_t);
wint_t iswrune(wint_t);
wint_t iswspecial(wint_t);
wint_t nextwctype(wint_t, wctype_t);
#endif
__END_DECLS

View File

@ -9,7 +9,7 @@ SRCS+= big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c frune.c \
ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \
mbrlen.c \
mbrtowc.c mbrune.c mbsinit.c mbsrtowcs.c mbtowc.c mbstowcs.c \
mskanji.c nl_langinfo.c nomacros.c none.c rune.c \
mskanji.c nextwctype.c nl_langinfo.c nomacros.c none.c rune.c \
runetype.c setinvalidrune.c setlocale.c setrunelocale.c srune.c \
table.c \
tolower.c toupper.c utf2.c utf8.c wcrtomb.c wcsrtombs.c wcsftime.c \
@ -27,7 +27,7 @@ MAN+= btowc.3 \
mbrtowc.3 \
mbrune.3 mbsinit.3 \
mbsrtowcs.3 mbstowcs.3 mbtowc.3 multibyte.3 \
nl_langinfo.3 \
nextwctype.3 nl_langinfo.3 \
rune.3 \
setlocale.3 toascii.3 tolower.3 toupper.3 towlower.3 towupper.3 \
wcsftime.3 \

View File

@ -0,0 +1,58 @@
.\"
.\" Copyright (c) 2004 Tim J. Robbins
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd July 8, 2004
.Dt NEXTWCTYPE 3
.Os
.Sh NAME
.Nm nextwctype
.Nd "iterate through character classes"
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
.In wctype.h
.Ft wint_t
.Fo nextwctype
.Fa "wint_t ch" "wctype_t wct"
.Fc
.Sh DESCRIPTION
The
.Fn nextwctype
function determines the next character after
.Fa ch
that is a member of character class
.Fa wct .
If
.Fa ch
is \-1, the search begins at the first member of
.Fa wct .
.Sh RETURN VALUES
The
.Fn nextwctype
function returns the next character, or \-1 if there are no more.
.Sh SEE ALSO
.Xr wctype 3

View File

@ -0,0 +1,90 @@
/*-
* Copyright (c) 2004 Tim J. Robbins.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <runetype.h>
#include <wchar.h>
#include <wctype.h>
/*
 * nextwctype --
 *	Return the smallest character greater than wc that is a member of
 *	character class wct in the current locale, or -1 if there is none.
 *	Passing wc == -1 starts the search at the first member of the class.
 *
 *	Characters below _CACHED_RUNES are looked up directly in the locale's
 *	__runetype[] table.  Everything above is found by walking the
 *	__runetype_ext ranges; the binary search below relies on those ranges
 *	being sorted by ascending code point and not overlapping.
 */
wint_t
nextwctype(wint_t wc, wctype_t wct)
{
	_RuneRange *rr = &_CurrentRuneLocale->__runetype_ext;
	_RuneEntry *first, *end, *re;
	size_t lo, hi, mid, nranges;
	wint_t c;

	/*
	 * Only -1 is a documented "start" value.  Fold every other negative
	 * value into it so that the increment below can never yield a
	 * negative index into __runetype[].
	 */
	if (wc < 0)
		wc = -1;

	if (wc < _CACHED_RUNES) {
		for (c = wc + 1; c < _CACHED_RUNES; c++)
			if (_CurrentRuneLocale->__runetype[c] & wct)
				return (c);
		/* Cache exhausted: carry on just past its last slot. */
		wc = _CACHED_RUNES - 1;
	}

	/*
	 * Lower-bound binary search: locate the first range whose last
	 * character lies beyond wc, i.e. the first range that can still
	 * contain a successor of wc.  Unlike an exact-containment search,
	 * this also works when wc falls before the first range or in a gap
	 * between two ranges.
	 */
	nranges = rr->__nranges;
	lo = 0;
	hi = nranges;
	while (lo < hi) {
		mid = lo + (hi - lo) / 2;
		if (rr->__ranges[mid].__max > wc)
			hi = mid;
		else
			lo = mid + 1;
	}
	if (lo == nranges)
		return (-1);	/* wc is at or beyond the last range */

	first = rr->__ranges + lo;
	end = rr->__ranges + nranges;
	for (re = first; re < end; re++) {
		/*
		 * Inside the first candidate range resume right after wc;
		 * every later range lies entirely beyond wc, so start at
		 * its first character.
		 */
		if (re == first && wc >= re->__min)
			c = wc + 1;
		else
			c = re->__min;

		if (re->__types != NULL) {
			/* Per-character type table for this range. */
			for (; c <= re->__max; c++)
				if (re->__types[c - re->__min] & wct)
					return (c);
		} else if (re->__map & wct) {
			/* Whole range shares one type mask. */
			return (c);
		}
	}
	return (-1);
}