From ee446de0b1835f78de8dc70a135410d1b75dff5f Mon Sep 17 00:00:00 2001 From: "Tim J. Robbins" Date: Thu, 8 Jul 2004 06:43:37 +0000 Subject: [PATCH] Add a function to iterate over all characters in a particular character class. This is necessary in order to implement tr(1) efficiently in multibyte locales, since the brute force method of finding all characters in a class is infeasible with a 32-bit (or wider) wchar_t. --- include/wctype.h | 1 + lib/libc/locale/Makefile.inc | 4 +- lib/libc/locale/nextwctype.3 | 58 +++++++++++++++++++++++ lib/libc/locale/nextwctype.c | 90 ++++++++++++++++++++++++++++++++++++ 4 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 lib/libc/locale/nextwctype.3 create mode 100644 lib/libc/locale/nextwctype.c diff --git a/include/wctype.h b/include/wctype.h index 3e2f694bce2c..330b5ed0dc2c 100644 --- a/include/wctype.h +++ b/include/wctype.h @@ -85,6 +85,7 @@ wint_t iswnumber(wint_t); wint_t iswphonogram(wint_t); wint_t iswrune(wint_t); wint_t iswspecial(wint_t); +wint_t nextwctype(wint_t, wctype_t); #endif __END_DECLS diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc index a3b85b3e984a..1e8a36322288 100644 --- a/lib/libc/locale/Makefile.inc +++ b/lib/libc/locale/Makefile.inc @@ -9,7 +9,7 @@ SRCS+= big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c frune.c \ ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \ mbrlen.c \ mbrtowc.c mbrune.c mbsinit.c mbsrtowcs.c mbtowc.c mbstowcs.c \ - mskanji.c nl_langinfo.c nomacros.c none.c rune.c \ + mskanji.c nextwctype.c nl_langinfo.c nomacros.c none.c rune.c \ runetype.c setinvalidrune.c setlocale.c setrunelocale.c srune.c \ table.c \ tolower.c toupper.c utf2.c utf8.c wcrtomb.c wcsrtombs.c wcsftime.c \ @@ -27,7 +27,7 @@ MAN+= btowc.3 \ mbrtowc.3 \ mbrune.3 mbsinit.3 \ mbsrtowcs.3 mbstowcs.3 mbtowc.3 multibyte.3 \ - nl_langinfo.3 \ + nextwctype.3 nl_langinfo.3 \ rune.3 \ setlocale.3 toascii.3 tolower.3 toupper.3 towlower.3 towupper.3 \ 
wcsftime.3 \ diff --git a/lib/libc/locale/nextwctype.3 b/lib/libc/locale/nextwctype.3 new file mode 100644 index 000000000000..2d9acd1f9018 --- /dev/null +++ b/lib/libc/locale/nextwctype.3 @@ -0,0 +1,58 @@ +.\" +.\" Copyright (c) 2004 Tim J. Robbins +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd July 8, 2004 +.Dt NEXTWCTYPE 3 +.Os +.Sh NAME +.Nm nextwctype +.Nd "iterate through character classes" +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In wctype.h +.Ft wint_t +.Fo nextwctype +.Fa "wint_t ch" "wctype_t wct" +.Fc +.Sh DESCRIPTION +The +.Fn nextwctype +function determines the next character after +.Fa ch +that is a member of character class +.Fa wct . 
+If +.Fa ch +is \-1, the search begins at the first member of +.Fa wct . +.Sh RETURN VALUES +The +.Fn nextwctype +function returns the next character, or \-1 if there are no more. +.Sh SEE ALSO +.Xr wctype 3 diff --git a/lib/libc/locale/nextwctype.c b/lib/libc/locale/nextwctype.c new file mode 100644 index 000000000000..9363b0a700cc --- /dev/null +++ b/lib/libc/locale/nextwctype.c @@ -0,0 +1,90 @@ +/*- + * Copyright (c) 2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <runetype.h>
+#include <wchar.h>
+#include <wctype.h>
+
+/*
+ * Scan the extended range entries [re, end) in ascending order and return
+ * the lowest character >= wc whose type mask has a bit in common with wct,
+ * or -1 if no such character exists in those entries.
+ *
+ * The entries are expected to be sorted and disjoint; the binary search in
+ * nextwctype() depends on the same property.
+ */
+static wint_t
+scan_ranges(const _RuneEntry *re, const _RuneEntry *end, wint_t wc,
+    wctype_t wct)
+{
+	for (; re < end; re++) {
+		/* Jump over the gap in front of this entry, never backwards. */
+		if (wc < re->__min)
+			wc = re->__min;
+		if (re->__types != NULL) {
+			/* Per-character masks, indexed relative to __min. */
+			for (; wc <= re->__max; wc++)
+				if (re->__types[wc - re->__min] & wct)
+					return (wc);
+		} else if (wc <= re->__max && (re->__map & wct)) {
+			/* No per-character table: __map is tested instead. */
+			return (wc);
+		}
+	}
+	return (-1);
+}
+
+/*
+ * Return the next character after wc that is a member of the character
+ * class wct in the current locale, or -1 if there are no more.
+ *
+ * Passing -1 for wc starts the search at the first member of the class.
+ * Any other negative value is treated the same way.  wc itself does not
+ * have to be a member of the class, nor covered by any range entry.
+ */
+wint_t
+nextwctype(wint_t wc, wctype_t wct)
+{
+	_RuneRange *rr = &_CurrentRuneLocale->__runetype_ext;
+	_RuneEntry *base, *end, *re;
+	size_t lim;
+
+	/*
+	 * Clamp negative input to -1 so that the increment below can never
+	 * produce a negative index into __runetype[].
+	 */
+	if (wc < 0)
+		wc = -1;
+
+	/* Characters below _CACHED_RUNES are looked up in the direct table. */
+	if (wc < _CACHED_RUNES) {
+		for (wc++; wc < _CACHED_RUNES; wc++)
+			if (_CurrentRuneLocale->__runetype[wc] & wct)
+				return (wc);
+		/* Step back to the last cached character, already examined. */
+		wc--;
+	}
+
+	/* Nothing beyond the direct table: avoid touching __ranges[0]. */
+	if (rr->__ranges == NULL || rr->__nranges == 0)
+		return (-1);
+	end = rr->__ranges + rr->__nranges;
+
+	/* Binary search -- see bsearch.c for explanation. */
+	base = rr->__ranges;
+	for (lim = rr->__nranges; lim != 0; lim >>= 1) {
+		re = base + (lim >> 1);
+		if (re->__min <= wc && wc <= re->__max) {
+			/*
+			 * wc has already been examined, so resume just past
+			 * it.  Moving to the next entry instead of
+			 * incrementing at __max avoids overflowing wc.
+			 */
+			if (wc == re->__max)
+				re++;
+			else
+				wc++;
+			return (scan_ranges(re, end, wc, wct));
+		} else if (wc > re->__max) {
+			base = re + 1;
+			lim--;
+		}
+	}
+
+	/*
+	 * wc is not covered by any entry.  The search has left base at the
+	 * first entry that lies above wc (or at end if there is none), so
+	 * continue from there instead of giving up.
+	 */
+	return (scan_ranges(base, end, wc, wct));
+}