From d4f6cd06dd4e99170b0039b7b4c72585992322aa Mon Sep 17 00:00:00 2001 From: "Tim J. Robbins" Date: Sat, 1 Nov 2003 05:13:13 +0000 Subject: [PATCH] Allow mbrtowc() and wcrtomb() to be implemented directly, instead of as wrappers around the deprecated 4.4BSD rune functions. This paves the way for state-dependent encodings, which the rune API does not support. - Add __emulated_sgetrune() and __emulated_sputrune(), which are implementations of sgetrune() and sputrune() in terms of mbrtowc() and wcrtomb(). - Rename the old rune-wrapper mbrtowc() and wcrtomb() functions to __emulated_mbrtowc() and __emulated_wcrtomb(). - Add __mbrtowc and __wcrtomb function pointers, which point to the current locale's conversion functions, or the __emulated versions. - Implement mbrtowc() and wcrtomb() as calls to these function pointers. - Make the "NONE" encoding implement mbrtowc() and wcrtomb() directly. All of this emulation mess will be removed, together with rune support, in FreeBSD 6. --- lib/libc/locale/Makefile.inc | 3 +- lib/libc/locale/mbrtowc.c | 21 ++++++-- lib/libc/locale/none.c | 81 ++++++++++++++-------------- lib/libc/locale/setrunelocale.c | 15 ++++++ lib/libc/locale/srune.c | 94 +++++++++++++++++++++++++++++++++ lib/libc/locale/table.c | 22 ++++++-- lib/libc/locale/wcrtomb.c | 18 ++++++- 7 files changed, 204 insertions(+), 50 deletions(-) create mode 100644 lib/libc/locale/srune.c diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc index 23ea555d792e..90033213ae38 100644 --- a/lib/libc/locale/Makefile.inc +++ b/lib/libc/locale/Makefile.inc @@ -10,7 +10,8 @@ SRCS+= big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c frune.c \ mbrlen.c \ mbrtowc.c mbrune.c mbsinit.c mbsrtowcs.c mbtowc.c mbstowcs.c \ mskanji.c nl_langinfo.c nomacros.c none.c rune.c \ - runetype.c setinvalidrune.c setlocale.c setrunelocale.c table.c \ + runetype.c setinvalidrune.c setlocale.c setrunelocale.c srune.c \ + table.c \ tolower.c toupper.c utf2.c utf8.c wcrtomb.c wcsrtombs.c wcsftime.c \ wcstof.c wcstod.c \ wcstoimax.c wcstol.c wcstold.c wcstoll.c \ diff --git a/lib/libc/locale/mbrtowc.c b/lib/libc/locale/mbrtowc.c index 7e4d90ef12ab..e99308242a8c 100644 --- a/lib/libc/locale/mbrtowc.c +++ b/lib/libc/locale/mbrtowc.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2002 Tim J. Robbins. + * Copyright (c) 2002, 2003 Tim J. Robbins. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,9 +32,24 @@ __FBSDID("$FreeBSD$"); #include #include +extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict); + size_t -mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, - mbstate_t * __restrict ps __unused) +mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps) +{ + + return (__mbrtowc(pwc, s, n, ps)); +} + +/* + * Emulate the ISO C mbrtowc() function in terms of the deprecated + * 4.4BSD sgetrune() function. + */ +size_t +__emulated_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, + size_t n, mbstate_t * __restrict ps __unused) { const char *e; rune_t r; diff --git a/lib/libc/locale/none.c b/lib/libc/locale/none.c index 1b1dc704d706..3c4d8c7c47f9 100644 --- a/lib/libc/locale/none.c +++ b/lib/libc/locale/none.c @@ -1,4 +1,5 @@ /*- + * Copyright (c) 2002, 2003 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * @@ -40,60 +41,62 @@ static char sccsid[] = "@(#)none.c 8.1 (Berkeley) 6/4/93"; #include __FBSDID("$FreeBSD$"); +#include #include +#include #include #include -#include #include +#include -rune_t _none_sgetrune(const char *, size_t, char const **); -int _none_sputrune(rune_t, char *, size_t, char **); +extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict); +extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); + +int _none_init(_RuneLocale *); +size_t _none_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, + mbstate_t * __restrict); +size_t _none_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); int -_none_init(rl) - _RuneLocale *rl; +_none_init(_RuneLocale *rl) { - rl->sgetrune = _none_sgetrune; - rl->sputrune = _none_sputrune; + + __mbrtowc = _none_mbrtowc; + __wcrtomb = _none_wcrtomb; _CurrentRuneLocale = rl; __mb_cur_max = 1; return(0); } -rune_t -_none_sgetrune(string, n, result) - const char *string; - size_t n; - char const **result; +size_t +_none_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, + mbstate_t * __restrict ps __unused) { - if (n < 1) { - if (result) - *result = string; - return(_INVALID_RUNE); - } - if (result) - *result = string + 1; - return(*string & 0xff); + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (0); + if (n == 0) + /* Incomplete multibyte sequence */ + return ((size_t)-2); + if (pwc != NULL) + *pwc = (unsigned char)*s; + return (*s == '\0' ? 0 : 1); } -int -_none_sputrune(c, string, n, result) - rune_t c; - char *string, **result; - size_t n; +size_t +_none_wcrtomb(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps __unused) { - if (n >= 1) { - if (string) { - if (c < 0 || c > UCHAR_MAX) { - if (result) - *result = NULL; - return (0); - } - *string = c; - } - if (result) - *result = string + 1; - } else if (result) - *result = (char *)0; - return(1); + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (1); + if (wc < 0 || wc > UCHAR_MAX) { + errno = EILSEQ; + return ((size_t)-1); + } + *s = (unsigned char)wc; + return (1); } diff --git a/lib/libc/locale/setrunelocale.c b/lib/libc/locale/setrunelocale.c index 9cdd7e7c0a05..0037f9ce8456 100644 --- a/lib/libc/locale/setrunelocale.c +++ b/lib/libc/locale/setrunelocale.c @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include "ldpart.h" #include "setlocale.h" @@ -60,6 +61,16 @@ extern int _BIG5_init(_RuneLocale *); extern int _MSKanji_init(_RuneLocale *); extern _RuneLocale *_Read_RuneMagi(FILE *); +extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict); +extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); +extern size_t __emulated_mbrtowc(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict ps); +extern size_t __emulated_wcrtomb(char * __restrict, wchar_t, + mbstate_t * __restrict ps); +extern rune_t __emulated_sgetrune(const char *, size_t, const char **); +extern int __emulated_sputrune(rune_t, char *, size_t, char **); + static int __setrunelocale(const char *); __warn_references(setrunelocale, "warning: setrunelocale() is deprecated. See setrunelocale(3)."); @@ -132,6 +143,10 @@ __setrunelocale(const char *encoding) } (void)fclose(fp); + __mbrtowc = __emulated_mbrtowc; + __wcrtomb = __emulated_wcrtomb; + rl->sputrune = __emulated_sputrune; + rl->sgetrune = __emulated_sgetrune; if (strcmp(rl->encoding, "NONE") == 0) ret = _none_init(rl); else if (strcmp(rl->encoding, "UTF2") == 0) diff --git a/lib/libc/locale/srune.c b/lib/libc/locale/srune.c new file mode 100644 index 000000000000..86d40a7cbdf9 --- /dev/null +++ b/lib/libc/locale/srune.c @@ -0,0 +1,94 @@ +/*- + * Copyright (c) 2003 Tim J. Robbins + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +/* + * Emulate the deprecated 4.4BSD sgetrune() function in terms of + * the ISO C mbrtowc() function. + */ +rune_t +__emulated_sgetrune(const char *string, size_t n, const char **result) +{ + wchar_t wc; + size_t nconv; + + /* + * Pass a NULL conversion state to mbrtowc() since multibyte + * conversion states are not supported. + */ + nconv = mbrtowc(&wc, string, n, NULL); + if (nconv == (size_t)-2) { + if (result != NULL) + *result = string; + return (_INVALID_RUNE); + } + if (nconv == (size_t)-1) { + if (result != NULL) + *result = string + 1; + return (_INVALID_RUNE); + } + if (nconv == 0) + nconv = 1; + if (result != NULL) + *result = string + nconv; + return ((rune_t)wc); +} + +/* + * Emulate the deprecated 4.4BSD sputrune() function in terms of + * the ISO C wcrtomb() function. + */ +int +__emulated_sputrune(rune_t rune, char *string, size_t n, char **result) +{ + char buf[MB_LEN_MAX]; + size_t nconv; + + nconv = wcrtomb(buf, (wchar_t)rune, NULL); + if (nconv == (size_t)-1) { + if (result != NULL) + *result = NULL; + return (0); + } + if (string == NULL) { + if (result != NULL) + *result = (char *)0 + nconv; + } else if (n >= nconv) { + memcpy(string, buf, nconv); + if (result != NULL) + *result = string + nconv; + } else { + if (result != NULL) + *result = NULL; + } + return (nconv); +} diff --git a/lib/libc/locale/table.c b/lib/libc/locale/table.c index b21ba0ce6b74..6497a24b7119 100644 --- a/lib/libc/locale/table.c +++ b/lib/libc/locale/table.c @@ -42,15 +42,24 @@ __FBSDID("$FreeBSD$"); #include #include +#include -extern rune_t _none_sgetrune(const char *, size_t, char const **); -extern int _none_sputrune(rune_t, char *, size_t, char **); +extern size_t _none_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, + mbstate_t * __restrict); +extern size_t _none_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +extern size_t __emulated_mbrtowc(wchar_t * __restrict, + const char * __restrict, size_t, + mbstate_t * __restrict ps); +extern size_t __emulated_wcrtomb(char * __restrict, wchar_t, + mbstate_t * __restrict ps); +extern rune_t __emulated_sgetrune(const char *, size_t, const char **); +extern int __emulated_sputrune(rune_t, char *, size_t, char **); _RuneLocale _DefaultRuneLocale = { _RUNE_MAGIC_1, "NONE", - _none_sgetrune, - _none_sputrune, + __emulated_sgetrune, + __emulated_sputrune, 0xFFFD, { /*00*/ _CTYPE_C, @@ -253,4 +262,7 @@ _RuneLocale _DefaultRuneLocale = { _RuneLocale *_CurrentRuneLocale = &_DefaultRuneLocale; int __mb_cur_max = 1; - +size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, + mbstate_t * __restrict) = _none_mbrtowc; +size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict) = + _none_wcrtomb; diff --git a/lib/libc/locale/wcrtomb.c b/lib/libc/locale/wcrtomb.c index 7bebd1ad97bf..cd1c45b514f9 100644 --- a/lib/libc/locale/wcrtomb.c +++ b/lib/libc/locale/wcrtomb.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2002 Tim J. Robbins. + * Copyright (c) 2002, 2003 Tim J. Robbins. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,8 +33,22 @@ __FBSDID("$FreeBSD$"); #include #include +extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); + size_t -wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps __unused) +wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) +{ + + return (__wcrtomb(s, wc, ps)); +} + +/* + * Emulate the ISO C wcrtomb() function in terms of the deprecated + * 4.4BSD sputrune() function. + */ +size_t +__emulated_wcrtomb(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps __unused) { char *e; char buf[MB_LEN_MAX];