Add placeholder implementations of wcscoll() and wcsxfrm() which give
locale-sensitive collation only in single-byte locales, and just do
binary comparison in the other locales, which have extended character sets.
This commit is contained in:
Tim J. Robbins 2002-10-04 03:18:26 +00:00
parent 92d0f59903
commit fd4f1dd9fa
6 changed files with 456 additions and 4 deletions

View File

@ -141,6 +141,7 @@ size_t wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict);
wchar_t *wcscat(wchar_t * __restrict, const wchar_t * __restrict);
wchar_t *wcschr(const wchar_t *, wchar_t);
int wcscmp(const wchar_t *, const wchar_t *);
int wcscoll(const wchar_t *, const wchar_t *);
wchar_t *wcscpy(wchar_t * __restrict, const wchar_t * __restrict);
size_t wcscspn(const wchar_t *, const wchar_t *);
size_t wcsftime(wchar_t * __restrict, size_t, const wchar_t * __restrict,
@ -156,6 +157,7 @@ size_t wcsrtombs(char * __restrict, const wchar_t ** __restrict, size_t,
mbstate_t * __restrict);
size_t wcsspn(const wchar_t *, const wchar_t *);
wchar_t *wcsstr(const wchar_t * __restrict, const wchar_t * __restrict);
size_t wcsxfrm(wchar_t * __restrict, const wchar_t * __restrict, size_t);
int wctob(wint_t);
double wcstod(const wchar_t * __restrict, wchar_t ** __restrict);
wchar_t *wcstok(wchar_t * __restrict, const wchar_t * __restrict,

View File

@ -12,9 +12,11 @@ MISRCS+=bcmp.c bcopy.c bzero.c ffs.c index.c memccpy.c memchr.c memcmp.c \
strlcat.c strlcpy.c strlen.c strmode.c strncat.c strncmp.c strncpy.c \
strcasestr.c strnstr.c \
strpbrk.c strrchr.c strsep.c strsignal.c strspn.c strstr.c strtok.c \
strxfrm.c swab.c wcscat.c wcschr.c wcscmp.c wcscpy.c wcscspn.c \
strxfrm.c swab.c wcscat.c wcschr.c wcscmp.c wcscoll.c wcscpy.c \
wcscspn.c \
wcslcat.c wcslcpy.c wcslen.c wcsncat.c wcsncmp.c wcsncpy.c wcspbrk.c \
wcsrchr.c wcsspn.c wcsstr.c wcstok.c wcswidth.c wmemchr.c wmemcmp.c \
wcsrchr.c wcsspn.c wcsstr.c wcstok.c wcswidth.c wcsxfrm.c wmemchr.c \
wmemcmp.c \
wmemcpy.c wmemmove.c wmemset.c
@ -28,8 +30,8 @@ MAN+= bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 index.3 memccpy.3 memchr.3 \
memcmp.3 memcpy.3 memmove.3 memset.3 rindex.3 strcasecmp.3 strcat.3 \
strchr.3 strcmp.3 strcoll.3 strcpy.3 strcspn.3 strdup.3 strerror.3 \
string.3 strlcpy.3 strlen.3 strmode.3 strpbrk.3 strrchr.3 strsep.3 \
strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 wcstok.3 wcswidth.3 \
wmemchr.3
strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 wcscoll.3 wcstok.3 \
wcswidth.3 wcsxfrm.3 wmemchr.3
MLINKS+=strcasecmp.3 strncasecmp.3
MLINKS+=strcat.3 strncat.3

112
lib/libc/string/wcscoll.3 Normal file
View File

@ -0,0 +1,112 @@
.\" Copyright (c) 1990, 1991, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" This code is derived from software contributed to Berkeley by
.\" Chris Torek and the American National Standards Committee X3,
.\" on Information Processing Systems.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. All advertising materials mentioning features or use of this software
.\" must display the following acknowledgement:
.\" This product includes software developed by the University of
.\" California, Berkeley and its contributors.
.\" 4. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)strcoll.3 8.1 (Berkeley) 6/4/93
.\" FreeBSD: src/lib/libc/string/strcoll.3,v 1.11 2001/10/01 16:09:00 ru Exp
.\" $FreeBSD$
.\"
.Dd October 4, 2002
.Dt WCSCOLL 3
.Os
.Sh NAME
.Nm wcscoll
.Nd compare wide strings according to current collation
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
.In wchar.h
.Ft int
.Fn wcscoll "const wchar_t *s1" "const wchar_t *s2"
.Sh DESCRIPTION
The
.Fn wcscoll
function compares the null-terminated strings
.Fa s1
and
.Fa s2
according to the current locale collation order.
In the
.Dq Li C
locale,
.Fn wcscoll
is equivalent to
.Fn wcscmp .
.Sh RETURN VALUES
The
.Fn wcscoll
function
returns an integer greater than, equal to, or less than 0,
if
.Fa s1
is greater than, equal to, or less than
.Fa s2 .
.Pp
No return value is reserved to indicate errors;
callers should set
.Va errno
to 0 before calling
.Fn wcscoll .
If it is non-zero upon return from
.Fn wcscoll ,
an error has occurred.
.Sh ERRORS
The
.Fn wcscoll
function will fail if:
.Bl -tag -width Er
.It Bq Er EILSEQ
An invalid wide character code was specified.
.It Bq Er ENOMEM
Cannot allocate enough memory for temporary buffers.
.El
.Sh SEE ALSO
.Xr setlocale 3 ,
.Xr strcoll 3 ,
.Xr wcscmp 3 ,
.Xr wcsxfrm 3
.Sh STANDARDS
The
.Fn wcscoll
function
conforms to
.St -isoC-99 .
.Sh BUGS
The current implementation of
.Fn wcscoll
only works in single-byte
.Dv LC_CTYPE
locales, and falls back to using
.Fn wcscmp
in locales with extended character sets.

97
lib/libc/string/wcscoll.c Normal file
View File

@ -0,0 +1,97 @@
/*-
* Copyright (c) 2002 Tim J. Robbins
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include "collate.h"
static char *__mbsdup(const wchar_t *);
/*
 * Interim wcscoll(): compare two wide strings using the collation order of
 * the current locale when that can be done through the single-byte
 * strcoll() machinery, and by plain wcscmp() otherwise.
 *
 * Returns the result of strcoll() on the multibyte forms of ws1 and ws2, or
 * the result of wcscmp(ws1, ws2) whenever the fallback path is taken.
 */
int
wcscoll(const wchar_t *ws1, const wchar_t *ws2)
{
	char *left, *right;
	int result, saved_errno;

	/*
	 * No usable collation table, or a locale whose characters may need
	 * more than one byte: a binary comparison is all we can offer.
	 */
	if (__collate_load_error || MB_CUR_MAX > 1)
		return (wcscmp(ws1, ws2));

	/* The second conversion is only attempted if the first one worked. */
	left = __mbsdup(ws1);
	right = (left != NULL) ? __mbsdup(ws2) : NULL;
	if (left == NULL || right == NULL) {
		/*
		 * A conversion failed (allocation failure or a wide
		 * character that cannot be converted).  Answer with wcscmp()
		 * so callers that ignore errors still get a plausible
		 * ordering, but keep errno describing the failure: free()
		 * must not be allowed to disturb it.
		 */
		saved_errno = errno;
		free(left);
		errno = saved_errno;
		return (wcscmp(ws1, ws2));
	}

	result = strcoll(left, right);

	/* Release both copies without losing whatever strcoll() left in errno. */
	saved_errno = errno;
	free(left);
	free(right);
	errno = saved_errno;

	return (result);
}
/*
 * Return a freshly malloc()ed multibyte copy of the wide string ws, converted
 * with wcsrtombs() starting from a zeroed conversion state.
 *
 * Returns NULL if wcsrtombs() reports a conversion failure or if the buffer
 * cannot be allocated.  The caller owns the returned buffer and must free()
 * it.
 */
static char *
__mbsdup(const wchar_t *ws)
{
	mbstate_t st;
	const wchar_t *probe;
	char *out;
	size_t nbytes;

	/*
	 * Measuring pass: with a NULL destination wcsrtombs() only counts.
	 * A scratch pointer is used so that ws itself is still intact for
	 * the real conversion below.
	 */
	probe = ws;
	memset(&st, 0, sizeof(st));
	nbytes = wcsrtombs(NULL, &probe, 0, &st);
	if (nbytes == (size_t)-1)
		return (NULL);

	/* One extra byte for the terminating NUL. */
	out = malloc(nbytes + 1);
	if (out == NULL)
		return (NULL);

	/* Converting pass, again from a zeroed state. */
	memset(&st, 0, sizeof(st));
	wcsrtombs(out, &ws, nbytes + 1, &st);

	return (out);
}

124
lib/libc/string/wcsxfrm.3 Normal file
View File

@ -0,0 +1,124 @@
.\" Copyright (c) 1990, 1991, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" This code is derived from software contributed to Berkeley by
.\" Chris Torek and the American National Standards Committee X3,
.\" on Information Processing Systems.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. All advertising materials mentioning features or use of this software
.\" must display the following acknowledgement:
.\" This product includes software developed by the University of
.\" California, Berkeley and its contributors.
.\" 4. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)strxfrm.3 8.1 (Berkeley) 6/4/93
.\" FreeBSD: src/lib/libc/string/strxfrm.3,v 1.16 2002/09/06 11:24:06 tjr Exp
.\" $FreeBSD$
.\"
.Dd October 4, 2002
.Dt WCSXFRM 3
.Os
.Sh NAME
.Nm wcsxfrm
.Nd transform a wide string under locale
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
.In wchar.h
.Ft size_t
.Fn wcsxfrm "wchar_t * restrict dst" "const wchar_t * restrict src" "size_t n"
.Sh DESCRIPTION
The
.Fn wcsxfrm
function transforms a null-terminated wide character string pointed to by
.Fa src
according to the current locale collation order
then copies the transformed string
into
.Fa dst .
No more than
.Fa n
wide characters are copied into
.Fa dst ,
including the terminating null character added.
If
.Fa n
is set to 0
(which is useful for determining the size needed
to hold the transformed string),
.Fa dst
is permitted to be a NULL pointer.
.Pp
Comparing two strings using
.Fn wcscmp
after
.Fn wcsxfrm
is equivalent to comparing
two original strings with
.Fn wcscoll .
.Sh RETURN VALUES
Upon successful completion,
.Fn wcsxfrm
returns the length of the transformed string not including
the terminating null character.
If this value is
.Fa n
or more, the contents of
.Fa dst
are indeterminate.
.Sh SEE ALSO
.Xr setlocale 3 ,
.Xr strxfrm 3 ,
.Xr wcscoll 3 ,
.Xr wcscmp 3
.Sh STANDARDS
The
.Fn wcsxfrm
function
conforms to
.St -isoC-99 .
.Sh BUGS
The current implementation of
.Fn wcsxfrm
only works in single-byte
.Dv LC_CTYPE
locales, and falls back to using
.Fn wcsncpy
in locales with extended character sets.
.Pp
Comparing two strings using
.Fn wcscmp
after
.Fn wcsxfrm
is
.Em not
always equivalent to comparison with
.Fn wcscoll ;
.Fn wcsxfrm
only stores information about primary collation weights into
.Fa dst ,
whereas
.Fn wcscoll
compares characters using both primary and secondary weights.

115
lib/libc/string/wcsxfrm.c Normal file
View File

@ -0,0 +1,115 @@
/*-
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
* at Electronni Visti IA, Kiev, Ukraine.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#if 0
__FBSDID("FreeBSD: src/lib/libc/string/strxfrm.c,v 1.15 2002/09/06 11:24:06 tjr Exp ");
#endif
__FBSDID("$FreeBSD$");
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include "collate.h"
static char *__mbsdup(const wchar_t *);
/*
 * Placeholder wcsxfrm() implementation.  See wcscoll.c for a description of
 * the logic used.
 *
 * Stores at most `len' wide characters, including the terminating null, in
 * `dest' and returns the length of the complete transformed string, not
 * counting the terminator.  When `len' is 0 nothing is written to `dest'.
 *
 * The source string is copied unchanged (truncated to fit) when any of the
 * following holds:
 *   - the collation table could not be loaded (__collate_load_error),
 *   - the locale has an extended character set (MB_CUR_MAX > 1),
 *   - the source cannot be converted to a multibyte string, i.e.
 *     __mbsdup() returns NULL (conversion or allocation failure).
 * In the last case errno is not modified here, so callers that check it
 * should still see the value stored by the call that failed.
 */
size_t
wcsxfrm(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len)
{
	int prim, sec, l;
	size_t slen;
	char *mbsrc, *s, *ss;

	/* The empty string transforms to the empty string. */
	if (*src == L'\0') {
		if (len != 0)
			*dest = L'\0';
		return (0);
	}

	/*
	 * The conversion is attempted only once the first two tests have
	 * established that single-byte collation is applicable.  A NULL
	 * result must not reach __collate_substitute(), so it is handled
	 * like the other "no collation available" cases.
	 */
	if (__collate_load_error || MB_CUR_MAX > 1 ||
	    (mbsrc = __mbsdup(src)) == NULL) {
		slen = wcslen(src);
		if (len > 0) {
			if (slen < len)
				wcscpy(dest, src);
			else {
				/* Truncate, but always terminate. */
				wcsncpy(dest, src, len - 1);
				dest[len - 1] = L'\0';
			}
		}
		return (slen);
	}

	slen = 0;
	prim = sec = 0;
	/* ss remembers the start of the substituted buffer for free(). */
	ss = s = __collate_substitute(mbsrc);
	while (*s != '\0') {
		/*
		 * Advance until a non-zero primary weight is found; `l' is
		 * the step __collate_lookup() asks us to advance by.
		 */
		while (*s != '\0' && prim == 0) {
			__collate_lookup(s, &l, &prim, &sec);
			s += l;
		}
		if (prim != 0) {
			/*
			 * Store the weight only while there is still room
			 * for it plus the terminator; always count it so
			 * that the full length is returned.
			 */
			if (len > 1) {
				*dest++ = (wchar_t)prim;
				len--;
			}
			slen++;
			prim = 0;
		}
	}
	free(ss);
	free(mbsrc);
	if (len != 0)
		*dest = L'\0';

	return (slen);
}
/*
 * Duplicate the wide string ws as a multibyte string in malloc()ed storage.
 *
 * The conversion is done by wcsrtombs() from a zeroed conversion state.
 * NULL is returned when wcsrtombs() signals a failure or when malloc()
 * fails; otherwise the caller is responsible for free()ing the result.
 */
static char *
__mbsdup(const wchar_t *ws)
{
	mbstate_t ps;
	const wchar_t *cursor = ws;
	size_t need;
	char *copy;

	/* Pass 1: ask wcsrtombs() how many bytes the result occupies. */
	memset(&ps, 0, sizeof(ps));
	need = wcsrtombs(NULL, &cursor, 0, &ps);
	if (need == (size_t)-1)
		return (NULL);

	/* Reserve room for the bytes plus the terminating NUL. */
	copy = malloc(need + 1);
	if (copy == NULL)
		return (NULL);

	/* Pass 2: convert for real, restarting from a zeroed state. */
	memset(&ps, 0, sizeof(ps));
	wcsrtombs(copy, &ws, need + 1, &ps);

	return (copy);
}