.\" File-viewer banner (not part of the manual page): 249 lines, 6.7 KiB, Groff
.\" Copyright (c) 1993
|
|
.\" The Regents of the University of California. All rights reserved.
|
|
.\"
|
|
.\" This code is derived from software contributed to Berkeley by
|
|
.\" Donn Seeley of BSDI.
|
|
.\"
|
|
.\" Redistribution and use in source and binary forms, with or without
|
|
.\" modification, are permitted provided that the following conditions
|
|
.\" are met:
|
|
.\" 1. Redistributions of source code must retain the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer.
|
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer in the
|
|
.\" documentation and/or other materials provided with the distribution.
|
|
.\" 3. All advertising materials mentioning features or use of this software
|
|
.\" must display the following acknowledgement:
|
|
.\" This product includes software developed by the University of
|
|
.\" California, Berkeley and its contributors.
|
|
.\" 4. Neither the name of the University nor the names of its contributors
|
|
.\" may be used to endorse or promote products derived from this software
|
|
.\" without specific prior written permission.
|
|
.\"
|
|
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
.\" SUCH DAMAGE.
|
|
.\"
|
|
.\" @(#)multibyte.3 8.1 (Berkeley) 6/4/93
|
|
.\" $FreeBSD$
|
|
.\"
|
|
.Dd October 6, 2002
|
|
.Dt MULTIBYTE 3
|
|
.Os
|
|
.Sh NAME
|
|
.Nm mblen ,
|
|
.Nm mbstowcs ,
|
|
.Nm mbtowc ,
|
|
.Nm wcstombs ,
|
|
.Nm wctomb
|
|
.Nd multibyte character support for C
|
|
.Sh LIBRARY
|
|
.Lb libc
|
|
.Sh SYNOPSIS
|
|
.In stdlib.h
|
|
.Ft int
|
|
.Fn mblen "const char *mbchar" "size_t nbytes"
|
|
.Ft size_t
|
|
.Fn mbstowcs "wchar_t * restrict wcstring" "const char * restrict mbstring" "size_t nwchars"
|
|
.Ft int
|
|
.Fn mbtowc "wchar_t * restrict wcharp" "const char * restrict mbchar" "size_t nbytes"
|
|
.Ft size_t
|
|
.Fn wcstombs "char * restrict mbstring" "const wchar_t * restrict wcstring" "size_t nbytes"
|
|
.Ft int
|
|
.Fn wctomb "char *mbchar" "wchar_t wchar"
|
|
.Sh DESCRIPTION
|
|
The basic elements of some written natural languages such as Chinese
|
|
cannot be represented uniquely with single C
|
|
.Va char Ns s .
|
|
The C standard supports two different ways of dealing with
|
|
extended natural language encodings,
|
|
.Em wide
|
|
characters and
|
|
.Em multibyte
|
|
characters.
|
|
Wide characters are an internal representation
|
|
which allows each basic element to map
|
|
to a single object of type
|
|
.Va wchar_t .
|
|
Multibyte characters are used for input and output
|
|
and code each basic element as a sequence of C
|
|
.Va char Ns s .
|
|
Individual basic elements may map into one or more
|
|
(up to
|
|
.Dv MB_LEN_MAX )
|
|
bytes in a multibyte character.
|
|
.Pp
|
|
The current locale
|
|
.Pq Xr setlocale 3
|
|
governs the interpretation of wide and multibyte characters.
|
|
The locale category
|
|
.Dv LC_CTYPE
|
|
specifically controls this interpretation.
|
|
The
|
|
.Va wchar_t
|
|
type is wide enough to hold the largest value
|
|
in the wide character representations for all locales.
|
|
.Pp
|
|
Multibyte strings may contain
|
|
.Sq shift
|
|
indicators to switch to and from
|
|
particular modes within the given representation.
|
|
If explicit bytes are used to signal shifting,
|
|
these are not recognized as separate characters
|
|
but are lumped with a neighboring character.
|
|
There is always a distinguished
|
|
.Sq initial
|
|
shift state.
|
|
The
|
|
.Fn mbstowcs
|
|
and
|
|
.Fn wcstombs
|
|
functions assume that multibyte strings are interpreted
|
|
starting from the initial shift state.
|
|
The
|
|
.Fn mblen ,
|
|
.Fn mbtowc
|
|
and
|
|
.Fn wctomb
|
|
functions maintain static shift state internally.
|
|
A call with a null
|
|
.Fa mbchar
|
|
pointer returns nonzero if the current locale requires shift states,
|
|
zero otherwise;
|
|
if shift states are required, the shift state is reset to the initial state.
|
|
The internal shift states are undefined after a call to
|
|
.Fn setlocale
|
|
with the
|
|
.Dv LC_CTYPE
|
|
or
|
|
.Dv LC_ALL
|
|
categories.
|
|
.Pp
|
|
For convenience in processing,
|
|
the wide character with value 0
|
|
(the null wide character)
|
|
is recognized as the wide character string terminator,
|
|
and the character with value 0
|
|
(the null byte)
|
|
is recognized as the multibyte character string terminator.
|
|
Null bytes are not permitted within multibyte characters.
|
|
.Pp
|
|
The
|
|
.Fn mblen
|
|
function computes the length in bytes
|
|
of a multibyte character
|
|
.Fa mbchar .
|
|
Up to
|
|
.Fa nbytes
|
|
bytes are examined.
|
|
.Pp
|
|
The
|
|
.Fn mbtowc
|
|
function converts a multibyte character
|
|
.Fa mbchar
|
|
into a wide character and stores the result
|
|
in the object pointed to by
|
|
.Fa wcharp .
|
|
Up to
|
|
.Fa nbytes
|
|
bytes are examined.
|
|
.Pp
|
|
The
|
|
.Fn wctomb
|
|
function converts a wide character
|
|
.Fa wchar
|
|
into a multibyte character and stores
|
|
the result in
|
|
.Fa mbchar .
|
|
The object pointed to by
|
|
.Fa mbchar
|
|
must be large enough to accommodate the multibyte character.
|
|
.Pp
|
|
The
|
|
.Fn mbstowcs
|
|
function converts a multibyte character string
|
|
.Fa mbstring
|
|
into a wide character string
|
|
.Fa wcstring .
|
|
No more than
|
|
.Fa nwchars
|
|
wide characters are stored.
|
|
A terminating null wide character is appended if there is room.
|
|
.Pp
|
|
The
|
|
.Fn wcstombs
|
|
function converts a wide character string
|
|
.Fa wcstring
|
|
into a multibyte character string
|
|
.Fa mbstring .
|
|
Up to
|
|
.Fa nbytes
|
|
bytes are stored in
|
|
.Fa mbstring .
|
|
Partial multibyte characters at the end of the string are not stored.
|
|
The multibyte character string is null terminated if there is room.
|
|
.Sh RETURN VALUES
|
|
If
|
|
.Fa mbchar
|
|
is
|
|
.Dv NULL ,
|
|
the
|
|
.Fn mblen ,
|
|
.Fn mbtowc
|
|
and
|
|
.Fn wctomb
|
|
functions return nonzero if shift states are supported,
|
|
zero otherwise.
|
|
If
|
|
.Fa mbchar
|
|
is valid,
|
|
then these functions return
|
|
the number of bytes processed in
|
|
.Fa mbchar ,
|
|
or \-1 if no multibyte character
|
|
could be recognized or converted.
|
|
.Pp
|
|
The
|
|
.Fn mbstowcs
|
|
function returns the number of wide characters converted,
|
|
not counting any terminating null wide character.
|
|
The
|
|
.Fn wcstombs
|
|
function returns the number of bytes converted,
|
|
not counting any terminating null byte.
|
|
If any invalid multibyte characters are encountered,
|
|
both functions return (size_t)\-1.
|
|
.Sh SEE ALSO
|
|
.Xr btowc 3 ,
|
|
.Xr mbrlen 3 ,
|
|
.Xr mbrtowc 3 ,
|
|
.Xr mbrune 3 ,
|
|
.Xr mbsrtowcs 3 ,
|
|
.Xr rune 3 ,
|
|
.Xr setlocale 3 ,
|
|
.Xr wcrtomb 3 ,
|
|
.Xr wcsrtombs 3 ,
|
|
.Xr euc 4 ,
|
|
.Xr utf2 4 ,
|
|
.Xr utf8 5
|
|
.Sh STANDARDS
|
|
The
|
|
.Fn mblen ,
|
|
.Fn mbstowcs ,
|
|
.Fn mbtowc ,
|
|
.Fn wcstombs
|
|
and
|
|
.Fn wctomb
|
|
functions conform to
|
|
.St -isoC .
|
|
.Sh BUGS
|
|
The current implementation does not support shift states.
|