I have added the support for BIG5 encoding into libc/libxpg4/mklocale.

the diff is attached below. This is done on the 3.0 source-tree.
I have test this on 2.2-stable before, but I don't have a 3.0 machine
right now.

This patch is mainly to make libc support BIG5 encoding, thus add
zh_TW.BIG5 locale to 3.0.

Submitted by:	Chen Hsiung Chan <frankch@waru.life.nthu.edu.tw>
This commit is contained in:
Poul-Henning Kamp 1998-08-15 12:51:49 +00:00
parent 15160dd67b
commit 7a55a3c230
7 changed files with 329 additions and 10 deletions

View File

@ -1,13 +1,13 @@
# from @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
# $Id: Makefile.inc,v 1.11 1997/10/21 08:41:07 bde Exp $
# $Id: Makefile.inc,v 1.12 1998/02/20 08:17:23 jb Exp $
# locale sources
.PATH: ${.CURDIR}/../libc/${MACHINE_ARCH}/locale ${.CURDIR}/../libc/locale
SRCS+= ansi.c collate.c collcmp.c ctype.c euc.c frune.c isctype.c lconv.c \
localeconv.c mbrune.c mskanji.c nomacros.c none.c rune.c runetype.c \
setinvalidrune.c setlocale.c setrunelocale.c table.c tolower.c \
toupper.c utf2.c
SRCS+= ansi.c big5.c collate.c collcmp.c ctype.c euc.c frune.c isctype.c \
lconv.c localeconv.c mbrune.c mskanji.c nomacros.c none.c rune.c \
runetype.c setinvalidrune.c setlocale.c setrunelocale.c table.c \
tolower.c toupper.c utf2.c
.if ${LIB} == "c"
MAN3+= ctype.3 isalnum.3 isalpha.3 isascii.3 isblank.3 iscntrl.3 \

119
lib/libc/locale/big5.c Normal file
View File

@ -0,0 +1,119 @@
/*-
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Paul Borman at Krystal Technologies.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifdef XPG4
#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)big5.c 8.1 (Berkeley) 6/4/93";
#endif /* LIBC_SCCS and not lint */
#include <errno.h>
#include <rune.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
rune_t _BIG5_sgetrune __P((const char *, size_t, char const **));
int _BIG5_sputrune __P((rune_t, char *, size_t, char **));
int
_BIG5_init(rl)
_RuneLocale *rl;
{
rl->sgetrune = _BIG5_sgetrune;
rl->sputrune = _BIG5_sputrune;
_CurrentRuneLocale = rl;
__mb_cur_max = 2;
return (0);
}
static inline int
_big5_check(c)
u_int c;
{
c &= 0xff;
return ((c >= 0xa1 && c <= 0xfe) ? 2 : 1);
}
rune_t
_BIG5_sgetrune(string, n, result)
const char *string;
size_t n;
char const **result;
{
rune_t rune = 0;
int len;
if (n < 1 || (len = _big5_check(*string)) > n) {
if (result)
*result = string;
return (_INVALID_RUNE);
}
while (len-- >= 0)
rune = (rune << 8) | ((u_int)(*string++) & 0xff);
if (result)
*result = string + len;
return rune;
}
int
_BIG5_sputrune(c, string, n, result)
rune_t c;
char *string, **result;
size_t n;
{
if (c & 0x8000) {
if (n >= 2) {
string[0] = (c >> 8) & 0xff;
string[1] = c & 0xff;
if (result)
*result = string + 2;
return (2);
}
}
else {
if (n >= 1) {
*string = c & 0xff;
if (result)
*result = string + 1;
return (1);
}
}
if (result)
*result = string;
return (0);
}
#endif /* XPG4 */

View File

@ -47,6 +47,7 @@ extern int _none_init __P((_RuneLocale *));
#ifdef XPG4
extern int _UTF2_init __P((_RuneLocale *));
extern int _EUC_init __P((_RuneLocale *));
extern int _BIG5_init __P((_RuneLocale *));
extern int _MSKanji_init __P((_RuneLocale *));
extern int _xpg4_setrunelocale __P((char *));
#endif
@ -128,6 +129,8 @@ _xpg4_setrunelocale(encoding)
#ifdef XPG4
else if (!strcmp(rl->encoding, "EUC"))
return(_EUC_init(rl));
else if (!strcmp(rl->encoding, "BIG5"))
return(_BIG5_init(rl));
else if (!strcmp(rl->encoding, "MSKanji"))
return(_MSKanji_init(rl));
#endif

View File

@ -1,5 +1,5 @@
LIB= xpg4
SRCS= setlocale.c setrunelocale.c euc.c mskanji.c utf2.c runetype.c \
SRCS= setlocale.c setrunelocale.c big5.c euc.c mskanji.c utf2.c runetype.c \
tolower.c toupper.c
CFLAGS+= -Wall -DXPG4 -I${.CURDIR}/../libc/locale
.PATH: ${.CURDIR}/../libc/locale

View File

@ -1,4 +1,4 @@
# $Id: Makefile,v 1.14 1998/08/07 17:07:12 ache Exp $
# $Id: Makefile,v 1.15 1998/08/10 09:49:42 phk Exp $
NOMAN=YES
CLEANFILES+= ${LOCALES:S/$/.out/g}
@ -11,7 +11,8 @@ LOCALES= ja_JP.EUC \
lt_LN.ISO_8859-2 \
ru_SU.CP866 \
ru_SU.KOI8-R \
zh_CN.EUC
zh_CN.EUC \
zh_TW.BIG5
LOCALEDIR= ${DESTDIR}/usr/share/locale

View File

@ -1,4 +1,4 @@
# $Id: Makefile,v 1.14 1998/08/07 17:07:12 ache Exp $
# $Id: Makefile,v 1.15 1998/08/10 09:49:42 phk Exp $
NOMAN=YES
CLEANFILES+= ${LOCALES:S/$/.out/g}
@ -11,7 +11,8 @@ LOCALES= ja_JP.EUC \
lt_LN.ISO_8859-2 \
ru_SU.CP866 \
ru_SU.KOI8-R \
zh_CN.EUC
zh_CN.EUC \
zh_TW.BIG5
LOCALEDIR= ${DESTDIR}/usr/share/locale

View File

@ -0,0 +1,195 @@
/*
* big5 first byte: A1-FE
* second byte: 40-7E, A1-FE
*/
ENCODING "BIG5"
/* VARIABLE BIG5 character set */
/*
* ASCII
*/
ALPHA 'A' - 'Z' 'a' - 'z'
CONTROL 0x00 - 0x1f 0x7f
DIGIT '0' - '9'
GRAPH 0x21 - 0x7e
LOWER 'a' - 'z'
PUNCT 0x21 - 0x2f 0x3a - 0x40 0x5b - 0x60 0x7b - 0x7e
SPACE 0x09 - 0x0d 0x20
UPPER 'A' - 'Z'
XDIGIT '0' - '9' 'a' - 'f' 'A' - 'F'
BLANK ' ' '\t'
PRINT 0x20 - 0x7e
MAPLOWER < 'A' - 'Z' : 'a' >
MAPLOWER < 'a' - 'z' : 'a' >
MAPUPPER < 'A' - 'Z' : 'A' >
MAPUPPER < 'a' - 'z' : 'A' >
TODIGIT < '0' - '9' : 0 >
TODIGIT < 'A' - 'F' : 10 >
TODIGIT < 'a' - 'f' : 10 >
/*
* the real thing
*/
PUNCT 0xa141 - 0xa17e 0xa1a1 - 0xa1ac
SPECIAL 0xa1ad - 0xa1fe 0xa240 - 0xa27e 0xa2a1 - 0xa2ae
/* full width 0 1 2 .. 9 */
DIGIT 0xa2af - 0xa2b8
/* map these (SuChou Code (12) and Roman (I II etc.) number (10)) to digits */
TODIGIT < 0xa2b9 - 0xa2c2 : 1 >
TODIGIT < 0xa2c3 - 0xa2ce : 1 >
/* full width A .. Z and a .. z */
/* note the lower case alphabets are not continuous */
UPPER 0xa2cf - 0xa2e8
LOWER 0xa2e9 - 0xa2fe 0xa340 - 0xa343
/* we have to got two line for each 'cause they are not continuous */
MAPLOWER < 0xa2cf - 0xa2e4 : 0xa2e9 > < 0xa2e5 - 0xa2e8 : 0xa340 >
MAPLOWER < 0xa2e9 - 0xa2fe : 0xa2e9 > < 0xa340 - 0xa343 : 0xa340 >
MAPUPPER < 0xa2cf - 0xa2e4 : 0xa2cf > < 0xa2e5 - 0xa2e8 : 0xa2e5 >
MAPUPPER < 0xa2e9 - 0xa2fe : 0xa2cf > < 0xa340 - 0xa343 : 0xa2e5 >
XDIGIT 0xa2cf - 0xa2d4 0xa2e9 - 0xa2ee
SPACE 0xa3bc
/* bopomofo symbols */
PHONOGRAM 0xa374 - 0xa37e 0xa3a1 - 0xa3bb
/* tone symbols */
PHONOGRAM 0xa3bd - 0xa3bf
/* greek */
UPPER 0xa344 - 0xa35b
LOWER 0xa35c - 0xa373
MAPUPPER < 0xa344 - 0xa35b : 0xa344 > < 0xa35c - 0xa373 : 0xa344 >
MAPLOWER < 0xa344 - 0xa35b : 0xa35c > < 0xa35c - 0xa373 : 0xa35c >
/* cyrillic (russian etc) */
UPPER 0xc7f3 - 0xc7fe 0xc840 - 0xc854
LOWER 0xc855 - 0xc875
MAPUPPER < 0xc7f3 - 0xc7fe : 0xc7f3 > < 0xc840 - 0xc854 : 0xc840 >
MAPUPPER < 0xc855 - 0xc860 : 0xc7f3 > < 0xc861 - 0xc875 : 0xc840 >
MAPLOWER < 0xc7f3 - 0xc7fe : 0xc855 > < 0xc840 - 0xc854 : 0xc861 >
MAPLOWER < 0xc855 - 0xc860 : 0xc855 > < 0xc861 - 0xc875 : 0xc861 >
IDEOGRAM 0xa440 - 0xa47e 0xa4a1 - 0xa4fe
IDEOGRAM 0xa540 - 0xa57e 0xa5a1 - 0xa5fe
IDEOGRAM 0xa640 - 0xa67e 0xa6a1 - 0xa6fe
IDEOGRAM 0xa740 - 0xa77e 0xa7a1 - 0xa7fe
IDEOGRAM 0xa840 - 0xa87e 0xa8a1 - 0xa8fe
IDEOGRAM 0xa940 - 0xa97e 0xa9a1 - 0xa9fe
IDEOGRAM 0xaa40 - 0xaa7e 0xaaa1 - 0xaafe
IDEOGRAM 0xab40 - 0xab7e 0xaba1 - 0xabfe
IDEOGRAM 0xac40 - 0xac7e 0xaca1 - 0xacfe
IDEOGRAM 0xad40 - 0xad7e 0xada1 - 0xadfe
IDEOGRAM 0xae40 - 0xae7e 0xaea1 - 0xaefe
IDEOGRAM 0xaf40 - 0xaf7e 0xafa1 - 0xaffe
IDEOGRAM 0xb040 - 0xb07e 0xb0a1 - 0xb0fe
IDEOGRAM 0xb140 - 0xb17e 0xb1a1 - 0xb1fe
IDEOGRAM 0xb240 - 0xb27e 0xb2a1 - 0xb2fe
IDEOGRAM 0xb340 - 0xb37e 0xb3a1 - 0xb3fe
IDEOGRAM 0xb440 - 0xb47e 0xb4a1 - 0xb4fe
IDEOGRAM 0xb540 - 0xb57e 0xb5a1 - 0xb5fe
IDEOGRAM 0xb640 - 0xb67e 0xb6a1 - 0xb6fe
IDEOGRAM 0xb740 - 0xb77e 0xb7a1 - 0xb7fe
IDEOGRAM 0xb840 - 0xb87e 0xb8a1 - 0xb8fe
IDEOGRAM 0xb940 - 0xb97e 0xb9a1 - 0xb9fe
IDEOGRAM 0xba40 - 0xba7e 0xbaa1 - 0xbafe
IDEOGRAM 0xbb40 - 0xbb7e 0xbba1 - 0xbbfe
IDEOGRAM 0xbc40 - 0xbc7e 0xbca1 - 0xbcfe
IDEOGRAM 0xbd40 - 0xbd7e 0xbda1 - 0xbdfe
IDEOGRAM 0xbe40 - 0xbe7e 0xbea1 - 0xbefe
IDEOGRAM 0xbf40 - 0xbf7e 0xbfa1 - 0xbffe
IDEOGRAM 0xc040 - 0xc07e 0xc0a1 - 0xc0fe
IDEOGRAM 0xc140 - 0xc17e 0xc1a1 - 0xc1fe
IDEOGRAM 0xc240 - 0xc27e 0xc2a1 - 0xc2fe
IDEOGRAM 0xc340 - 0xc37e 0xc3a1 - 0xc3fe
IDEOGRAM 0xc440 - 0xc47e 0xc4a1 - 0xc4fe
IDEOGRAM 0xc540 - 0xc57e 0xc5a1 - 0xc5fe
IDEOGRAM 0xc640 - 0xc67e
IDEOGRAM 0xc940 - 0xc97e 0xc9a1 - 0xc9fe
IDEOGRAM 0xca40 - 0xca7e 0xcaa1 - 0xcafe
IDEOGRAM 0xcb40 - 0xcb7e 0xcba1 - 0xcbfe
IDEOGRAM 0xcc40 - 0xcc7e 0xcca1 - 0xccfe
IDEOGRAM 0xcd40 - 0xcd7e 0xcda1 - 0xcdfe
IDEOGRAM 0xce40 - 0xce7e 0xcea1 - 0xcefe
IDEOGRAM 0xcf40 - 0xcf7e 0xcfa1 - 0xcffe
IDEOGRAM 0xd040 - 0xd074 0xd0a1 - 0xd0fe
IDEOGRAM 0xd140 - 0xd174 0xd1a1 - 0xd1fe
IDEOGRAM 0xd240 - 0xd274 0xd2a1 - 0xd2fe
IDEOGRAM 0xd340 - 0xd374 0xd3a1 - 0xd3fe
IDEOGRAM 0xd440 - 0xd474 0xd4a1 - 0xd4fe
IDEOGRAM 0xd540 - 0xd574 0xd5a1 - 0xd5fe
IDEOGRAM 0xd640 - 0xd674 0xd6a1 - 0xd6fe
IDEOGRAM 0xd740 - 0xd774 0xd7a1 - 0xd7fe
IDEOGRAM 0xd840 - 0xd874 0xd8a1 - 0xd8fe
IDEOGRAM 0xd940 - 0xd974 0xd9a1 - 0xd9fe
IDEOGRAM 0xda40 - 0xda74 0xdaa1 - 0xdafe
IDEOGRAM 0xdb40 - 0xdb74 0xdba1 - 0xdbfe
IDEOGRAM 0xdc40 - 0xdc74 0xdca1 - 0xdcfe
IDEOGRAM 0xdd40 - 0xdd74 0xdda1 - 0xddfe
IDEOGRAM 0xde40 - 0xde74 0xdea1 - 0xdefe
IDEOGRAM 0xdf40 - 0xdf74 0xdfa1 - 0xdffe
IDEOGRAM 0xe040 - 0xe074 0xe0a1 - 0xe0fe
IDEOGRAM 0xe140 - 0xe174 0xe1a1 - 0xe1fe
IDEOGRAM 0xe240 - 0xe274 0xe2a1 - 0xe2fe
IDEOGRAM 0xe340 - 0xe374 0xe3a1 - 0xe3fe
IDEOGRAM 0xe440 - 0xe474 0xe4a1 - 0xe4fe
IDEOGRAM 0xe540 - 0xe574 0xe5a1 - 0xe5fe
IDEOGRAM 0xe640 - 0xe674 0xe6a1 - 0xe6fe
IDEOGRAM 0xe740 - 0xe774 0xe7a1 - 0xe7fe
IDEOGRAM 0xe840 - 0xe874 0xe8a1 - 0xe8fe
IDEOGRAM 0xe940 - 0xe974 0xe9a1 - 0xe9fe
IDEOGRAM 0xea40 - 0xea74 0xeaa1 - 0xeafe
IDEOGRAM 0xeb40 - 0xeb74 0xeba1 - 0xebfe
IDEOGRAM 0xec40 - 0xec74 0xeca1 - 0xecfe
IDEOGRAM 0xed40 - 0xed74 0xeda1 - 0xedfe
IDEOGRAM 0xee40 - 0xee74 0xeea1 - 0xeefe
IDEOGRAM 0xef40 - 0xef74 0xefa1 - 0xeffe
IDEOGRAM 0xf040 - 0xf074 0xf0a1 - 0xf0fe
IDEOGRAM 0xf140 - 0xf174 0xf1a1 - 0xf1fe
IDEOGRAM 0xf240 - 0xf274 0xf2a1 - 0xf2fe
IDEOGRAM 0xf340 - 0xf374 0xf3a1 - 0xf3fe
IDEOGRAM 0xf440 - 0xf474 0xf4a1 - 0xf4fe
IDEOGRAM 0xf540 - 0xf574 0xf5a1 - 0xf5fe
IDEOGRAM 0xf640 - 0xf674 0xf6a1 - 0xf6fe
IDEOGRAM 0xf740 - 0xf774 0xf7a1 - 0xf7fe
IDEOGRAM 0xf840 - 0xf874 0xf8a1 - 0xf8fe
IDEOGRAM 0xf940 - 0xf974 0xf9a1 - 0xf9dc
/* Eten external charset 0xc6a1 - 0xc8d3 */
/* lots strange numbers */
SPECIAL 0xc6a1 - 0xc6be
TODIGIT < 0xc6a1 - 0xc6aa : 1 > < 0xc6ab - 0xc6b4 : 1 >
TODIGIT < 0xc6b5 - 0xc6be : 1>
/* these are HanZi Radicals */
SPECIAL 0xc6bf - 0xc6d7 0xc879 - 0xc87e 0xc8a1 - 0xc8a4
/* who knows what these are ... */
SPECIAL 0xc6d8 - 0xc6e6 0xc876 - 0xc878
/* hiragana */
SPECIAL 0xc6e7 - 0xc6fe 0xc740 - 0xc77a
/* katakana */
SPECIAL 0xc77b - 0xc77e 0xc7a1 - 0xc7f2
/* ZIP code (in Japan) */
SPECIAL 0xc8a5 - 0x08cc
/* various Japanese symbols */
SPECIAL 0x08cd - 0xc8d3
/* line drawing glyphs */
SPECIAL 0xf9dd - 0xf9ff