Split the tolower/toupper code out of the usual xlat16 kiconv table, and make
it possible to do tolower/toupper independently, without code conversion.

Submitted by:	imura (but bugs are mine)
Obtained from:	http://people.freebsd.org/~imura/kiconv/
		(1_kiconv_wctype_kern.diff, 1_kiconv_wctype_user.diff)
This commit is contained in:
Xin LI 2009-06-22 17:09:46 +00:00
parent c90c7d6946
commit 6ac937c89c
7 changed files with 342 additions and 51 deletions

View File

@ -2,7 +2,7 @@
LIB= kiconv
SHLIBDIR?= /lib
SRCS= xlat16_iconv.c xlat16_sysctl.c
SRCS= kiconv_sysctl.c xlat16_iconv.c xlat16_sysctl.c
SRCS+= quirks.c
SHLIB_MAJOR= 3

View File

@ -0,0 +1,93 @@
/*-
* Copyright (c) 2005 Ryuichiro Imura
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/types.h>
#include <sys/iconv.h>
#include <sys/sysctl.h>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
/*
 * Check whether the kernel iconv converter driver named drvname is listed
 * in the "kern.iconv.drvlist" sysctl.
 *
 * The list is walked as a sequence of NUL-terminated names that ends with
 * an empty string.
 *
 * Returns 0 when drvname is listed, ENOENT when it is not (or the list is
 * empty), ENOMEM when the list buffer cannot be allocated, or the errno
 * value left by a failing sysctlbyname() call.
 */
int
kiconv_lookupconv(const char *drvname)
{
	char *drivers, *drvp;
	size_t size;
	int error;

	/* First call only asks the kernel how large the list is. */
	if (sysctlbyname("kern.iconv.drvlist", NULL, &size, NULL, 0) == -1)
		return (errno);
	if (size == 0)
		return (ENOENT);

	drivers = malloc(size);
	if (drivers == NULL)
		return (ENOMEM);

	if (sysctlbyname("kern.iconv.drvlist", drivers, &size, NULL, 0) == -1) {
		/* Save errno before free(), which is allowed to change it. */
		error = errno;
		free(drivers);
		return (error);
	}

	error = ENOENT;
	for (drvp = drivers; *drvp != '\0'; drvp += strlen(drvp) + 1) {
		if (strcmp(drvp, drvname) == 0) {
			error = 0;
			break;
		}
	}

	/* Release the list on every path, including "not found". */
	free(drivers);
	return (error);
}
/*
 * Check whether a conversion pair fromcode -> tocode is already listed in
 * the "kern.iconv.cslist" sysctl.
 *
 * The sysctl data is treated as an array of struct iconv_cspair_info and
 * each entry's cs_to / cs_from names are compared against the arguments.
 *
 * Returns 0 when the pair is listed, ENOENT when it is not (or the list is
 * empty), ENOMEM when the list buffer cannot be allocated, or the errno
 * value left by a failing sysctlbyname() call.
 */
int
kiconv_lookupcs(const char *tocode, const char *fromcode)
{
	struct iconv_cspair_info *csi, *csip;
	size_t i, count, size;
	int error;

	/* First call only asks the kernel how large the list is. */
	if (sysctlbyname("kern.iconv.cslist", NULL, &size, NULL, 0) == -1)
		return (errno);
	if (size == 0)
		return (ENOENT);

	csi = malloc(size);
	if (csi == NULL)
		return (ENOMEM);

	if (sysctlbyname("kern.iconv.cslist", csi, &size, NULL, 0) == -1) {
		/* Save errno before free(), which is allowed to change it. */
		error = errno;
		free(csi);
		return (error);
	}

	error = ENOENT;
	count = size / sizeof(*csi);
	for (i = 0, csip = csi; i < count; i++, csip++) {
		if (strcmp(csip->cs_to, tocode) == 0 &&
		    strcmp(csip->cs_from, fromcode) == 0) {
			error = 0;
			break;
		}
	}

	/* Release the list on every path, including "not found". */
	free(csi);
	return (error);
}

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2003 Ryuichiro Imura
* Copyright (c) 2003, 2005 Ryuichiro Imura
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -41,9 +41,11 @@
#include <dlfcn.h>
#include <err.h>
#include <errno.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wctype.h>
#include "quirks.h"
@ -56,6 +58,7 @@ struct xlat16_table {
};
static struct xlat16_table kiconv_xlat16_open(const char *, const char *, int);
static int chklocale(int, const char *);
static int my_iconv_init(void);
static iconv_t (*my_iconv_open)(const char *, const char *);
@ -67,30 +70,18 @@ int
kiconv_add_xlat16_cspair(const char *tocode, const char *fromcode, int flag)
{
int error;
size_t i, size, idxsize;
struct iconv_cspair_info *csi;
size_t idxsize;
struct xlat16_table xt;
void *data;
char *p;
if (sysctlbyname("kern.iconv.cslist", NULL, &size, NULL, 0) == -1)
return (-1);
if (size > 0) {
csi = malloc(size);
if (csi == NULL)
return (-1);
if (sysctlbyname("kern.iconv.cslist", csi, &size, NULL, 0) == -1) {
free(csi);
return (-1);
}
for (i = 0; i < (size/sizeof(*csi)); i++, csi++){
if (strcmp(csi->cs_to, tocode) == 0 &&
strcmp(csi->cs_from, fromcode) == 0)
return (0);
}
}
if (kiconv_lookupcs(tocode, fromcode) == 0)
return (0);
xt = kiconv_xlat16_open(tocode, fromcode, flag);
if (flag & KICONV_WCTYPE)
xt = kiconv_xlat16_open(fromcode, fromcode, flag);
else
xt = kiconv_xlat16_open(tocode, fromcode, flag);
if (xt.size == 0)
return (-1);
@ -117,7 +108,7 @@ kiconv_add_xlat16_cspair(const char *tocode, const char *fromcode, int flag)
int
kiconv_add_xlat16_cspairs(const char *foreigncode, const char *localcode)
{
int error;
int error, locale;
error = kiconv_add_xlat16_cspair(foreigncode, localcode,
KICONV_FROM_LOWER | KICONV_FROM_UPPER);
@ -127,7 +118,14 @@ kiconv_add_xlat16_cspairs(const char *foreigncode, const char *localcode)
KICONV_LOWER | KICONV_UPPER);
if (error)
return (error);
locale = chklocale(LC_CTYPE, localcode);
if (locale == 0) {
error = kiconv_add_xlat16_cspair(KICONV_WCTYPE_NAME, localcode,
KICONV_WCTYPE);
if (error)
return (error);
}
return (0);
}
@ -175,6 +173,31 @@ kiconv_xlat16_open(const char *tocode, const char *fromcode, int lcase)
bzero(dst, outbytesleft);
c = ((ls & 0x100 ? us | 0x80 : us) << 8) | (u_char)ls;
if (lcase & KICONV_WCTYPE) {
if ((c & 0xff) == 0)
c >>= 8;
if (iswupper(c)) {
c = towlower(c);
if ((c & 0xff00) == 0)
c <<= 8;
table[us] = c | XLAT16_HAS_LOWER_CASE;
} else if (iswlower(c)) {
c = towupper(c);
if ((c & 0xff00) == 0)
c <<= 8;
table[us] = c | XLAT16_HAS_UPPER_CASE;
} else
table[us] = 0;
/*
* store not NULL
*/
if (table[us])
xt.idx[ls] = table;
continue;
}
c = quirk_vendor2unix(c, pre_q_list, pre_q_size);
src[0] = (u_char)(c >> 8);
src[1] = (u_char)c;
@ -257,6 +280,24 @@ kiconv_xlat16_open(const char *tocode, const char *fromcode, int lcase)
return (xt);
}
/*
 * Compare the codeset part of the current locale for category with code.
 *
 * The codeset is whatever follows the first '.' in the locale name that
 * setlocale(category, NULL) reports.  It is compared case-insensitively
 * with code, first as-is and then after mapping it through
 * kiconv_quirkcs() with KICONV_VENDOR_MICSFT.
 *
 * Returns 0 when the names match, -1 when the locale name is unavailable
 * or has no codeset part, and otherwise the non-zero strcasecmp() result.
 */
static int
chklocale(int category, const char *code)
{
	const char *locale, *p;
	int error;

	/* setlocale() may return NULL; never hand that to strchr(). */
	locale = setlocale(category, NULL);
	if (locale == NULL)
		return (-1);

	p = strchr(locale, '.');
	if (p == NULL)
		return (-1);
	p++;	/* skip the '.' itself */

	error = strcasecmp(code, p);
	if (error) {
		/* XXX - can't avoid calling quirk here... */
		error = strcasecmp(code, kiconv_quirkcs(p,
		    KICONV_VENDOR_MICSFT));
	}
	return (error);
}
static int
my_iconv_init(void)
{
@ -380,17 +421,21 @@ my_iconv_char(iconv_t cd, const u_char **ibuf, size_t * ilen, u_char **obuf,
#else /* statically linked */
#include <sys/types.h>
#include <sys/iconv.h>
#include <errno.h>
int
kiconv_add_xlat16_cspair(const char *tocode, const char *fromcode, int flag)
kiconv_add_xlat16_cspair(const char *tocode __unused, const char *fromcode __unused,
int flag __unused)
{
errno = EINVAL;
return (-1);
}
int
kiconv_add_xlat16_cspairs(const char *tocode, const char *fromcode)
kiconv_add_xlat16_cspairs(const char *tocode __unused, const char *fromcode __unused)
{
errno = EINVAL;
return (-1);

View File

@ -307,6 +307,18 @@ iconv_convchr_case(void *handle, const char **inbuf,
return ICONV_CONVERTER_CONV(handle, inbuf, inbytesleft, outbuf, outbytesleft, 1, casetype);
}
/*
 * Lower-case c by dispatching to the tolower method of the converter
 * instance behind handle.
 */
int
towlower(int c, void *handle)
{
	int lowered;

	lowered = ICONV_CONVERTER_TOLOWER(handle, c);
	return (lowered);
}
/*
 * Upper-case c by dispatching to the toupper method of the converter
 * instance behind handle.
 */
int
towupper(int c, void *handle)
{
	int raised;

	raised = ICONV_CONVERTER_TOUPPER(handle, c);
	return (raised);
}
/*
* Give a list of loaded converters. Each name terminated with 0.
* An empty string terminates the list.
@ -441,6 +453,12 @@ iconv_converter_donestub(struct iconv_converter_class *dp)
return 0;
}
/*
 * Identity case-mapping stub: hands back c untouched and ignores handle.
 *
 * NOTE(review): the tolower/toupper methods in the converter interface
 * list their parameters as (void *handle, int c), while this stub takes
 * (int c, void *handle) -- confirm the argument order used when the stub
 * is invoked as a method default.
 */
int
iconv_converter_tolowerstub(int c, void *handle)
{
	(void)handle;	/* intentionally unused */
	return (c);
}
int
iconv_converter_handler(module_t mod, int type, void *data)
{

View File

@ -68,3 +68,13 @@ STATICMETHOD void done {
STATICMETHOD const char * name {
struct iconv_converter_class *dcp;
};
# Map character c to lower case using the converter instance "handle".
# iconv_converter_tolowerstub is named as the DEFAULT implementation.
METHOD int tolower {
void *handle;
int c;
} DEFAULT iconv_converter_tolowerstub;
# Map character c to upper case using the converter instance "handle".
# The DEFAULT is the same stub that tolower names; that stub returns its
# character argument unchanged, so it is direction-neutral.
METHOD int toupper {
void *handle;
int c;
} DEFAULT iconv_converter_tolowerstub;

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2003, Ryuichiro Imura
* Copyright (c) 2003, 2005 Ryuichiro Imura
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -43,12 +43,17 @@ __FBSDID("$FreeBSD$");
MODULE_DEPEND(iconv_xlat16, libiconv, 2, 2, 2);
#endif
/*
 * Split a 16-bit character value into the two indices used with d_table.
 *
 * C2I1(c): first-level index, taken from the low byte of c and offset by
 *          0x100 when bit 15 of c is set (result is in 0..0x1ff).
 * C2I2(c): second-level index, taken from the high byte of c with its top
 *          bit cleared when bit 15 of c is set.
 *
 * Both macros evaluate c more than once; pass side-effect-free arguments.
 */
#define C2I1(c) ((c) & 0x8000 ? ((c) & 0xff) | 0x100 : (c) & 0xff)
#define C2I2(c) ((c) & 0x8000 ? ((c) >> 8) & 0x7f : ((c) >> 8) & 0xff)
/*
 * XLAT16 converter instance
 *
 * One of these is created per opened conversion; it carries the lookup
 * table plus the optional case-mapping converters used by the conversion
 * routine.
 */
struct iconv_xlat16 {
KOBJ_FIELDS;
/*
 * First-level table of row pointers (0x200 slots, indexed as by C2I1);
 * a NULL row has no entries.  Each row is indexed by the second index
 * (as by C2I2) and yields a 32-bit entry holding the code and flag bits.
 */
uint32_t * d_table[0x200];
/*
 * Case-mapping converter for source characters (used for the
 * KICONV_FROM_LOWER / KICONV_FROM_UPPER cases).  Opened on
 * (KICONV_WCTYPE_NAME, cp_from); NULL if that open failed; points at
 * this instance itself when the instance is the wctype converter.
 */
void * f_ctp;
/*
 * Case-mapping converter for converted characters (used for the
 * KICONV_LOWER / KICONV_UPPER cases).  Opened on
 * (KICONV_WCTYPE_NAME, cp_to); same NULL / self conventions as f_ctp.
 */
void * t_ctp;
/* Charset pair this instance was opened on; its cp_refcount is held. */
struct iconv_cspair * d_csp;
};
@ -72,6 +77,16 @@ iconv_xlat16_open(struct iconv_converter_class *dcp,
}
idxp++;
}
if (strcmp(csp->cp_to, KICONV_WCTYPE_NAME) != 0) {
if (iconv_open(KICONV_WCTYPE_NAME, csp->cp_from, &dp->f_ctp) != 0)
dp->f_ctp = NULL;
if (iconv_open(KICONV_WCTYPE_NAME, csp->cp_to, &dp->t_ctp) != 0)
dp->t_ctp = NULL;
} else {
dp->f_ctp = dp->t_ctp = dp;
}
dp->d_csp = csp;
csp->cp_refcount++;
*dpp = (void*)dp;
@ -83,6 +98,10 @@ iconv_xlat16_close(void *data)
{
struct iconv_xlat16 *dp = data;
if (dp->f_ctp && dp->f_ctp != data)
iconv_close(dp->f_ctp);
if (dp->t_ctp && dp->t_ctp != data)
iconv_close(dp->t_ctp);
dp->d_csp->cp_refcount--;
kobj_delete((struct kobj*)data, M_ICONV);
return (0);
@ -100,7 +119,7 @@ iconv_xlat16_conv(void *d2p, const char **inbuf,
size_t in, on, ir, or, inlen;
uint32_t code;
u_char u, l;
uint16_t c1, c2;
uint16_t c1, c2, ctmp;
if (inbuf == NULL || *inbuf == NULL || outbuf == NULL || *outbuf == NULL)
return (0);
@ -112,21 +131,32 @@ iconv_xlat16_conv(void *d2p, const char **inbuf,
while(ir > 0 && or > 0) {
inlen = 0;
code = '\0';
code = 0;
c1 = ir > 1 ? *(src+1) & 0xff : 0;
c2 = *src & 0xff;
ctmp = 0;
c1 = c2 & 0x80 ? c1 | 0x100 : c1;
c2 = c2 & 0x80 ? c2 & 0x7f : c2;
if (ir > 1 && dp->d_table[c1]) {
if (ir > 1 && dp->d_table[c1] && dp->d_table[c1][c2]) {
/*
* inbuf char is a double byte char
*/
code = dp->d_table[c1][c2];
if (code)
inlen = 2;
inlen = 2;
/* toupper,tolower */
if (casetype == KICONV_FROM_LOWER && dp->f_ctp)
ctmp = towlower(((u_char)*src << 8) | (u_char)*(src + 1),
dp->f_ctp);
else if (casetype == KICONV_FROM_UPPER && dp->f_ctp)
ctmp = towupper(((u_char)*src << 8) | (u_char)*(src + 1),
dp->f_ctp);
if (ctmp) {
c1 = C2I1(ctmp);
c2 = C2I2(ctmp);
}
}
if (inlen == 0) {
@ -139,11 +169,31 @@ iconv_xlat16_conv(void *d2p, const char **inbuf,
* inbuf char is a single byte char
*/
inlen = 1;
code = dp->d_table[c1][c2];
if (!code) {
ret = -1;
break;
if (casetype & (KICONV_FROM_LOWER|KICONV_FROM_UPPER))
code = dp->d_table[c1][c2];
if (casetype == KICONV_FROM_LOWER) {
if (dp->f_ctp)
ctmp = towlower((u_char)*src, dp->f_ctp);
else if (code & XLAT16_HAS_FROM_LOWER_CASE)
ctmp = (u_char)(code >> 16);
} else if (casetype == KICONV_FROM_UPPER) {
if (dp->f_ctp)
ctmp = towupper((u_char)*src, dp->f_ctp);
else if (code & XLAT16_HAS_FROM_UPPER_CASE)
ctmp = (u_char)(code >> 16);
}
if (ctmp) {
c1 = C2I1(ctmp << 8);
c2 = C2I2(ctmp << 8);
}
}
code = dp->d_table[c1][c2];
if (!code) {
ret = -1;
break;
}
nullin = (code & XLAT16_ACCEPT_NULL_IN) ? 1 : 0;
@ -158,14 +208,6 @@ iconv_xlat16_conv(void *d2p, const char **inbuf,
/*
* now start translation
*/
if ((casetype == KICONV_FROM_LOWER && code & XLAT16_HAS_FROM_LOWER_CASE) ||
(casetype == KICONV_FROM_UPPER && code & XLAT16_HAS_FROM_UPPER_CASE)) {
c2 = (u_char)(code >> 16);
c1 = c2 & 0x80 ? 0x100 : 0;
c2 = c2 & 0x80 ? c2 & 0x7f : c2;
code = dp->d_table[c1][c2];
}
u = (u_char)(code >> 8);
l = (u_char)code;
@ -186,15 +228,38 @@ iconv_xlat16_conv(void *d2p, const char **inbuf,
ret = -1;
break;
}
/* toupper,tolower */
if (casetype == KICONV_LOWER && dp->t_ctp) {
code = towlower((uint16_t)code, dp->t_ctp);
u = (u_char)(code >> 8);
l = (u_char)code;
}
if (casetype == KICONV_UPPER && dp->t_ctp) {
code = towupper((uint16_t)code, dp->t_ctp);
u = (u_char)(code >> 8);
l = (u_char)code;
}
*dst++ = u;
*dst++ = l;
or -= 2;
} else {
if ((casetype == KICONV_LOWER && code & XLAT16_HAS_LOWER_CASE) ||
(casetype == KICONV_UPPER && code & XLAT16_HAS_UPPER_CASE))
*dst++ = (u_char)(code >> 16);
else
*dst++ = l;
/* toupper,tolower */
if (casetype == KICONV_LOWER) {
if (dp->t_ctp)
l = (u_char)towlower(l, dp->t_ctp);
else if (code & XLAT16_HAS_LOWER_CASE)
l = (u_char)(code >> 16);
}
if (casetype == KICONV_UPPER) {
if (dp->t_ctp)
l = (u_char)towupper(l, dp->t_ctp);
else if (code & XLAT16_HAS_UPPER_CASE)
l = (u_char)(code >> 16);
}
*dst++ = l;
or--;
}
@ -232,6 +297,55 @@ iconv_xlat16_name(struct iconv_converter_class *dcp)
return ("xlat16");
}
/*
 * tolower method of the xlat16 converter.
 *
 * Values below 0x100 are treated as single-byte characters and moved into
 * the high byte before the table indices are derived; values below 0x10000
 * are used as-is; anything larger is returned unchanged.  When the table
 * entry for c carries XLAT16_HAS_LOWER_CASE, the low 16 bits of the entry
 * are the result, folded back to one byte if its low byte is zero.
 * Otherwise c is returned unchanged.
 */
static int
iconv_xlat16_tolower(void *d2p, register int c)
{
	struct iconv_xlat16 *dp = (struct iconv_xlat16*)d2p;
	uint32_t *row;
	int key, row_idx, col_idx, out;

	if (c >= 0x10000)
		return (c);

	/* Single-byte values live in the high byte of the lookup key. */
	key = (c < 0x100) ? (c << 8) : c;
	row_idx = C2I1(key);
	col_idx = C2I2(key);

	row = dp->d_table[row_idx];
	if (!row || (row[col_idx] & XLAT16_HAS_LOWER_CASE) == 0)
		return (c);

	out = row[col_idx] & 0xffff;
	if ((out & 0xff) == 0)
		out = (out >> 8) & 0xff;
	return (out);
}
/*
 * toupper method of the xlat16 converter.
 *
 * Values below 0x100 are treated as single-byte characters and moved into
 * the high byte before the table indices are derived; values below 0x10000
 * are used as-is; anything larger is returned unchanged.  When the table
 * entry for c carries XLAT16_HAS_UPPER_CASE, the low 16 bits of the entry
 * are the result, folded back to one byte if its low byte is zero.
 * Otherwise c is returned unchanged.
 */
static int
iconv_xlat16_toupper(void *d2p, register int c)
{
	struct iconv_xlat16 *dp = (struct iconv_xlat16*)d2p;
	uint32_t *row;
	int key, row_idx, col_idx, out;

	if (c >= 0x10000)
		return (c);

	/* Single-byte values live in the high byte of the lookup key. */
	key = (c < 0x100) ? (c << 8) : c;
	row_idx = C2I1(key);
	col_idx = C2I2(key);

	row = dp->d_table[row_idx];
	if (!row || (row[col_idx] & XLAT16_HAS_UPPER_CASE) == 0)
		return (c);

	out = row[col_idx] & 0xffff;
	if ((out & 0xff) == 0)
		out = (out >> 8) & 0xff;
	return (out);
}
static kobj_method_t iconv_xlat16_methods[] = {
KOBJMETHOD(iconv_converter_open, iconv_xlat16_open),
KOBJMETHOD(iconv_converter_close, iconv_xlat16_close),
@ -241,6 +355,8 @@ static kobj_method_t iconv_xlat16_methods[] = {
KOBJMETHOD(iconv_converter_done, iconv_xlat16_done),
#endif
KOBJMETHOD(iconv_converter_name, iconv_xlat16_name),
KOBJMETHOD(iconv_converter_tolower, iconv_xlat16_tolower),
KOBJMETHOD(iconv_converter_toupper, iconv_xlat16_toupper),
{0, 0}
};

View File

@ -51,6 +51,9 @@
#define KICONV_UPPER 2 /* toupper converted character */
#define KICONV_FROM_LOWER 4 /* tolower source character, then convert */
#define KICONV_FROM_UPPER 8 /* toupper source character, then convert */
#define KICONV_WCTYPE 16 /* towlower/towupper characters */
#define KICONV_WCTYPE_NAME "_wctype"
/*
* Entry for cslist sysctl
@ -95,6 +98,8 @@ int kiconv_add_xlat_table(const char *, const char *, const u_char *);
int kiconv_add_xlat16_cspair(const char *, const char *, int);
int kiconv_add_xlat16_cspairs(const char *, const char *);
int kiconv_add_xlat16_table(const char *, const char *, const void *, int);
int kiconv_lookupconv(const char *drvname);
int kiconv_lookupcs(const char *tocode, const char *fromcode);
const char *kiconv_quirkcs(const char *, int);
__END_DECLS
@ -128,7 +133,7 @@ struct iconv_cspair {
TAILQ_ENTRY(iconv_cspair) cp_link;
};
#define KICONV_CONVERTER(name,size) \
#define KICONV_CONVERTER(name,size) \
static struct iconv_converter_class iconv_ ## name ## _class = { \
"iconv_"#name, iconv_ ## name ## _methods, size, NULL \
}; \
@ -138,7 +143,7 @@ struct iconv_cspair {
}; \
DECLARE_MODULE(iconv_ ## name, iconv_ ## name ## _mod, SI_SUB_DRIVERS, SI_ORDER_ANY);
#define KICONV_CES(name,size) \
#define KICONV_CES(name,size) \
static DEFINE_CLASS(iconv_ces_ ## name, iconv_ces_ ## name ## _methods, (size)); \
static moduledata_t iconv_ces_ ## name ## _mod = { \
"iconv_ces_"#name, iconv_cesmod_handler, \
@ -167,6 +172,9 @@ char* iconv_convstr(void *handle, char *dst, const char *src);
void* iconv_convmem(void *handle, void *dst, const void *src, int size);
int iconv_vfs_refcount(const char *fsname);
int towlower(int c, void *handle);
int towupper(int c, void *handle);
/*
* Bridge struct of iconv functions
*/
@ -233,6 +241,7 @@ int iconv_lookupcp(char **cpp, const char *s);
int iconv_converter_initstub(struct iconv_converter_class *dp);
int iconv_converter_donestub(struct iconv_converter_class *dp);
int iconv_converter_tolowerstub(int c, void *handle);
int iconv_converter_handler(module_t mod, int type, void *data);
#ifdef ICONV_DEBUG