libedit: improve multibyte support

Until this commit, libedit only supported UTF-8 as a multibyte charset.
Improve it to support other multibyte charsets.

Tested with eucJP and SJIS charsets.
Note that this change has been reviewed and committed in upstream libedit
as well, via christos@NetBSD.

Submitted by:	naito.yuichiro _at_ gmail.com
Reviewed by:	bapt, pfg, yuripv, 0mp
MFC after:	1 month
Differential Revision:	https://reviews.freebsd.org/D17903
This commit is contained in:
Baptiste Daroussin 2018-11-26 08:16:33 +00:00
parent acc84f29fe
commit 6bbd1d19fd
5 changed files with 12 additions and 24 deletions

View File

@ -37,6 +37,7 @@ __RCSID("$NetBSD: chartype.c,v 1.23 2016/02/28 23:02:24 christos Exp $");
__FBSDID("$FreeBSD$");
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
@ -182,17 +183,13 @@ ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv)
/*
 * ct_enc_width(c): return the number of bytes needed to encode the
 * character c, or 0 if c cannot be encoded.
 *
 * NOTE(review): this hunk is rendered without its +/- markers, so the
 * removed and the added bodies appear back to back below.  Going by the
 * hunk header (17 old lines, 13 new lines), the if/else chain is the 11
 * removed lines and the ct_wcrtomb() sequence is the 7 added lines.
 */
protected size_t
ct_enc_width(Char c)
{
/* Removed body: byte counts hard-coded for the UTF-8 code point ranges. */
/* UTF-8 encoding specific values */
if (c < 0x80)
return 1;
else if (c < 0x0800)
return 2;
else if (c < 0x10000)
return 3;
else if (c < 0x110000)
return 4;
else
return 0; /* not a valid codepoint */
/*
 * Added body: instead of assuming UTF-8, encode c into a scratch buffer
 * with ct_wcrtomb() (wcrtomb in the wide-character build) and report how
 * many bytes it produced.
 */
mbstate_t ps = (mbstate_t){{0}}; /* start from a zeroed conversion state */
size_t len;
char cbuf[MB_LEN_MAX]; /* scratch output; only the length is used */
len = ct_wcrtomb(cbuf, c, &ps);
/* (size_t)-1 signals a conversion failure; map it to width 0. */
if (len == (size_t)-1)
return (0);
return (len);
}
protected ssize_t

View File

@ -56,6 +56,7 @@
#define ct_wctob wctob
#define ct_wctomb wctomb
#define ct_wcrtomb wcrtomb
#define ct_wctomb_reset wctomb(0,0)
#define ct_wcstombs wcstombs
#define ct_mbstowcs mbstowcs
@ -109,6 +110,7 @@ Width(wchar_t c)
#define ct_wctob(w) ((int)(w))
#define ct_wctomb error
#define ct_wcrtomb error
#define ct_wctomb_reset
#define ct_wcstombs(a, b, c) (strncpy(a, b, c), strlen(a))
#define ct_mbstowcs(a, b, c) (strncpy(a, b, c), strlen(a))

View File

@ -99,10 +99,6 @@ el_init_fd(const char *prog, FILE *fin, FILE *fout, FILE *ferr,
* Initialize all the modules. Order is important!!!
*/
el->el_flags = 0;
if (setlocale(LC_CTYPE, NULL) != NULL){
if (strcmp(nl_langinfo(CODESET), "UTF-8") == 0)
el->el_flags |= CHARSET_IS_UTF8;
}
if (terminal_init(el) == -1) {
el_free(el->el_prog);
@ -293,7 +289,7 @@ FUN(el,set)(EditLine *el, int op, ...)
void *ptr = va_arg(ap, void *);
rv = hist_set(el, func, ptr);
if (!(el->el_flags & CHARSET_IS_UTF8))
if (MB_CUR_MAX == 1)
el->el_flags &= ~NARROW_HISTORY;
break;
}

View File

@ -56,7 +56,6 @@
#define NO_TTY 0x02
#define EDIT_DISABLED 0x04
#define UNBUFFERED 0x08
#define CHARSET_IS_UTF8 0x10
#define NARROW_HISTORY 0x40
typedef unsigned char el_action_t; /* Index to command array */

View File

@ -363,13 +363,7 @@ read_char(EditLine *el, wchar_t *cp)
goto again;
}
case (size_t)-2:
/*
* We don't support other multibyte charsets.
* The second condition shouldn't happen
* and is here merely for additional safety.
*/
if ((el->el_flags & CHARSET_IS_UTF8) == 0 ||
cbp >= MB_LEN_MAX) {
if (cbp >= MB_LEN_MAX) {
errno = EILSEQ;
*cp = L'\0';
return -1;