Improve case-insensitive matching in multibyte locales.
Obtained from: Isamu Hasegawa (IBM) via Fedora
This commit is contained in:
parent
0c6755d771
commit
3aceb5c9da
@ -416,7 +416,7 @@ update_mb_len_index (unsigned char const *p, int len)
|
||||
|
||||
/* This function fetches a wide character and updates cur_mb_len,
|
||||
used only if the current locale is a multibyte environment. */
|
||||
static wchar_t
|
||||
static wint_t
|
||||
fetch_wc (char const *eoferr)
|
||||
{
|
||||
wchar_t wc;
|
||||
@ -425,7 +425,7 @@ fetch_wc (char const *eoferr)
|
||||
if (eoferr != 0)
|
||||
dfaerror (eoferr);
|
||||
else
|
||||
return -1;
|
||||
return WEOF;
|
||||
}
|
||||
|
||||
cur_mb_len = mbrtowc(&wc, lexptr, lexleft, &mbs);
|
||||
@ -461,7 +461,7 @@ fetch_wc (char const *eoferr)
|
||||
static void
|
||||
parse_bracket_exp_mb ()
|
||||
{
|
||||
wchar_t wc, wc1, wc2;
|
||||
wint_t wc, wc1, wc2;
|
||||
|
||||
/* Work area to build a mb_char_classes. */
|
||||
struct mb_char_classes *work_mbc;
|
||||
@ -498,7 +498,7 @@ parse_bracket_exp_mb ()
|
||||
work_mbc->invert = 0;
|
||||
do
|
||||
{
|
||||
wc1 = -1; /* mark wc1 is not initialized". */
|
||||
wc1 = WEOF; /* mark wc1 is not initialized". */
|
||||
|
||||
/* Note that if we're looking at some other [:...:] construct,
|
||||
we just treat it as a bunch of ordinary characters. We can do
|
||||
@ -588,7 +588,7 @@ parse_bracket_exp_mb ()
|
||||
work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
|
||||
}
|
||||
}
|
||||
wc = -1;
|
||||
wc = WEOF;
|
||||
}
|
||||
else
|
||||
/* We treat '[' as a normal character here. */
|
||||
@ -602,7 +602,7 @@ parse_bracket_exp_mb ()
|
||||
wc = fetch_wc(("Unbalanced ["));
|
||||
}
|
||||
|
||||
if (wc1 == -1)
|
||||
if (wc1 == WEOF)
|
||||
wc1 = fetch_wc(_("Unbalanced ["));
|
||||
|
||||
if (wc1 == L'-')
|
||||
@ -632,17 +632,17 @@ parse_bracket_exp_mb ()
|
||||
}
|
||||
REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t,
|
||||
range_sts_al, work_mbc->nranges + 1);
|
||||
work_mbc->range_sts[work_mbc->nranges] = wc;
|
||||
work_mbc->range_sts[work_mbc->nranges] = (wchar_t)wc;
|
||||
REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t,
|
||||
range_ends_al, work_mbc->nranges + 1);
|
||||
work_mbc->range_ends[work_mbc->nranges++] = wc2;
|
||||
work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2;
|
||||
}
|
||||
else if (wc != -1)
|
||||
else if (wc != WEOF)
|
||||
/* build normal characters. */
|
||||
{
|
||||
REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al,
|
||||
work_mbc->nchars + 1);
|
||||
work_mbc->chars[work_mbc->nchars++] = wc;
|
||||
work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc;
|
||||
}
|
||||
}
|
||||
while ((wc = wc1) != L']');
|
||||
|
@ -33,6 +33,12 @@
|
||||
# include <sys/time.h>
|
||||
# include <sys/resource.h>
|
||||
#endif
|
||||
#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
|
||||
/* We can handle multibyte strings. */
|
||||
# define MBS_SUPPORT
|
||||
# include <wchar.h>
|
||||
# include <wctype.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include "system.h"
|
||||
#include "getopt.h"
|
||||
@ -1805,6 +1811,37 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"))
|
||||
if (!install_matcher (matcher) && !install_matcher ("default"))
|
||||
abort ();
|
||||
|
||||
#ifdef MBS_SUPPORT
|
||||
if (MB_CUR_MAX != 1 && match_icase)
|
||||
{
|
||||
wchar_t wc;
|
||||
mbstate_t cur_state, prev_state;
|
||||
int i, len = strlen(keys);
|
||||
|
||||
memset(&cur_state, 0, sizeof(mbstate_t));
|
||||
for (i = 0; i <= len ;)
|
||||
{
|
||||
size_t mbclen;
|
||||
mbclen = mbrtowc(&wc, keys + i, len - i, &cur_state);
|
||||
if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
|
||||
{
|
||||
/* An invalid sequence, or a truncated multibyte character.
|
||||
We treat it as a singlebyte character. */
|
||||
mbclen = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (iswupper((wint_t)wc))
|
||||
{
|
||||
wc = towlower((wint_t)wc);
|
||||
wcrtomb(keys + i, wc, &cur_state);
|
||||
}
|
||||
}
|
||||
i += mbclen;
|
||||
}
|
||||
}
|
||||
#endif /* MBS_SUPPORT */
|
||||
|
||||
(*compile)(keys, keycc);
|
||||
|
||||
if ((argc - optind > 1 && !no_filenames) || with_filenames)
|
||||
|
@ -151,15 +151,16 @@ kwsmusts (void)
|
||||
static char*
|
||||
check_multibyte_string(char const *buf, size_t size)
|
||||
{
|
||||
char *mb_properties = malloc(size);
|
||||
char *mb_properties = xmalloc(size);
|
||||
mbstate_t cur_state;
|
||||
wchar_t wc;
|
||||
int i;
|
||||
memset(&cur_state, 0, sizeof(mbstate_t));
|
||||
memset(mb_properties, 0, sizeof(char)*size);
|
||||
for (i = 0; i < size ;)
|
||||
{
|
||||
size_t mbclen;
|
||||
mbclen = mbrlen(buf + i, size - i, &cur_state);
|
||||
mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state);
|
||||
|
||||
if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
|
||||
{
|
||||
@ -167,6 +168,14 @@ check_multibyte_string(char const *buf, size_t size)
|
||||
We treat it as a singlebyte character. */
|
||||
mbclen = 1;
|
||||
}
|
||||
else if (match_icase)
|
||||
{
|
||||
if (iswupper((wint_t)wc))
|
||||
{
|
||||
wc = towlower((wint_t)wc);
|
||||
wcrtomb(buf + i, wc, &cur_state);
|
||||
}
|
||||
}
|
||||
mb_properties[i] = mbclen;
|
||||
i += mbclen;
|
||||
}
|
||||
@ -235,7 +244,7 @@ Gcompile (char const *pattern, size_t size)
|
||||
static char const line_end[] = "\\)$";
|
||||
static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
|
||||
static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
|
||||
char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
|
||||
char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
|
||||
size_t i;
|
||||
strcpy (n, match_lines ? line_beg : word_beg);
|
||||
i = strlen (n);
|
||||
@ -318,7 +327,7 @@ Ecompile (char const *pattern, size_t size)
|
||||
static char const line_end[] = ")$";
|
||||
static char const word_beg[] = "(^|[^[:alnum:]_])(";
|
||||
static char const word_end[] = ")([^[:alnum:]_]|$)";
|
||||
char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
|
||||
char *n = xmalloc (sizeof word_beg - 1 + size + sizeof word_end);
|
||||
size_t i;
|
||||
strcpy (n, match_lines ? line_beg : word_beg);
|
||||
i = strlen(n);
|
||||
@ -341,14 +350,20 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
|
||||
char eol = eolbyte;
|
||||
int backref, start, len;
|
||||
struct kwsmatch kwsm;
|
||||
size_t i;
|
||||
size_t i, ret_val;
|
||||
#ifdef MBS_SUPPORT
|
||||
char *mb_properties = NULL;
|
||||
#endif /* MBS_SUPPORT */
|
||||
|
||||
#ifdef MBS_SUPPORT
|
||||
if (MB_CUR_MAX > 1 && kwset)
|
||||
mb_properties = check_multibyte_string(buf, size);
|
||||
if (MB_CUR_MAX > 1)
|
||||
{
|
||||
if (match_icase)
|
||||
{
|
||||
char *case_buf = xmalloc(size);
|
||||
memcpy(case_buf, buf, size);
|
||||
buf = case_buf;
|
||||
}
|
||||
if (kwset)
|
||||
mb_properties = check_multibyte_string(buf, size);
|
||||
}
|
||||
#endif /* MBS_SUPPORT */
|
||||
|
||||
buflim = buf + size;
|
||||
@ -457,8 +472,13 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
|
||||
|
||||
failure:
|
||||
#ifdef MBS_SUPPORT
|
||||
if (MB_CUR_MAX > 1 && mb_properties)
|
||||
free (mb_properties);
|
||||
if (MB_CUR_MAX > 1)
|
||||
{
|
||||
if (mb_properties)
|
||||
free (mb_properties);
|
||||
if (match_icase)
|
||||
free ((char *) buf);
|
||||
}
|
||||
#endif /* MBS_SUPPORT */
|
||||
return (size_t) -1;
|
||||
|
||||
@ -469,8 +489,13 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
|
||||
|
||||
success_in_start_and_len:
|
||||
#ifdef MBS_SUPPORT
|
||||
if (MB_CUR_MAX > 1 && mb_properties)
|
||||
free (mb_properties);
|
||||
if (MB_CUR_MAX > 1)
|
||||
{
|
||||
if (mb_properties)
|
||||
free (mb_properties);
|
||||
if (match_icase)
|
||||
free ((char *) buf);
|
||||
}
|
||||
#endif /* MBS_SUPPORT */
|
||||
*match_size = len;
|
||||
return start;
|
||||
@ -506,10 +531,19 @@ Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
|
||||
register size_t len;
|
||||
char eol = eolbyte;
|
||||
struct kwsmatch kwsmatch;
|
||||
size_t ret_val;
|
||||
#ifdef MBS_SUPPORT
|
||||
char *mb_properties;
|
||||
char *mb_properties = NULL;
|
||||
if (MB_CUR_MAX > 1)
|
||||
mb_properties = check_multibyte_string (buf, size);
|
||||
{
|
||||
if (match_icase)
|
||||
{
|
||||
char *case_buf = xmalloc(size);
|
||||
memcpy(case_buf, buf, size);
|
||||
buf = case_buf;
|
||||
}
|
||||
mb_properties = check_multibyte_string(buf, size);
|
||||
}
|
||||
#endif /* MBS_SUPPORT */
|
||||
|
||||
for (beg = buf; beg <= buf + size; ++beg)
|
||||
@ -567,7 +601,12 @@ Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
|
||||
failure:
|
||||
#ifdef MBS_SUPPORT
|
||||
if (MB_CUR_MAX > 1)
|
||||
free (mb_properties);
|
||||
{
|
||||
if (match_icase)
|
||||
free((char *) buf);
|
||||
if (mb_properties)
|
||||
free(mb_properties);
|
||||
}
|
||||
#endif /* MBS_SUPPORT */
|
||||
return -1;
|
||||
|
||||
@ -583,7 +622,12 @@ Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
|
||||
*match_size = len;
|
||||
#ifdef MBS_SUPPORT
|
||||
if (MB_CUR_MAX > 1)
|
||||
free (mb_properties);
|
||||
{
|
||||
if (mb_properties)
|
||||
free (mb_properties);
|
||||
if (match_icase)
|
||||
free ((char *) buf);
|
||||
}
|
||||
#endif /* MBS_SUPPORT */
|
||||
return beg - buf;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user