Correctly locate the character preceeding the matched string in -w

mode when in non-UTF-8 multibyte locales (e.g. EUC, GB2312, etc.).

PR:		91909
This commit is contained in:
Tim J. Robbins 2006-02-19 04:27:39 +00:00
parent c0d157d85f
commit 7c5f1a3fc5

View File

@ -524,11 +524,16 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
if (mb_cur_max > 1)
{
const char *s;
int mr;
size_t mr;
wchar_t pwc;
/* Locate the start of the multibyte character
before the match position (== beg + start). */
if (using_utf8)
{
/* UTF-8 is a special case: scan backwards
until we find a 7-bit character or a
lead byte. */
s = beg + start - 1;
while (s > buf
&& (unsigned char) *s >= 0x80
@ -536,15 +541,40 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
--s;
}
else
s = last_char;
mr = mbtowc (&pwc, s, beg + start - s);
if (mr <= 0)
{
/* Scan forwards to find the start of the
last complete character before the
match position. */
size_t bytes_left = start - 1;
s = beg;
while (bytes_left > 0)
{
mr = mbrlen (s, bytes_left, &mbs);
if (mr == (size_t) -1 || mr == 0)
{
memset (&mbs, '\0', sizeof (mbs));
s++;
bytes_left--;
continue;
}
if (mr == (size_t) -2)
{
memset (&mbs, '\0', sizeof (mbs));
break;
}
s += mr;
bytes_left -= mr;
}
}
mr = mbrtowc (&pwc, s, beg + start - s, &mbs);
if (mr == (size_t) -2 || mr == (size_t) -1 ||
mr == 0)
{
memset (&mbs, '\0', sizeof (mbstate_t));
lword_match = 1;
}
else if (!(iswalnum (pwc) || pwc == L'_')
&& mr == (int) (beg + start - s))
&& mr == beg + start - s)
lword_match = 1;
}
else