The regex code does not work with multibyte codesets like UTF-8.
In fact, it doesn't even work with single-byte codesets like ISO-8859-1. The comparison blows up at index 128 (the range is 0 to UCHAR_MAX, i.e. 255). As a temporary workaround, all comparisons will be done in the C locale regardless of the environment setting. The regex library needs to be updated to handle all codesets. Obtained from: Dragonfly
This commit is contained in:
parent
1fbe2addc6
commit
1cd79ad208
@ -38,6 +38,13 @@
|
||||
* @(#)regcomp.c 8.5 (Berkeley) 3/20/94
|
||||
*/
|
||||
|
||||
/*
|
||||
* This implementation currently works only with the C locale.
|
||||
* It's definitely limited by UCHAR_MAX, but not even ISO-8859 charsets
|
||||
* are working. The forced changing of locale to C for the comparison
|
||||
* is considered a workaround until a better solution is found.
|
||||
*/
|
||||
|
||||
#if defined(LIBC_SCCS) && !defined(lint)
|
||||
static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94";
|
||||
#endif /* LIBC_SCCS and not lint */
|
||||
@ -768,8 +775,9 @@ p_b_term(struct parse *p, cset *cs)
|
||||
char c;
|
||||
wint_t start, finish;
|
||||
wint_t i;
|
||||
locale_t loc = &__xlocale_C_locale; /* see note under license */
|
||||
struct xlocale_collate *table =
|
||||
(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
|
||||
(struct xlocale_collate*)loc->components[XLC_COLLATE];
|
||||
|
||||
/* classify what we've got */
|
||||
switch ((MORE()) ? PEEK() : '\0') {
|
||||
|
Loading…
x
Reference in New Issue
Block a user