From 5a5807dd4ca34467ac5fb458bc19f12bf62075a5 Mon Sep 17 00:00:00 2001 From: "Andrey A. Chernov" Date: Sun, 10 Jul 2016 03:49:38 +0000 Subject: [PATCH] Remove broken support for collation in [a-z] type ranges. Only the first 256 wide chars are currently considered; all others are just dropped from the range. A proper implementation requires a reverse-table database lookup, since the objects are really big, up to the UTF-8 maximum (1114112 code points), so the same scanning that was done for 256 chars would slow things down. POSIX does not require collation for [a-z] type ranges and does not prohibit it for non-POSIX locales. POSIX requires collation for ranges only in the POSIX (or C) locale, which is equal to ASCII and binary for other chars, so we already have it. No other *BSD implements collation for [a-z] type ranges. Restore ABI compatibility with the now-unused __collate_range_cmp(), which is visible from outside (will be removed later). --- lib/libc/gen/fnmatch.c | 10 +--------- lib/libc/gen/glob.c | 10 +--------- lib/libc/locale/collate.h | 3 +-- lib/libc/locale/collcmp.c | 21 ++------------------- lib/libc/regex/regcomp.c | 20 ++------------------ lib/libc/stdio/vfscanf.c | 27 +++++---------------------- 6 files changed, 12 insertions(+), 79 deletions(-) diff --git a/lib/libc/gen/fnmatch.c b/lib/libc/gen/fnmatch.c index 8cdcaf1fbd37..408fb94acfeb 100644 --- a/lib/libc/gen/fnmatch.c +++ b/lib/libc/gen/fnmatch.c @@ -63,8 +63,6 @@ __FBSDID("$FreeBSD$"); #include #include -#include "collate.h" - #define EOS '\0' #define RANGE_MATCH 1 @@ -238,8 +236,6 @@ rangematch(const char *pattern, wchar_t test, int flags, char **newp, wchar_t c, c2; size_t pclen; const char *origpat; - struct xlocale_collate *table = - (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; /* * A bracket expression starting with an unquoted circumflex @@ -294,11 +290,7 @@ rangematch(const char *pattern, wchar_t test, int flags, char **newp, if (flags & FNM_CASEFOLD) c2 = towlower(c2); - if 
(table->__collate_load_error ? - c <= test && test <= c2 : - __wcollate_range_cmp(table, c, test) <= 0 - && __wcollate_range_cmp(table, test, c2) <= 0 - ) + if (c <= test && test <= c2) ok = 1; } else if (c == test) ok = 1; diff --git a/lib/libc/gen/glob.c b/lib/libc/gen/glob.c index 62aa8ad75f46..ef426e06f2b3 100644 --- a/lib/libc/gen/glob.c +++ b/lib/libc/gen/glob.c @@ -92,8 +92,6 @@ __FBSDID("$FreeBSD$"); #include #include -#include "collate.h" - /* * glob(3) expansion limits. Stop the expansion if any of these limits * is reached. This caps the runtime in the face of DoS attacks. See @@ -804,8 +802,6 @@ match(Char *name, Char *pat, Char *patend) { int ok, negate_range; Char c, k; - struct xlocale_collate *table = - (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; while (pat < patend) { c = *pat++; @@ -830,11 +826,7 @@ match(Char *name, Char *pat, Char *patend) ++pat; while (((c = *pat++) & M_MASK) != M_END) if ((*pat & M_MASK) == M_RNG) { - if (table->__collate_load_error ? 
- CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) : - __wcollate_range_cmp(table, CHAR(c), CHAR(k)) <= 0 - && __wcollate_range_cmp(table, CHAR(k), CHAR(pat[1])) <= 0 - ) + if (CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1])) ok = 1; pat += 2; } else if (c == k) diff --git a/lib/libc/locale/collate.h b/lib/libc/locale/collate.h index 3253d9cb67d4..3cbc5aac700a 100644 --- a/lib/libc/locale/collate.h +++ b/lib/libc/locale/collate.h @@ -128,8 +128,7 @@ int __collate_load_tables(const char *); int __collate_equiv_value(locale_t, const wchar_t *, size_t); void _collate_lookup(struct xlocale_collate *,const wchar_t *, int *, int *, int, const int **); -int __collate_range_cmp(struct xlocale_collate *, char, char); -int __wcollate_range_cmp(struct xlocale_collate *, wchar_t, wchar_t); +int __collate_range_cmp(int, int); size_t _collate_wxfrm(struct xlocale_collate *, const wchar_t *, wchar_t *, size_t); size_t _collate_sxfrm(struct xlocale_collate *, const wchar_t *, char *, diff --git a/lib/libc/locale/collcmp.c b/lib/libc/locale/collcmp.c index b90c379cb193..1f4409668786 100644 --- a/lib/libc/locale/collcmp.c +++ b/lib/libc/locale/collcmp.c @@ -33,15 +33,13 @@ __FBSDID("$FreeBSD$"); #include -#include -#include #include "collate.h" /* * Compare two characters using collate */ -int __collate_range_cmp(struct xlocale_collate *table, char c1, char c2) +int __collate_range_cmp(int c1, int c2) { char s1[2], s2[2]; @@ -49,20 +47,5 @@ int __collate_range_cmp(struct xlocale_collate *table, char c1, char c2) s1[1] = '\0'; s2[0] = c2; s2[1] = '\0'; - struct _xlocale l = {{0}}; - l.components[XLC_COLLATE] = (struct xlocale_component *)table; - return (strcoll_l(s1, s2, &l)); -} - -int __wcollate_range_cmp(struct xlocale_collate *table, wchar_t c1, wchar_t c2) -{ - wchar_t s1[2], s2[2]; - - s1[0] = c1; - s1[1] = L'\0'; - s2[0] = c2; - s2[1] = L'\0'; - struct _xlocale l = {{0}}; - l.components[XLC_COLLATE] = (struct xlocale_component *)table; - return (wcscoll_l(s1, s2, &l)); + return 
(strcoll(s1, s2)); } diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c index 53332334fed7..91398eedcb42 100644 --- a/lib/libc/regex/regcomp.c +++ b/lib/libc/regex/regcomp.c @@ -51,12 +51,9 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include -#include "collate.h" - #include "utils.h" #include "regex2.h" @@ -767,9 +764,6 @@ p_b_term(struct parse *p, cset *cs) { char c; wint_t start, finish; - wint_t i; - struct xlocale_collate *table = - (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; /* classify what we've got */ switch ((MORE()) ? PEEK() : '\0') { @@ -817,18 +811,8 @@ p_b_term(struct parse *p, cset *cs) if (start == finish) CHadd(p, cs, start); else { - if (table->__collate_load_error) { - (void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE); - CHaddrange(p, cs, start, finish); - } else { - (void)REQUIRE(__wcollate_range_cmp(table, start, finish) <= 0, REG_ERANGE); - for (i = 0; i <= UCHAR_MAX; i++) { - if ( __wcollate_range_cmp(table, start, i) <= 0 - && __wcollate_range_cmp(table, i, finish) <= 0 - ) - CHadd(p, cs, i); - } - } + (void)REQUIRE(start <= finish, REG_ERANGE); + CHaddrange(p, cs, start, finish); } break; } diff --git a/lib/libc/stdio/vfscanf.c b/lib/libc/stdio/vfscanf.c index e09a5b22eef9..f3cb1fb67bcb 100644 --- a/lib/libc/stdio/vfscanf.c +++ b/lib/libc/stdio/vfscanf.c @@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$"); #include #include "un-namespace.h" -#include "collate.h" #include "libc_private.h" #include "local.h" #include "xlocale_private.h" @@ -816,9 +815,7 @@ again: c = *fmt++; static const u_char * __sccl(char *tab, const u_char *fmt) { - int c, n, v, i; - struct xlocale_collate *table = - (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; + int c, n, v; /* first `clear' the whole table */ c = *fmt++; /* first char hat => negated scanset */ @@ -871,29 +868,15 @@ __sccl(char *tab, const u_char *fmt) * we just stored in the table (c). 
*/ n = *fmt; - if (n == ']' - || (table->__collate_load_error ? n < c : - __wcollate_range_cmp(table, n, c) < 0 - ) - ) { + if (n == ']' || n < c) { c = '-'; break; /* resume the for(;;) */ } fmt++; - /* fill in the range */ - if (table->__collate_load_error) { - do { - tab[++c] = v; - } while (c < n); - } else { - for (i = 0; i < 256; i ++) - if (__wcollate_range_cmp(table, c, i) < 0 && - __wcollate_range_cmp(table, i, n) <= 0 - ) - tab[i] = v; - } + do { /* fill in the range */ + tab[++c] = v; + } while (c < n); #if 1 /* XXX another disgusting compatibility hack */ - c = n; /* * Alas, the V7 Unix scanf also treats formats * such as [a-c-e] as `the letters a through e'.