Remove broken support for collation in [a-z] type ranges.

Only the first 256 wide chars are currently considered; all others are
just dropped from the range. A proper implementation requires a reverse
table database lookup, since the objects are as big as the maximum UTF-8
range (1114112 code points), so simply scanning them the same way as was
done for 256 chars would slow things down.

POSIX does not require collation for [a-z] type ranges and does not
prohibit it for non-POSIX locales. POSIX requires collation for ranges
only in the POSIX (or C) locale, whose collation order is equal to ASCII
order, and binary for other chars, so we already have it.

No other *BSD implements collation for [a-z] type ranges.

Restore ABI compatibility for the now-unused __collate_range_cmp(), which
is visible from outside (it will be removed later).
This commit is contained in:
Andrey A. Chernov 2016-07-10 03:49:38 +00:00
parent d6f7a4fb17
commit 5a5807dd4c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=302512
6 changed files with 12 additions and 79 deletions

View File

@ -63,8 +63,6 @@ __FBSDID("$FreeBSD$");
#include <wchar.h>
#include <wctype.h>
#include "collate.h"
#define EOS '\0'
#define RANGE_MATCH 1
@ -238,8 +236,6 @@ rangematch(const char *pattern, wchar_t test, int flags, char **newp,
wchar_t c, c2;
size_t pclen;
const char *origpat;
struct xlocale_collate *table =
(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
/*
* A bracket expression starting with an unquoted circumflex
@ -294,11 +290,7 @@ rangematch(const char *pattern, wchar_t test, int flags, char **newp,
if (flags & FNM_CASEFOLD)
c2 = towlower(c2);
if (table->__collate_load_error ?
c <= test && test <= c2 :
__wcollate_range_cmp(table, c, test) <= 0
&& __wcollate_range_cmp(table, test, c2) <= 0
)
if (c <= test && test <= c2)
ok = 1;
} else if (c == test)
ok = 1;

View File

@ -92,8 +92,6 @@ __FBSDID("$FreeBSD$");
#include <unistd.h>
#include <wchar.h>
#include "collate.h"
/*
* glob(3) expansion limits. Stop the expansion if any of these limits
* is reached. This caps the runtime in the face of DoS attacks. See
@ -804,8 +802,6 @@ match(Char *name, Char *pat, Char *patend)
{
int ok, negate_range;
Char c, k;
struct xlocale_collate *table =
(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
while (pat < patend) {
c = *pat++;
@ -830,11 +826,7 @@ match(Char *name, Char *pat, Char *patend)
++pat;
while (((c = *pat++) & M_MASK) != M_END)
if ((*pat & M_MASK) == M_RNG) {
if (table->__collate_load_error ?
CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) :
__wcollate_range_cmp(table, CHAR(c), CHAR(k)) <= 0
&& __wcollate_range_cmp(table, CHAR(k), CHAR(pat[1])) <= 0
)
if (CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]))
ok = 1;
pat += 2;
} else if (c == k)

View File

@ -128,8 +128,7 @@ int __collate_load_tables(const char *);
int __collate_equiv_value(locale_t, const wchar_t *, size_t);
void _collate_lookup(struct xlocale_collate *,const wchar_t *, int *, int *,
int, const int **);
int __collate_range_cmp(struct xlocale_collate *, char, char);
int __wcollate_range_cmp(struct xlocale_collate *, wchar_t, wchar_t);
int __collate_range_cmp(int, int);
size_t _collate_wxfrm(struct xlocale_collate *, const wchar_t *, wchar_t *,
size_t);
size_t _collate_sxfrm(struct xlocale_collate *, const wchar_t *, char *,

View File

@ -33,15 +33,13 @@
__FBSDID("$FreeBSD$");
#include <string.h>
#include <wchar.h>
#include <xlocale.h>
#include "collate.h"
/*
* Compare two characters using collate
*/
int __collate_range_cmp(struct xlocale_collate *table, char c1, char c2)
int __collate_range_cmp(int c1, int c2)
{
char s1[2], s2[2];
@ -49,20 +47,5 @@ int __collate_range_cmp(struct xlocale_collate *table, char c1, char c2)
s1[1] = '\0';
s2[0] = c2;
s2[1] = '\0';
struct _xlocale l = {{0}};
l.components[XLC_COLLATE] = (struct xlocale_component *)table;
return (strcoll_l(s1, s2, &l));
}
int __wcollate_range_cmp(struct xlocale_collate *table, wchar_t c1, wchar_t c2)
{
wchar_t s1[2], s2[2];
s1[0] = c1;
s1[1] = L'\0';
s2[0] = c2;
s2[1] = L'\0';
struct _xlocale l = {{0}};
l.components[XLC_COLLATE] = (struct xlocale_component *)table;
return (wcscoll_l(s1, s2, &l));
return (strcoll(s1, s2));
}

View File

@ -51,12 +51,9 @@ __FBSDID("$FreeBSD$");
#include <limits.h>
#include <stdlib.h>
#include <regex.h>
#include <runetype.h>
#include <wchar.h>
#include <wctype.h>
#include "collate.h"
#include "utils.h"
#include "regex2.h"
@ -767,9 +764,6 @@ p_b_term(struct parse *p, cset *cs)
{
char c;
wint_t start, finish;
wint_t i;
struct xlocale_collate *table =
(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
/* classify what we've got */
switch ((MORE()) ? PEEK() : '\0') {
@ -817,18 +811,8 @@ p_b_term(struct parse *p, cset *cs)
if (start == finish)
CHadd(p, cs, start);
else {
if (table->__collate_load_error) {
(void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE);
CHaddrange(p, cs, start, finish);
} else {
(void)REQUIRE(__wcollate_range_cmp(table, start, finish) <= 0, REG_ERANGE);
for (i = 0; i <= UCHAR_MAX; i++) {
if ( __wcollate_range_cmp(table, start, i) <= 0
&& __wcollate_range_cmp(table, i, finish) <= 0
)
CHadd(p, cs, i);
}
}
(void)REQUIRE(start <= finish, REG_ERANGE);
CHaddrange(p, cs, start, finish);
}
break;
}

View File

@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$");
#include <wctype.h>
#include "un-namespace.h"
#include "collate.h"
#include "libc_private.h"
#include "local.h"
#include "xlocale_private.h"
@ -816,9 +815,7 @@ again: c = *fmt++;
static const u_char *
__sccl(char *tab, const u_char *fmt)
{
int c, n, v, i;
struct xlocale_collate *table =
(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
int c, n, v;
/* first `clear' the whole table */
c = *fmt++; /* first char hat => negated scanset */
@ -871,29 +868,15 @@ __sccl(char *tab, const u_char *fmt)
* we just stored in the table (c).
*/
n = *fmt;
if (n == ']'
|| (table->__collate_load_error ? n < c :
__wcollate_range_cmp(table, n, c) < 0
)
) {
if (n == ']' || n < c) {
c = '-';
break; /* resume the for(;;) */
}
fmt++;
/* fill in the range */
if (table->__collate_load_error) {
do {
tab[++c] = v;
} while (c < n);
} else {
for (i = 0; i < 256; i ++)
if (__wcollate_range_cmp(table, c, i) < 0 &&
__wcollate_range_cmp(table, i, n) <= 0
)
tab[i] = v;
}
do { /* fill in the range */
tab[++c] = v;
} while (c < n);
#if 1 /* XXX another disgusting compatibility hack */
c = n;
/*
* Alas, the V7 Unix scanf also treats formats
* such as [a-c-e] as `the letters a through e'.