awk: revert to upstream behavior for ranges for gawk compatibility
In 2005, FreeBSD changed one-true-awk to honor the locale's collating order. This was billed as a temporary patch. It was also compatible with the then-current behavior of gawk. That temporary patch has lasted 16 years now. However, IEEE Std 1003.1-2008 changed the behavior of ranges in regular expressions outside of the "C" and "POSIX" locales to be undefined. Starting in 2011, gawk 4.0 stopped using the locale for the range regular expressions and used the traditional behavior only. The maintainer had grown weary of answering why '[A-Z]' would sometimes match lower-case expressions. The details are explained here: https://www.gnu.org/software/gawk/manual/html_node/Ranges-and-Locales.html To restore compatibility with other implementations of awk, revert this patch. FreeBSD is the odd system out. It also has the nice side effect of eliminating the last of our differences with upstream one-true-awk. Reviewed by: cy, rgrimes MFC After: 2 weeks Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D31114
This commit is contained in:
parent
2929813c4f
commit
628bd30ab5
@ -361,20 +361,9 @@ int quoted(const uschar **pp) /* pick up next thing after a \\ */
|
||||
return c;
|
||||
}
|
||||
|
||||
static int collate_range_cmp(int a, int b)
|
||||
{
|
||||
static char s[2][2];
|
||||
|
||||
if ((uschar)a == (uschar)b)
|
||||
return 0;
|
||||
s[0][0] = a;
|
||||
s[1][0] = b;
|
||||
return (strcoll(s[0], s[1]));
|
||||
}
|
||||
|
||||
char *cclenter(const char *argp) /* add a character class */
|
||||
{
|
||||
int i, c, c2, j;
|
||||
int i, c, c2;
|
||||
const uschar *op, *p = (const uschar *) argp;
|
||||
uschar *bp;
|
||||
static uschar *buf = NULL;
|
||||
@ -393,18 +382,15 @@ char *cclenter(const char *argp) /* add a character class */
|
||||
c2 = *p++;
|
||||
if (c2 == '\\')
|
||||
c2 = quoted(&p);
|
||||
if (collate_range_cmp(c, c2) > 0) {
|
||||
if (c > c2) { /* empty; ignore */
|
||||
bp--;
|
||||
i--;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < NCHARS; j++) {
|
||||
if ((collate_range_cmp(c, j) > 0) ||
|
||||
collate_range_cmp(j, c2) > 0)
|
||||
continue;
|
||||
while (c < c2) {
|
||||
if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
|
||||
FATAL("out of space for character class [%.10s...] 2", p);
|
||||
*bp++ = j;
|
||||
*bp++ = ++c;
|
||||
i++;
|
||||
}
|
||||
continue;
|
||||
|
@ -117,7 +117,6 @@ int main(int argc, char *argv[])
|
||||
char *fn, *vn;
|
||||
|
||||
setlocale(LC_CTYPE, "");
|
||||
setlocale(LC_COLLATE, "");
|
||||
setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
|
||||
cmdname = argv[0];
|
||||
if (argc == 1) {
|
||||
|
@ -665,7 +665,7 @@ Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
|
||||
j = x->fval - y->fval;
|
||||
i = j<0? -1: (j>0? 1: 0);
|
||||
} else {
|
||||
i = strcoll(getsval(x), getsval(y));
|
||||
i = strcmp(getsval(x), getsval(y));
|
||||
}
|
||||
tempfree(x);
|
||||
tempfree(y);
|
||||
|
Loading…
Reference in New Issue
Block a user