bsdgrep: fix matching behaviour

- Set REG_NOTBOL if we've already matched the beginning of the line and
  we're examining later parts of it

- For each pattern we examine, apply it to the remaining bits of the
  line rather than (potentially) smaller subsets

- Check for REG_NOSUB after we've looked at all patterns initially
  matching the line

- Keep track of the last match we made to later determine if we're
  simply not matching any longer or if we need to proceed another byte
  because we hit a zero-length match

- Match the earliest and longest bit of each line before moving the
  start of the search further along the line, past the end of that
  longest match; this generally matches how gnugrep(1) seems to behave,
  and seems like pretty good behavior to me

- Finally, bail out of printing any matches if we were set to match
  every line (empty pattern) but -o (output matches) was also set

PR:		195763, 180990, 197555, 197531, 181263, 209116
Submitted by:	"Kyle Evans" <kevans91@ksu.edu>
Reviewed by:	cem
MFC after:	1 month
Relnotes:	Yes
Differential Revision:	https://reviews.freebsd.org/D10104
This commit is contained in:
Ed Maste 2017-04-03 23:16:51 +00:00
parent 5613f78480
commit 87c485cfb5
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=316477

View File

@ -276,28 +276,29 @@ static int
procline(struct str *l, int nottext)
{
regmatch_t matches[MAX_LINE_MATCHES];
regmatch_t pmatch;
size_t st = 0;
regmatch_t pmatch, lastmatch;
size_t st = 0, nst = 0;
unsigned int i;
int c = 0, m = 0, r = 0;
int c = 0, m = 0, r = 0, lastmatches = 0, leflags = eflags;
int startm = 0;
/* Loop to process the whole line */
while (st <= l->len) {
pmatch.rm_so = st;
pmatch.rm_eo = l->len;
lastmatches = 0;
startm = m;
if (st > 0)
leflags |= REG_NOTBOL;
/* Loop to compare with all the patterns */
for (i = 0; i < patterns; i++) {
pmatch.rm_so = st;
pmatch.rm_eo = l->len;
if (fg_pattern[i].pattern)
r = fastexec(&fg_pattern[i],
l->dat, 1, &pmatch, eflags);
l->dat, 1, &pmatch, leflags);
else
r = regexec(&r_pattern[i], l->dat, 1,
&pmatch, eflags);
&pmatch, leflags);
r = (r == 0) ? 0 : REG_NOMATCH;
st = (cflags & REG_NOSUB)
? (size_t)l->len
: (size_t)pmatch.rm_eo;
if (r == REG_NOMATCH)
continue;
/* Check for full match */
@ -324,10 +325,29 @@ procline(struct str *l, int nottext)
r = REG_NOMATCH;
}
if (r == 0) {
lastmatches++;
lastmatch = pmatch;
/* Skip over zero-length matches */
if (pmatch.rm_so == pmatch.rm_eo)
continue;
if (m == 0)
c++;
if (m < MAX_LINE_MATCHES)
matches[m++] = pmatch;
if (m < MAX_LINE_MATCHES) {
/* Replace previous match if the new one is earlier and/or longer */
if (m > startm) {
if (pmatch.rm_so < matches[m-1].rm_so ||
(pmatch.rm_so == matches[m-1].rm_so && (pmatch.rm_eo - pmatch.rm_so) > (matches[m-1].rm_eo - matches[m-1].rm_so))) {
matches[m-1] = pmatch;
nst = pmatch.rm_eo;
}
} else {
/* Advance as normal if not */
matches[m++] = pmatch;
nst = pmatch.rm_eo;
}
}
/* matches - skip further patterns */
if ((color == NULL && !oflag) ||
qflag || lflag)
@ -344,8 +364,19 @@ procline(struct str *l, int nottext)
if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag))
break;
if (st == (size_t)pmatch.rm_so)
break; /* No matches */
/* If we didn't have any matches or REG_NOSUB set */
if (lastmatches == 0 || (cflags & REG_NOSUB))
nst = l->len;
if (lastmatches == 0)
/* No matches */
break;
else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo)
/* Zero-length match -- advance one more so we don't get stuck */
nst++;
/* Advance st based on previous matches */
st = nst;
}
@ -444,6 +475,10 @@ printline(struct str *line, int sep, regmatch_t *matches, int m)
size_t a = 0;
int i, n = 0;
/* If matchall, everything matches but don't actually print for -o */
if (oflag && matchall)
return;
if (!hflag) {
if (!nullflag) {
fputs(line->file, stdout);
@ -474,13 +509,13 @@ printline(struct str *line, int sep, regmatch_t *matches, int m)
fwrite(line->dat + a, matches[i].rm_so - a, 1,
stdout);
if (color)
fprintf(stdout, "\33[%sm\33[K", color);
fprintf(stdout, "\33[%sm", color);
fwrite(line->dat + matches[i].rm_so,
matches[i].rm_eo - matches[i].rm_so, 1,
stdout);
if (color)
fprintf(stdout, "\33[m\33[K");
fprintf(stdout, "\33[00m\33[K");
a = matches[i].rm_eo;
if (oflag)
putchar('\n');