bsdgrep: fix matching behaviour
- Set REG_NOTBOL if we've already matched the beginning of the line and we're examining later parts
- For each pattern we examine, apply it to the remaining bits of the line rather than (potentially) smaller subsets
- Check for REG_NOSUB after we've looked at all patterns initially matching the line
- Keep track of the last match we made to later determine if we're simply not matching any longer or if we need to proceed another byte because we hit a zero-length match
- Match the earliest and longest bit of each line before moving the beginning of what we match to further in the line, past the end of the longest match; this generally matches how gnugrep(1) seems to behave, and seems like pretty good behavior to me
- Finally, bail out of printing any matches if we were set to print all (empty pattern) but -o (output matches) was set

PR: 195763, 180990, 197555, 197531, 181263, 209116
Submitted by: "Kyle Evans" <kevans91@ksu.edu>
Reviewed by: cem
MFC after: 1 month
Relnotes: Yes
Differential Revision: https://reviews.freebsd.org/D10104
This commit is contained in:
parent
5613f78480
commit
87c485cfb5
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=316477
@ -276,28 +276,29 @@ static int
|
||||
procline(struct str *l, int nottext)
|
||||
{
|
||||
regmatch_t matches[MAX_LINE_MATCHES];
|
||||
regmatch_t pmatch;
|
||||
size_t st = 0;
|
||||
regmatch_t pmatch, lastmatch;
|
||||
size_t st = 0, nst = 0;
|
||||
unsigned int i;
|
||||
int c = 0, m = 0, r = 0;
|
||||
int c = 0, m = 0, r = 0, lastmatches = 0, leflags = eflags;
|
||||
int startm = 0;
|
||||
|
||||
/* Loop to process the whole line */
|
||||
while (st <= l->len) {
|
||||
pmatch.rm_so = st;
|
||||
pmatch.rm_eo = l->len;
|
||||
|
||||
lastmatches = 0;
|
||||
startm = m;
|
||||
if (st > 0)
|
||||
leflags |= REG_NOTBOL;
|
||||
/* Loop to compare with all the patterns */
|
||||
for (i = 0; i < patterns; i++) {
|
||||
pmatch.rm_so = st;
|
||||
pmatch.rm_eo = l->len;
|
||||
if (fg_pattern[i].pattern)
|
||||
r = fastexec(&fg_pattern[i],
|
||||
l->dat, 1, &pmatch, eflags);
|
||||
l->dat, 1, &pmatch, leflags);
|
||||
else
|
||||
r = regexec(&r_pattern[i], l->dat, 1,
|
||||
&pmatch, eflags);
|
||||
&pmatch, leflags);
|
||||
r = (r == 0) ? 0 : REG_NOMATCH;
|
||||
st = (cflags & REG_NOSUB)
|
||||
? (size_t)l->len
|
||||
: (size_t)pmatch.rm_eo;
|
||||
if (r == REG_NOMATCH)
|
||||
continue;
|
||||
/* Check for full match */
|
||||
@ -324,10 +325,29 @@ procline(struct str *l, int nottext)
|
||||
r = REG_NOMATCH;
|
||||
}
|
||||
if (r == 0) {
|
||||
lastmatches++;
|
||||
lastmatch = pmatch;
|
||||
/* Skip over zero-length matches */
|
||||
if (pmatch.rm_so == pmatch.rm_eo)
|
||||
continue;
|
||||
if (m == 0)
|
||||
c++;
|
||||
if (m < MAX_LINE_MATCHES)
|
||||
matches[m++] = pmatch;
|
||||
|
||||
if (m < MAX_LINE_MATCHES) {
|
||||
/* Replace previous match if the new one is earlier and/or longer */
|
||||
if (m > startm) {
|
||||
if (pmatch.rm_so < matches[m-1].rm_so ||
|
||||
(pmatch.rm_so == matches[m-1].rm_so && (pmatch.rm_eo - pmatch.rm_so) > (matches[m-1].rm_eo - matches[m-1].rm_so))) {
|
||||
matches[m-1] = pmatch;
|
||||
nst = pmatch.rm_eo;
|
||||
}
|
||||
} else {
|
||||
/* Advance as normal if not */
|
||||
matches[m++] = pmatch;
|
||||
nst = pmatch.rm_eo;
|
||||
}
|
||||
}
|
||||
|
||||
/* matches - skip further patterns */
|
||||
if ((color == NULL && !oflag) ||
|
||||
qflag || lflag)
|
||||
@ -344,8 +364,19 @@ procline(struct str *l, int nottext)
|
||||
if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag))
|
||||
break;
|
||||
|
||||
if (st == (size_t)pmatch.rm_so)
|
||||
break; /* No matches */
|
||||
/* If we didn't have any matches or REG_NOSUB set */
|
||||
if (lastmatches == 0 || (cflags & REG_NOSUB))
|
||||
nst = l->len;
|
||||
|
||||
if (lastmatches == 0)
|
||||
/* No matches */
|
||||
break;
|
||||
else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo)
|
||||
/* Zero-length match -- advance one more so we don't get stuck */
|
||||
nst++;
|
||||
|
||||
/* Advance st based on previous matches */
|
||||
st = nst;
|
||||
}
|
||||
|
||||
|
||||
@ -444,6 +475,10 @@ printline(struct str *line, int sep, regmatch_t *matches, int m)
|
||||
size_t a = 0;
|
||||
int i, n = 0;
|
||||
|
||||
/* If matchall, everything matches but don't actually print for -o */
|
||||
if (oflag && matchall)
|
||||
return;
|
||||
|
||||
if (!hflag) {
|
||||
if (!nullflag) {
|
||||
fputs(line->file, stdout);
|
||||
@ -474,13 +509,13 @@ printline(struct str *line, int sep, regmatch_t *matches, int m)
|
||||
fwrite(line->dat + a, matches[i].rm_so - a, 1,
|
||||
stdout);
|
||||
if (color)
|
||||
fprintf(stdout, "\33[%sm\33[K", color);
|
||||
fprintf(stdout, "\33[%sm", color);
|
||||
|
||||
fwrite(line->dat + matches[i].rm_so,
|
||||
matches[i].rm_eo - matches[i].rm_so, 1,
|
||||
stdout);
|
||||
if (color)
|
||||
fprintf(stdout, "\33[m\33[K");
|
||||
fprintf(stdout, "\33[00m\33[K");
|
||||
a = matches[i].rm_eo;
|
||||
if (oflag)
|
||||
putchar('\n');
|
||||
|
Loading…
Reference in New Issue
Block a user