bsdgrep: emit more than MAX_LINE_MATCHES per line

We should not set an arbitrary cap on the number of matches on a line,
and in any case MAX_LINE_MATCHES of 32 is much too low.  Instead, if we
match more than MAX_LINE_MATCHES, keep processing and matching from the
last match until all are found.

For the regression test, we produce 4096 matches (more than we expect
MAX_LINE_MATCHES will ever be set to) and make sure we actually get 4096
lines of output with the -o flag.

We'll also make sure that every line of output carries its own line
number, to detect line metadata not being printed as appropriate along
the way.

PR:		218811
Submitted by:	Kyle Evans <kevans91@ksu.edu>
Reported by:	jbeich
Reviewed by:	cem
Differential Revision:	https://reviews.freebsd.org/D10577
This commit is contained in:
Ed Maste 2017-05-20 03:51:31 +00:00
parent 85c4e67075
commit fe8c9d5bf1
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=318571
2 changed files with 44 additions and 7 deletions

View File

@ -413,6 +413,26 @@ wflag_emptypat_body()
atf_check -o file:test4 grep -w -e "" test4
}
atf_test_case excessive_matches
excessive_matches_head()
{
	atf_set "descr" "Check for proper handling of lines with excessive matches (PR 218811)"
}
excessive_matches_body()
{
	# GNU grep in base is expected to fail this test, so flag the
	# failure as expected rather than as a regression.
	grep_type
	if [ $? -eq $GREP_TYPE_GNU_FREEBSD ]; then
		atf_expect_fail "this test does not pass with GNU grep in base"
	fi

	# Append 4096 "x" characters to test.in without any newline, so they
	# all sit on one line and each character is a separate match.
	jot 4096 | while read -r unused; do
		printf "x"
	done >> test.in

	# With -o every match is printed on its own output line, so exactly
	# 4096 lines must come back.
	atf_check -s exit:0 -x '[ $(grep -o x test.in | wc -l) -eq 4096 ]'

	# Disabled check: it would require every line of -on output to
	# contain "1:x" (the inner grep -v must find nothing and exit 1).
	# NOTE(review): the reason it is disabled is not visible here; confirm
	# before re-enabling.
	#atf_check -s exit:1 -x 'grep -on x test.in | grep -v "1:x"'
}
atf_test_case fgrep_sanity
fgrep_sanity_head()
{
@ -603,6 +623,7 @@ atf_init_test_cases()
atf_add_test_case egrep_empty_invalid
atf_add_test_case zerolen
atf_add_test_case wflag_emptypat
atf_add_test_case excessive_matches
atf_add_test_case wv_combo_break
atf_add_test_case fgrep_sanity
atf_add_test_case egrep_sanity

View File

@ -63,6 +63,7 @@ static bool first_match = true;
/*
 * Line-processing state shared between procfile() and procline().
 * procfile() resets matchidx and lnstart for every line it reads and
 * passes the structure to procline() by pointer.
 */
struct parsec {
regmatch_t matches[MAX_LINE_MATCHES]; /* Matches made; room for at most MAX_LINE_MATCHES */
struct str ln; /* Current line */
size_t lnstart; /* Offset in the line where procline() starts (or resumes) matching */
size_t matchidx; /* Latest used match index; reset to 0 per line and before each retry */
bool binary; /* Binary file? */
};
@ -247,8 +248,9 @@ procfile(const char *fn)
mcount = mlimit;
for (c = 0; c == 0 || !(lflag || qflag); ) {
/* Reset match count for every line processed */
/* Reset match count and line start for every line processed */
pc.matchidx = 0;
pc.lnstart = 0;
pc.ln.off += pc.ln.len + 1;
if ((pc.ln.dat = grep_fgetln(f, &pc.ln.len)) == NULL ||
pc.ln.len == 0) {
@ -288,6 +290,14 @@ procfile(const char *fn)
/* Print the matching line, but only if not quiet/binary */
if (t == 0 && printmatch) {
printline(&pc, ':');
while (pc.matchidx >= MAX_LINE_MATCHES) {
/* Reset matchidx and try again */
pc.matchidx = 0;
if (procline(&pc) == 0)
printline(&pc, ':');
else
break;
}
first_match = false;
same_file = true;
last_outed = 0;
@ -356,11 +366,11 @@ procline(struct parsec *pc)
{
regmatch_t pmatch, lastmatch, chkmatch;
wchar_t wbegin, wend;
size_t st = 0, nst = 0;
size_t st, nst;
unsigned int i;
int c = 0, r = 0, lastmatches = 0, leflags = eflags;
size_t startm = 0, matchidx;
int retry;
unsigned int retry;
matchidx = pc->matchidx;
@ -376,6 +386,8 @@ procline(struct parsec *pc)
} else if (matchall)
return (0);
st = pc->lnstart;
nst = 0;
/* Initialize to avoid a false positive warning from GCC. */
lastmatch.rm_so = lastmatch.rm_eo = 0;
@ -432,12 +444,12 @@ procline(struct parsec *pc)
* still match a whole word.
*/
if (r == REG_NOMATCH &&
(retry == 0 || pmatch.rm_so + 1 < retry))
(retry == pc->lnstart ||
pmatch.rm_so + 1 < retry))
retry = pmatch.rm_so + 1;
if (r == REG_NOMATCH)
continue;
}
lastmatches++;
lastmatch = pmatch;
@ -466,8 +478,11 @@ procline(struct parsec *pc)
}
/* avoid excessive matching - skip further patterns */
if ((color == NULL && !oflag) || qflag || lflag ||
matchidx >= MAX_LINE_MATCHES)
matchidx >= MAX_LINE_MATCHES) {
pc->lnstart = nst;
lastmatches = 0;
break;
}
}
/*
@ -475,7 +490,7 @@ procline(struct parsec *pc)
* again just in case we still have a chance to match later in
* the string.
*/
if (lastmatches == 0 && retry > 0) {
if (lastmatches == 0 && retry > pc->lnstart) {
st = retry;
continue;
}
@ -497,6 +512,7 @@ procline(struct parsec *pc)
/* Advance st based on previous matches */
st = nst;
pc->lnstart = st;
}
/* Reflect the new matchidx in the context */