grep: fix null pattern and empty pattern file behavior
The null pattern semantics were terrible because I tried to match gnugrep, but I got it wrong. Let's unwind that:
- The null pattern should match every line if neither -w nor -x.
- The null pattern should match empty lines if -x.
- The null pattern should not match any lines if -w.
The first two will stop processing (shortcut) even if additional patterns are specified. In any other case, we will continue processing other patterns. If no other patterns are specified besides a null pattern, then we match if neither -w nor -x are set and do not match if either of those are specified. The justification for -w is that it should match on a whole word, but the null pattern does not have a whole word to match on.
Empty pattern files should never match anything, and more importantly, -v should cause everything to be written.
PR: 253209
MFC-after: 4 days
This commit is contained in:
parent
856789c123
commit
f823c6dc73
@ -489,11 +489,11 @@ wflag_emptypat_body()
|
||||
|
||||
atf_check -s exit:1 -o empty grep -w -e "" test1
|
||||
|
||||
atf_check -o file:test2 grep -w -e "" test2
|
||||
atf_check -o file:test2 grep -vw -e "" test2
|
||||
|
||||
atf_check -s exit:1 -o empty grep -w -e "" test3
|
||||
|
||||
atf_check -o file:test4 grep -w -e "" test4
|
||||
atf_check -o file:test4 grep -vw -e "" test4
|
||||
}
|
||||
|
||||
atf_test_case xflag_emptypat
|
||||
@ -504,7 +504,6 @@ xflag_emptypat_body()
|
||||
printf "qaz" > test3
|
||||
printf " qaz\n" > test4
|
||||
|
||||
# -x is whole-line, more strict than -w.
|
||||
atf_check -s exit:1 -o empty grep -x -e "" test1
|
||||
|
||||
atf_check -o file:test2 grep -x -e "" test2
|
||||
@ -550,6 +549,22 @@ xflag_emptypat_plus_body()
|
||||
atf_check -o file:spacelines grep -Fxvf patlist1 target_spacelines
|
||||
}
|
||||
|
||||
atf_test_case emptyfile
|
||||
emptyfile_descr()
|
||||
{
|
||||
atf_set "descr" "Check for proper handling of empty pattern files (PR 253209)"
|
||||
}
|
||||
emptyfile_body()
|
||||
{
|
||||
:> epatfile
|
||||
echo "blubb" > subj
|
||||
|
||||
# From PR 253209, bsdgrep was short-circuiting completely on an empty
|
||||
# file, but we should have still been processing lines.
|
||||
atf_check -s exit:1 -o empty fgrep -f epatfile subj
|
||||
atf_check -o file:subj fgrep -vf epatfile subj
|
||||
}
|
||||
|
||||
atf_test_case excessive_matches
|
||||
excessive_matches_head()
|
||||
{
|
||||
@ -946,6 +961,7 @@ atf_init_test_cases()
|
||||
atf_add_test_case wflag_emptypat
|
||||
atf_add_test_case xflag_emptypat
|
||||
atf_add_test_case xflag_emptypat_plus
|
||||
atf_add_test_case emptyfile
|
||||
atf_add_test_case excessive_matches
|
||||
atf_add_test_case wv_combo_break
|
||||
atf_add_test_case fgrep_sanity
|
||||
|
@ -69,13 +69,6 @@ const char *errstr[] = {
|
||||
int cflags = REG_NOSUB | REG_NEWLINE;
|
||||
int eflags = REG_STARTEND;
|
||||
|
||||
/* XXX TODO: Get rid of this flag.
|
||||
* matchall is a gross hack that means that an empty pattern was passed to us.
|
||||
* It is a necessary evil at the moment because our regex(3) implementation
|
||||
* does not allow for empty patterns, as supported by POSIX's definition of
|
||||
* grammar for BREs/EREs. When libregex becomes available, it would be wise
|
||||
* to remove this and let regex(3) handle the dirty details of empty patterns.
|
||||
*/
|
||||
bool matchall;
|
||||
|
||||
/* Searching patterns */
|
||||
@ -637,10 +630,6 @@ main(int argc, char *argv[])
|
||||
aargc -= optind;
|
||||
aargv += optind;
|
||||
|
||||
/* Empty pattern file matches nothing */
|
||||
if (!needpattern && (patterns == 0) && !matchall)
|
||||
exit(1);
|
||||
|
||||
/* Fail if we don't have any pattern */
|
||||
if (aargc == 0 && needpattern)
|
||||
usage();
|
||||
|
@ -471,31 +471,28 @@ procline(struct parsec *pc)
|
||||
|
||||
matchidx = pc->matchidx;
|
||||
|
||||
/*
|
||||
* With matchall (empty pattern), we can try to take some shortcuts.
|
||||
* Emtpy patterns trivially match every line except in the -w and -x
|
||||
* cases. For -w (whole-word) cases, we only match if the first
|
||||
* character isn't a word-character. For -x (whole-line) cases, we only
|
||||
* match if the line is empty.
|
||||
*/
|
||||
/* Null pattern shortcuts. */
|
||||
if (matchall) {
|
||||
if (pc->ln.len == 0)
|
||||
if (xflag && pc->ln.len == 0) {
|
||||
/* Matches empty lines (-x). */
|
||||
return (true);
|
||||
if (wflag) {
|
||||
wend = L' ';
|
||||
if (sscanf(&pc->ln.dat[0], "%lc", &wend) == 1 &&
|
||||
!iswword(wend))
|
||||
return (true);
|
||||
} else if (!xflag)
|
||||
} else if (!wflag && !xflag) {
|
||||
/* Matches every line (no -w or -x). */
|
||||
return (true);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we don't have any other patterns, we really don't match.
|
||||
* If we do have other patterns, we must fall through and check
|
||||
* them.
|
||||
* If we only have the NULL pattern, whether we match or not
|
||||
* depends on if we got here with -w or -x. If either is set,
|
||||
* the answer is no. If we have other patterns, we'll defer
|
||||
* to them.
|
||||
*/
|
||||
if (patterns == 0)
|
||||
return (false);
|
||||
if (patterns == 0) {
|
||||
return (!(wflag || xflag));
|
||||
}
|
||||
} else if (patterns == 0) {
|
||||
/* Pattern file with no patterns. */
|
||||
return (false);
|
||||
}
|
||||
|
||||
matched = false;
|
||||
|
Loading…
Reference in New Issue
Block a user