From f823c6dc730b0dd08b54a53be1d8fd587eee7021 Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Thu, 4 Feb 2021 15:26:45 -0600 Subject: [PATCH] grep: fix null pattern and empty pattern file behavior The null pattern semantics were terrible because I tried to match gnugrep, but I got it wrong. Let's unwind that: - The null pattern should match every line if neither -w nor -x. - The null pattern should match empty lines if -x. - The null pattern should not match any lines if -w. The first two will stop processing (shortcut) even if additional patterns are specified. In any other case, we will continue processing other patterns. If no other patterns are specified beside a null pattern, then we match if neither -w nor -x or set and do not match if either of those are specified. The justification for -w is that it should match on a whole word, but the null pattern deos not have a whole word to match on. Empty pattern files should never match anything, and more importantly, -v should cause everything to be written. PR: 253209 MFC-after: 4 days --- contrib/netbsd-tests/usr.bin/grep/t_grep.sh | 22 +++++++++++-- usr.bin/grep/grep.c | 11 ------- usr.bin/grep/util.c | 35 ++++++++++----------- 3 files changed, 35 insertions(+), 33 deletions(-) diff --git a/contrib/netbsd-tests/usr.bin/grep/t_grep.sh b/contrib/netbsd-tests/usr.bin/grep/t_grep.sh index e094b15c6d67..ef3f0617465e 100755 --- a/contrib/netbsd-tests/usr.bin/grep/t_grep.sh +++ b/contrib/netbsd-tests/usr.bin/grep/t_grep.sh @@ -489,11 +489,11 @@ wflag_emptypat_body() atf_check -s exit:1 -o empty grep -w -e "" test1 - atf_check -o file:test2 grep -w -e "" test2 + atf_check -o file:test2 grep -vw -e "" test2 atf_check -s exit:1 -o empty grep -w -e "" test3 - atf_check -o file:test4 grep -w -e "" test4 + atf_check -o file:test4 grep -vw -e "" test4 } atf_test_case xflag_emptypat @@ -504,7 +504,6 @@ xflag_emptypat_body() printf "qaz" > test3 printf " qaz\n" > test4 - # -x is whole-line, more strict than -w. atf_check -s exit:1 -o empty grep -x -e "" test1 atf_check -o file:test2 grep -x -e "" test2 @@ -550,6 +549,22 @@ xflag_emptypat_plus_body() atf_check -o file:spacelines grep -Fxvf patlist1 target_spacelines } +atf_test_case emptyfile +emptyfile_descr() +{ + atf_set "descr" "Check for proper handling of empty pattern files (PR 253209)" +} +emptyfile_body() +{ + :> epatfile + echo "blubb" > subj + + # From PR 253209, bsdgrep was short-circuiting completely on an empty + # file, but we should have still been processing lines. + atf_check -s exit:1 -o empty fgrep -f epatfile subj + atf_check -o file:subj fgrep -vf epatfile subj +} + atf_test_case excessive_matches excessive_matches_head() { @@ -946,6 +961,7 @@ atf_init_test_cases() atf_add_test_case wflag_emptypat atf_add_test_case xflag_emptypat atf_add_test_case xflag_emptypat_plus + atf_add_test_case emptyfile atf_add_test_case excessive_matches atf_add_test_case wv_combo_break atf_add_test_case fgrep_sanity diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c index 307a91353b66..33541e4fe734 100644 --- a/usr.bin/grep/grep.c +++ b/usr.bin/grep/grep.c @@ -69,13 +69,6 @@ const char *errstr[] = { int cflags = REG_NOSUB | REG_NEWLINE; int eflags = REG_STARTEND; -/* XXX TODO: Get rid of this flag. - * matchall is a gross hack that means that an empty pattern was passed to us. - * It is a necessary evil at the moment because our regex(3) implementation - * does not allow for empty patterns, as supported by POSIX's definition of - * grammar for BREs/EREs. When libregex becomes available, it would be wise - * to remove this and let regex(3) handle the dirty details of empty patterns. - */ bool matchall; /* Searching patterns */ @@ -637,10 +630,6 @@ main(int argc, char *argv[]) aargc -= optind; aargv += optind; - /* Empty pattern file matches nothing */ - if (!needpattern && (patterns == 0) && !matchall) - exit(1); - /* Fail if we don't have any pattern */ if (aargc == 0 && needpattern) usage(); diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c index e517e4eaee6d..f22b7abd79ef 100644 --- a/usr.bin/grep/util.c +++ b/usr.bin/grep/util.c @@ -471,31 +471,28 @@ procline(struct parsec *pc) matchidx = pc->matchidx; - /* - * With matchall (empty pattern), we can try to take some shortcuts. - * Emtpy patterns trivially match every line except in the -w and -x - * cases. For -w (whole-word) cases, we only match if the first - * character isn't a word-character. For -x (whole-line) cases, we only - * match if the line is empty. - */ + /* Null pattern shortcuts. */ if (matchall) { - if (pc->ln.len == 0) + if (xflag && pc->ln.len == 0) { + /* Matches empty lines (-x). */ return (true); - if (wflag) { - wend = L' '; - if (sscanf(&pc->ln.dat[0], "%lc", &wend) == 1 && - !iswword(wend)) - return (true); - } else if (!xflag) + } else if (!wflag && !xflag) { + /* Matches every line (no -w or -x). */ return (true); + } /* - * If we don't have any other patterns, we really don't match. - * If we do have other patterns, we must fall through and check - * them. + * If we only have the NULL pattern, whether we match or not + * depends on if we got here with -w or -x. If either is set, + * the answer is no. If we have other patterns, we'll defer + * to them. */ - if (patterns == 0) - return (false); + if (patterns == 0) { + return (!(wflag || xflag)); + } + } else if (patterns == 0) { + /* Pattern file with no patterns. */ + return (false); } matched = false;