From c4cbf1fbabcb84d9d0a49c2444ed398ed9907df5 Mon Sep 17 00:00:00 2001
From: Craig Leres <leres@FreeBSD.org>
Date: Mon, 20 Jul 2020 23:57:53 +0000
Subject: [PATCH] Fix some regressions with the zgrep(1) wrapper.

 - Handle whitespace with long flags that take arguments:

	echo 'foo bar' > test
	zgrep --regexp='foo bar' test

 - Do not hang reading from stdin with patterns in a file:

	echo foobar > test
	echo foo > pattern
	zgrep -f pattern test
	zgrep --file=pattern test

 - Handle any flags after -e:

	echo foobar > test
	zgrep -e foo --ignore-case < test

These two are still outstanding problems:

 - Does not handle flags that take an argument if there is no
   whitespace:

	zgrep -enfs /etc/rpc

 - When more than one -e pattern is used, matching should occur for
   any of the patterns (similar to multiple patterns supplied with
   -f file). Instead, only the last pattern is used for matching:

	zgrep -e rex -e nfs /etc/rpc

   (This problem is masked in the unpatched version by the "any
   flags after -e" problem.)

Add tests for the above problems.

Update the manpage and add references to gzip(1) and zstd(1) and also
document the remaining known problems.

PR:		247126
Approved by:	markj
MFC after:	2 weeks
Differential Revision:	https://reviews.freebsd.org/D25613
---
 contrib/netbsd-tests/usr.bin/grep/t_grep.sh | 89 +++++++++++++++++++++
 usr.bin/grep/zgrep.1                        | 24 +++++-
 usr.bin/grep/zgrep.sh                       | 50 +++++++++---
 3 files changed, 150 insertions(+), 13 deletions(-)

diff --git a/contrib/netbsd-tests/usr.bin/grep/t_grep.sh b/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
index c5c47e992378..75ee254cc79b 100755
--- a/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
+++ b/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
@@ -214,6 +214,89 @@ zgrep_body()
 	atf_check -o file:"$(atf_get_srcdir)/d_zgrep.out" zgrep -h line d_input.gz
 }
 
+atf_test_case zgrep_combined_flags
+zgrep_combined_flags_head()
+{
+	atf_set "descr" "Checks for zgrep wrapper problems with combined flags (PR 247126)"
+}
+zgrep_combined_flags_body()
+{
+	atf_expect_fail "known but unsolved zgrep wrapper script regression"
+
+	echo 'foo bar' > test
+
+	atf_check -o inline:"foo bar\n" zgrep -we foo test
+	# Avoid hang on reading from stdin in the failure case
+	atf_check -o inline:"foo bar\n" zgrep -wefoo test < /dev/null
+}
+
+atf_test_case zgrep_eflag
+zgrep_eflag_head()
+{
+	atf_set "descr" "Checks for zgrep wrapper problems with -e PATTERN (PR 247126)"
+}
+zgrep_eflag_body()
+{
+	echo 'foo bar' > test
+
+	# Avoid hang on reading from stdin in the failure case
+	atf_check -o inline:"foo bar\n" zgrep -e 'foo bar' test < /dev/null
+	atf_check -o inline:"foo bar\n" zgrep --regexp='foo bar' test < /dev/null
+}
+
+atf_test_case zgrep_fflag
+zgrep_fflag_head()
+{
+	atf_set "descr" "Checks for zgrep wrapper problems with -f FILE (PR 247126)"
+}
+zgrep_fflag_body()
+{
+	echo foo > pattern
+	echo foobar > test
+
+	# Avoid hang on reading from stdin in the failure case
+	atf_check -o inline:"foobar\n" zgrep -f pattern test </dev/null
+	atf_check -o inline:"foobar\n" zgrep --file=pattern test </dev/null
+}
+
+atf_test_case zgrep_long_eflag
+zgrep_long_eflag_head()
+{
+	atf_set "descr" "Checks for zgrep wrapper problems with --ignore-case reading from stdin (PR 247126)"
+}
+zgrep_long_eflag_body()
+{
+	echo foobar > test
+
+	atf_check -o inline:"foobar\n" zgrep -e foo --ignore-case < test
+}
+
+atf_test_case zgrep_multiple_eflags
+zgrep_multiple_eflags_head()
+{
+	atf_set "descr" "Checks for zgrep wrapper problems with multiple -e flags (PR 247126)"
+}
+zgrep_multiple_eflags_body()
+{
+	atf_expect_fail "known but unsolved zgrep wrapper script regression"
+
+	echo foobar > test
+
+	atf_check -o inline:"foobar\n" zgrep -e foo -e xxx test
+}
+
+atf_test_case zgrep_empty_eflag
+zgrep_empty_eflag_head()
+{
+	atf_set "descr" "Checks for zgrep wrapper problems with empty -e flags pattern (PR 247126)"
+}
+zgrep_empty_eflag_body()
+{
+	echo foobar > test
+
+	atf_check -o inline:"foobar\n" zgrep -e '' test
+}
+
 atf_test_case nonexistent
 nonexistent_head()
 {
@@ -826,6 +909,12 @@ atf_init_test_cases()
 	atf_add_test_case file_exp
 	atf_add_test_case egrep
 	atf_add_test_case zgrep
+	atf_add_test_case zgrep_combined_flags
+	atf_add_test_case zgrep_eflag
+	atf_add_test_case zgrep_empty_eflag
+	atf_add_test_case zgrep_fflag
+	atf_add_test_case zgrep_long_eflag
+	atf_add_test_case zgrep_multiple_eflags
 	atf_add_test_case nonexistent
 	atf_add_test_case context2
 # Begin FreeBSD
diff --git a/usr.bin/grep/zgrep.1 b/usr.bin/grep/zgrep.1
index 332f980feca7..e300bf54b6d8 100644
--- a/usr.bin/grep/zgrep.1
+++ b/usr.bin/grep/zgrep.1
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd August 21, 2018
+.Dd July 20, 2020
 .Dt ZGREP 1
 .Os
 .Sh NAME
@@ -86,9 +86,29 @@ to read compressed files.
 .Sh SEE ALSO
 .Xr bzip2 1 ,
 .Xr grep 1 ,
-.Xr xz 1
+.Xr gzip 1 ,
+.Xr xz 1 ,
+.Xr zstd 1
 .Sh AUTHORS
 This version of the
 .Nm
 utility was written by
 .An Thomas Klausner Aq Mt wiz@NetBSD.org .
+.Sh BUGS
+.Xr zgrep 1
+does not handle flags that take arguments if there is no whitespace
+between the flag and the argument, for example:
+.Pp
+.Dl "zgrep -enfs /etc/rpc"
+.Pp
+When more than one
+.Fl e
+flag is used, matching
+should occur for any of the patterns (similar to multiple patterns
+supplied in a file with the
+.Fl f
+flag).
+.Xr zgrep 1
+only matches the last
+.Fl e
+pattern.
diff --git a/usr.bin/grep/zgrep.sh b/usr.bin/grep/zgrep.sh
index c645b1ca5907..acbcb48770a7 100755
--- a/usr.bin/grep/zgrep.sh
+++ b/usr.bin/grep/zgrep.sh
@@ -29,6 +29,7 @@ grep=grep
 zcat=zstdcat
 
 endofopts=0
+pattern_file=0
 pattern_found=0
 grep_args=""
 hyphen=0
@@ -75,18 +76,39 @@ while [ $# -gt 0 -a ${endofopts} -eq 0 ]
 do
     case $1 in
     # from GNU grep-2.5.1 -- keep in sync!
-	-[ABCDXdefm])
+	--)
+	    shift
+	    endofopts=1
+	    ;;
+	--file=*)
+	    pattern_file=1
+	    grep_args="${grep_args} ${1}"
+	    shift
+	    ;;
+	--regexp=*)
+	    pattern="${1#--regexp=}"
+	    pattern_found=1
+	    shift
+	    ;;
+	--*)
+	    grep_args="${grep_args} $1"
+	    shift
+	    ;;
+	-*[ABCDXdefm])
 	    if [ $# -lt 2 ]
 		then
 		echo "${prg}: missing argument for $1 flag" >&2
 		exit 1
 	    fi
 	    case $1 in
-		-e)
+		-*e)
 		    pattern="$2"
 		    pattern_found=1
 		    shift 2
-		    break
+		    continue
+		    ;;
+		-*f)
+		    pattern_file=1
 		    ;;
 		*)
 		    ;;
@@ -94,10 +116,6 @@ do
 	    grep_args="${grep_args} $1 $2"
 	    shift 2
 	    ;;
-	--)
-	    shift
-	    endofopts=1
-	    ;;
 	-)
 	    hyphen=1
 	    shift
@@ -125,7 +143,7 @@ do
 done
 
 # if no -e option was found, take next argument as grep-pattern
-if [ ${pattern_found} -lt 1 ]
+if [ ${pattern_file} -eq 0 -a ${pattern_found} -eq 0 ]
 then
     if [ $# -ge 1 ]; then
 	pattern="$1"
@@ -136,6 +154,7 @@ then
 	echo "${prg}: missing pattern" >&2
 	exit 1
     fi
+    pattern_found=1
 fi
 
 ret=0
@@ -143,15 +162,24 @@ ret=0
 if [ $# -lt 1 ]
 then
     # ... on stdin
-    ${cattool} ${catargs} - | ${grep} ${grep_args} -- "${pattern}" - || ret=$?
+    if [ ${pattern_file} -eq 0 ]; then
+	${cattool} ${catargs} - | ${grep} ${grep_args} -- "${pattern}" - || ret=$?
+    else
+	${cattool} ${catargs} - | ${grep} ${grep_args} -- - || ret=$?
+    fi
 else
     # ... on all files given on the command line
     if [ ${silent} -lt 1 -a $# -gt 1 ]; then
 	grep_args="-H ${grep_args}"
     fi
     for file; do
-	${cattool} ${catargs} -- "${file}" |
-	    ${grep} --label="${file}" ${grep_args} -- "${pattern}" - || ret=$?
+	if [ ${pattern_file} -eq 0 ]; then
+	    ${cattool} ${catargs} -- "${file}" |
+		${grep} --label="${file}" ${grep_args} -- "${pattern}" - || ret=$?
+	else
+	    ${cattool} ${catargs} -- "${file}" |
+		${grep} --label="${file}" ${grep_args} -- - || ret=$?
+	fi
     done
 fi