Merge FreeBSD changes into GNU grep 2.4.

2000-01-04 03:25:40 +00:00 · 2000-01-04 03:25:40 +00:00 · 7e5b33c6cd
commit 7e5b33c6cd
parent e3bfb27984
6 changed files with 558 additions and 347 deletions
--- a/gnu/usr.bin/grep/dfa.c
+++ b/gnu/usr.bin/grep/dfa.c
@ -353,15 +353,20 @@ static reg_syntax_t syntax_bits, syntax_bits_set;
 /* Flag for case-folding letters into sets. */
 static int case_fold;

+/* End-of-line byte in data.  */
+static unsigned char eolbyte;
+
 /* Entry point to set syntax options. */
 void
-dfasyntax(bits, fold)
+dfasyntax(bits, fold, eol)
     reg_syntax_t bits;
     int fold;
+     int eol;
 {
  syntax_bits_set = 1;
  syntax_bits = bits;
  case_fold = fold;
+  eolbyte = eol;
 }

 /* Lexical analyzer.  All the dross that deals with the obnoxious
@ -580,11 +585,32 @@ lex()
 	    goto normal_char;
 	  if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
 	    goto normal_char;
-	  minrep = maxrep = 0;
+	  if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+	    goto normal_char;
+
+	  if (syntax_bits & RE_NO_BK_BRACES)
+	    {
+	      /* Scan ahead for a valid interval; if it's not valid,
+		 treat it as a literal '{'.  */
+	      int lo = -1, hi = -1;
+	      char const *p = lexptr;
+	      char const *lim = p + lexleft;
+	      for (;  p != lim && ISDIGIT (*p);  p++)
+		lo = (lo < 0 ? 0 : lo * 10) + *p - '0';
+	      if (p != lim && *p == ',')
+		while (++p != lim && ISDIGIT (*p))
+		  hi = (hi < 0 ? 0 : hi * 10) + *p - '0';
+	      else
+		hi = lo;
+	      if (p == lim || *p != '}'
+		  || lo < 0 || RE_DUP_MAX < hi || (0 <= hi && hi < lo))
+		goto normal_char;
+	    }
+
+	  minrep = 0;
 	  /* Cases:
 	     {M} - exact count
 	     {M,} - minimum count, maximum is infinity
-	     {,M} - 0 through M
 	     {M,N} - M through N */
 	  FETCH(c, _("unfinished repeat count"));
 	  if (ISDIGIT(c))
@ -598,16 +624,27 @@ lex()
 		  minrep = 10 * minrep + c - '0';
 		}
 	    }
-	  else if (c != ',')
+	  else
 	    dfaerror(_("malformed repeat count"));
 	  if (c == ',')
-	    for (;;)
-	      {
-		FETCH(c, _("unfinished repeat count"));
-		if (!ISDIGIT(c))
-		  break;
-		maxrep = 10 * maxrep + c - '0';
-	      }
+	    {
+	      FETCH (c, _("unfinished repeat count"));
+	      if (! ISDIGIT (c))
+		maxrep = -1;
+	      else
+		{
+		  maxrep = c - '0';
+		  for (;;)
+		    {
+		      FETCH (c, _("unfinished repeat count"));
+		      if (! ISDIGIT (c))
+			break;
+		      maxrep = 10 * maxrep + c - '0';
+		    }
+		  if (0 <= maxrep && maxrep < minrep)
+		    dfaerror (_("malformed repeat count"));
+		}
+	    }
 	  else
 	    maxrep = minrep;
 	  if (!(syntax_bits & RE_NO_BK_BRACES))
@ -659,7 +696,7 @@ lex()
 	  zeroset(ccl);
 	  notset(ccl);
 	  if (!(syntax_bits & RE_DOT_NEWLINE))
-	    clrbit('\n', ccl);
+	    clrbit(eolbyte, ccl);
 	  if (syntax_bits & RE_DOT_NOT_NULL)
 	    clrbit('\0', ccl);
 	  laststart = 0;
@ -776,7 +813,7 @@ lex()
 	    {
 	      notset(ccl);
 	      if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
-		clrbit('\n', ccl);
+		clrbit(eolbyte, ccl);
 	    }
 	  laststart = 0;
 	  return lasttok = CSET + charclass_index(ccl);
@ -942,7 +979,7 @@ closure()
      {
 	ntokens = nsubtoks(dfa->tindex);
 	tindex = dfa->tindex - ntokens;
-	if (maxrep == 0)
+	if (maxrep < 0)
 	  addtok(PLUS);
 	if (minrep == 0)
 	  addtok(QMARK);
@ -1605,7 +1642,7 @@ dfastate(s, d, trans)
      for (i = 0; i < NOTCHAR; ++i)
 	if (IS_WORD_CONSTITUENT(i))
 	  setbit(i, letters);
-      setbit('\n', newline);
+      setbit(eolbyte, newline);
    }

  zeroset(matches);
@ -1626,7 +1663,7 @@ dfastate(s, d, trans)
 	{
 	  if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
 					 d->states[s].newline, 1))
-	    clrbit('\n', matches);
+	    clrbit(eolbyte, matches);
 	  if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
 					 d->states[s].newline, 0))
 	    for (j = 0; j < CHARCLASS_INTS; ++j)
@ -1737,7 +1774,7 @@ dfastate(s, d, trans)
 	state_letter = state;
      for (i = 0; i < NOTCHAR; ++i)
 	trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state;
-      trans['\n'] = state_newline;
+      trans[eolbyte] = state_newline;
    }
  else
    for (i = 0; i < NOTCHAR; ++i)
@ -1761,7 +1798,7 @@ dfastate(s, d, trans)

      /* Find out if the new state will want any context information. */
      wants_newline = 0;
-      if (tstbit('\n', labels[i]))
+      if (tstbit(eolbyte, labels[i]))
 	for (j = 0; j < follows.nelem; ++j)
 	  if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
 	    wants_newline = 1;
@ -1793,7 +1830,7 @@ dfastate(s, d, trans)
 	    {
 	      int c = j * INTBITS + k;

-	      if (c == '\n')
+	      if (c == eolbyte)
 		trans[c] = state_newline;
 	      else if (IS_WORD_CONSTITUENT(c))
 		trans[c] = state_letter;
@ -1884,8 +1921,8 @@ build_state(s, d)

  /* Keep the newline transition in a special place so we can use it as
     a sentinel. */
-  d->newlines[s] = trans['\n'];
-  trans['\n'] = -1;
+  d->newlines[s] = trans[eolbyte];
+  trans[eolbyte] = -1;

  if (ACCEPTING(s, *d))
    d->fails[s] = trans;
@ -1933,6 +1970,7 @@ dfaexec(d, begin, end, newline, count, backref)
  register unsigned char *p;	/* Current input character. */
  register int **trans, *t;	/* Copy of d->trans so it can be optimized
 				   into a register. */
+  register unsigned char eol = eolbyte;	/* Likewise for eolbyte.  */
  static int sbit[NOTCHAR];	/* Table for anding with d->success. */
  static int sbit_init;

@ -1943,7 +1981,7 @@ dfaexec(d, begin, end, newline, count, backref)
      sbit_init = 1;
      for (i = 0; i < NOTCHAR; ++i)
 	sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
-      sbit['\n'] = 4;
+      sbit[eol] = 4;
    }

  if (! d->tralloc)
@ -1952,7 +1990,7 @@ dfaexec(d, begin, end, newline, count, backref)
  s = s1 = 0;
  p = (unsigned char *) begin;
  trans = d->trans;
-  *end = '\n';
+  *end = eol;

  for (;;)
    {
@ -1980,7 +2018,7 @@ dfaexec(d, begin, end, newline, count, backref)
 	}

      /* If the previous character was a newline, count it. */
-      if (count && (char *) p <= end && p[-1] == '\n')
+      if (count && (char *) p <= end && p[-1] == eol)
 	++*count;

      /* Check if we've run off the end of the buffer. */
@ -1994,7 +2032,7 @@ dfaexec(d, begin, end, newline, count, backref)
 	  continue;
 	}

-      if (p[-1] == '\n' && newline)
+      if (p[-1] == eol && newline)
 	{
 	  s = d->newlines[s1];
 	  continue;
--- a/gnu/usr.bin/grep/dfa.h
+++ b/gnu/usr.bin/grep/dfa.h
@ -322,9 +322,10 @@ struct dfa

 /* Entry points. */

-/* dfasyntax() takes two arguments; the first sets the syntax bits described
-   earlier in this file, and the second sets the case-folding flag. */
-extern void dfasyntax PARAMS ((reg_syntax_t, int));
+/* dfasyntax() takes three arguments; the first sets the syntax bits described
+   earlier in this file, the second sets the case-folding flag, and the
+   third specifies the line terminator. */
+extern void dfasyntax PARAMS ((reg_syntax_t, int, int));

 /* Compile the given string of the given length into the given struct dfa.
   Final argument is a flag specifying whether to build a searching or an
--- a/gnu/usr.bin/grep/grep.1
+++ b/gnu/usr.bin/grep/grep.1
@ -1,26 +1,68 @@
 .\" grep man page
 .\" $FreeBSD$
+.if !\n(.g \{\
+.	if !\w|\*(lq| \{\
+.		ds lq ``
+.		if \w'\(lq' .ds lq "\(lq
+.	\}
+.	if !\w|\*(rq| \{\
+.		ds rq ''
+.		if \w'\(rq' .ds rq "\(rq
+.	\}
+.\}
 .de Id
 .ds Dt \\$4
 ..
-.Id $Id: grep.1,v 1.1 1998/11/22 06:45:20 alainm Exp $
+.Id $Id: grep.1,v 1.7 1999/10/12 20:41:01 alainm Exp $
 .TH GREP 1 \*(Dt "GNU Project"
 .SH NAME
 grep, egrep, fgrep, zgrep \- print lines matching a pattern
 .SH SYNOPSIS
 .B grep
-[-[AB] NUM] [-CEFGVZabchiLlnqrsvwxyUu] [-e PATTERN | -f FILE]
-[-d ACTION] [--directories=ACTION]
-[--extended-regexp] [--fixed-strings] [--basic-regexp]
-[--regexp=PATTERN] [--file=FILE] [--ignore-case] [--word-regexp]
-[--line-regexp] [--line-regexp] [--no-messages] [--revert-match]
-[--version] [--help] [--byte-offset] [--line-number]
-[--with-filename] [--no-filename] [--quiet] [--silent] [--text]
-[--files-without-match] [--files-with-matcces] [--count]
-[--before-context=NUM] [--after-context=NUM] [--context]
-[--binary] [--unix-byte-offsets] [--recursive]
-[--decompress]
-.I files...
+.RB [ \- [ ABC ]
+.IR NUM ]
+.RB [ \-EFGHLUVZabchilnqrsuvwxyz ]
+.RB [ \-e
+.I PATTERN
+|
+.B \-f
+.IR FILE ]
+.RB [ \-d
+.IR ACTION ]
+.RB [ \-\^\-directories=\fIACTION\fP ]
+.RB [ \-\^\-extended-regexp ]
+.RB [ \-\^\-fixed-strings ]
+.RB [ \-\^\-basic-regexp ]
+.RB [ \-\^\-regexp=\fIPATTERN\fP ]
+.RB [ \-\^\-file=\fIFILE\fP ]
+.RB [ \-\^\-ignore-case ]
+.RB [ \-\^\-word-regexp ]
+.RB [ \-\^\-line-regexp ]
+.RB [ \-\^\-line-regexp ]
+.RB [ \-\^\-no-messages ]
+.RB [ \-\^\-invert-match ]
+.RB [ \-\^\-version ]
+.RB [ \-\^\-help ]
+.RB [ \-\^\-byte-offset ]
+.RB [ \-\^\-line-number ]
+.RB [ \-\^\-with-filename ]
+.RB [ \-\^\-no-filename ]
+.RB [ \-\^\-quiet ]
+.RB [ \-\^\-silent ]
+.RB [ \-\^\-text ]
+.RB [ \-\^\-files-without-match ]
+.RB [ \-\^\-files-with-matches ]
+.RB [ \-\^\-count ]
+.RB [ \-\^\-before-context=\fINUM\fP ]
+.RB [ \-\^\-after-context=\fINUM\fP ]
+.RB [ \-\^\-context [ =\fINUM\fP ]]
+.RB [ \-\^\-binary ]
+.RB [ \-\^\-unix-byte-offsets ]
+.RB [ \-\^\-mmap ]
+.RB [ \-\^\-null ]
+.RB [ \-\^\-recursive ]
+.RB [ \-\^\-decompress ]
+.RI [ file .\|.\|.]
 .SH DESCRIPTION
 .PP
 .B grep
@ -41,83 +83,83 @@ There are three major variants of
 controlled by the following options.
 .PD 0
 .TP
-.B \-G, --basic-regexp
+.BR \-G ", " \-\^\-basic-regexp
 Interpret
 .I pattern
 as a basic regular expression (see below).  This is the default.
 .TP
-.B \-E, --extended-regexp
+.BR \-E ", " \-\^\-extended-regexp
 Interpret
 .I pattern
 as an extended regular expression (see below).
 .TP
-.B \-F, --fixed-strings
+.BR \-F ", " \-\^\-fixed-strings
 Interpret
 .I pattern
 as a list of fixed strings, separated by newlines,
 any of which is to be matched.
-.LP
+.PP
 In addition, two variant programs
 .B egrep
 and
 .B fgrep
 are available.
 .B egrep
-is similar (but not identical) to
-.BR "grep\ \-E" ,
-and is compatible with the historical Unix
-.BR egrep .
+is the same as
+.BR "grep\ \-E" .
 .B fgrep
 is the same as
 .BR "grep\ \-F" .
 .B zgrep
 is the same as
-.BR "grep\ \-Z" .
+.BR "grep\ \-z" .
 .PD
-.LP
+.PP
 All variants of
 .B grep
 understand the following options:
 .PD 0
 .TP
-.BI \-A " NUM" ", --after-context=" NUM
+.BI \-A " NUM" "\fR,\fP \-\^\-after-context=" NUM
 Print
 .I NUM
 lines of trailing context after matching lines.
 .TP
-.BI \-B " NUM" ", --before-context=" NUM
+.BI \-B " NUM" "\fR,\fP \-\^\-before-context=" NUM
 Print
 .I NUM
 lines of leading context before matching lines.
 .TP
-.BI \-C ,\  --context"[=NUM]"
-Print 
+.BI \-C " \fR[\fPNUM\fR]\fP" "\fR,\fP \-\^\-context\fR[\fP=" NUM\fR]\fP
+Print
 .I NUM
 lines (default 2) of output context.
 .TP
-.BI \- NUM \ 
-Same as --context=NUM lines of leading and trailing context.  However,
+.BI \- NUM
+Same as
+.BI \-\^\-context= NUM
+lines of leading and trailing context.  However,
 .B grep
 will never print any given line more than once.
 .TP
-.B \-V, --version
+.BR \-V ", " \-\^\-version
 Print the version number of
 .B grep
 to standard error.  This version number should
 be included in all bug reports (see below).
 .TP
-.B \-b, --byte-offset
+.BR \-b ", " \-\^\-byte-offset
 Print the byte offset within the input file before
 each line of output.
 .TP
-.B \-c, --count
+.BR \-c ", " \-\^\-count
 Suppress normal output; instead print a count of
 matching lines for each input file.
 With the
-.B \-v, --revert-match
+.BR \-v ", " \-\^\-invert-match
 option (see below), count non-matching lines.
 .TP
-.BI \-d " ACTION" ", --directories=" ACTION
+.BI \-d " ACTION" "\fR,\fP \-\^\-directories=" ACTION
 If an input file is a directory, use
 .I ACTION
 to process it.  By default,
@ -140,75 +182,78 @@ this is equivalent to the
 .B \-r
 option.
 .TP
-.BI \-e " PATTERN" ", --regexp=" PATTERN
+.BI \-e " PATTERN" "\fR,\fP \-\^\-regexp=" PATTERN
 Use
 .I PATTERN
 as the pattern; useful to protect patterns beginning with
 .BR \- .
 .TP
-.BI \-f " FILE" ", --file=" FILE
+.BI \-f " FILE" "\fR,\fP \-\^\-file=" FILE
 Obtain patterns from
 .IR FILE ,
 one per line.
 The empty file contains zero patterns, and therfore matches nothing.
 .TP
-.B \-h, --no-filename
+.BR \-H ", " \-\^\-with-filename
+Print the filename for each match.
+.TP
+.BR \-h ", " \-\^\-no-filename
 Suppress the prefixing of filenames on output
 when multiple files are searched.
 .TP
-.B \-i, --ignore-case
+.BR \-i ", " \-\^\-ignore-case
 Ignore case distinctions in both the
 .I pattern
 and the input files.
 .TP
-.B \-L, --files-without-match
+.BR \-L ", " \-\^\-files-without-match
 Suppress normal output; instead print the name
 of each input file from which no output would
-normally have been printed. The scanning will stop
+normally have been printed.  The scanning will stop
 on the first match.
 .TP
-.B \-l, --files-with-matches
+.BR \-l ", " \-\^\-files-with-matches
 Suppress normal output; instead print
 the name of each input file from which output
-would normally have been printed. The scanning will
+would normally have been printed.  The scanning will
 stop on the first match.
 .TP
-.B \-n, --line-number
+.BR \-n ", " \-\^\-line-number
 Prefix each line of output with the line number
 within its input file.
 .TP
-.B \-q, --quiet, --silent
-Quiet; suppress normal output. The scanning will stop
+.BR \-q ", " \-\^\-quiet ", " \-\^\-silent
+Quiet; suppress normal output.  The scanning will stop
 on the first match.
 Also see the
 .B \-s
 or
-.B --no-messages
+.B \-\^\-no-messages
 option below.
 .TP
-.B \-r, --recursive
+.BR \-r ", " \-\^\-recursive
 Read all files under each directory, recursively;
 this is equivalent to the
 .B "\-d recurse"
 option.
 .TP
-.B \-s, --no-messages
+.BR \-s ", " \-\^\-no-messages
 Suppress error messages about nonexistent or unreadable files.
-Portability note: unlike GNU
+Portability note: unlike \s-1GNU\s0
 .BR grep ,
-BSD
+traditional
 .B grep
-does not comply with POSIX.2, because BSD
+did not conform to \s-1POSIX.2\s0, because traditional
 .B grep
-lacks a
+lacked a
 .B \-q
 option and its
 .B \-s
-option behaves like GNU
+option behaved like \s-1GNU\s0
 .BR grep 's
 .B \-q
 option.
-Shell scripts intended to be portable to BSD
+Shell scripts intended to be portable to traditional
 .B grep
 should avoid both
 .B \-q
@ -216,7 +261,7 @@ and
 .B \-s
 and should redirect output to /dev/null instead.
 .TP
-.B \-a, --text
+.BR \-a ", " \-\^\-text
 Do not suppress output lines that contain binary data.
 Normally, if the first few bytes of a file indicate that
 the file contains binary data,
@ -227,10 +272,10 @@ This option causes
 to act as if the file is a text file,
 even if it would otherwise be treated as binary.
 .TP
-.B \-v, --revert-match
+.BR \-v ", " \-\^\-invert-match
 Invert the sense of matching, to select non-matching lines.
 .TP
-.B \-w, --word-regexp
+.BR \-w ", " \-\^\-word-regexp
 Select only those lines containing matches that form whole words.
 The test is that the matching substring must either be at the
 beginning of the line, or preceded by a non-word constituent
@ -238,14 +283,14 @@ character.  Similarly, it must be either at the end of the line
 or followed by a non-word constituent character.  Word-constituent
 characters are letters, digits, and the underscore.
 .TP
-.B \-x, --line-regexp
+.BR \-x ", " \-\^\-line-regexp
 Select only those matches that exactly match the whole line.
 .TP
 .B \-y
 Obsolete synonym for
 .BR \-i .
 .TP
-.B \-U, --binary
+.BR \-U ", " \-\^\-binary
 Treat the file(s) as binary.  By default, under MS-DOS and MS-Windows,
 .BR grep
 guesses the file type by looking at the contents of the first 32KB
@ -261,10 +306,11 @@ work correctly).  Specifying
 overrules this guesswork, causing all files to be read and passed to the
 matching mechanism verbatim; if the file is a text file with CR/LF
 pairs at the end of each line, this will cause some regular
-expressions to fail.  This option is only supported on MS-DOS and
+expressions to fail.
+This option has no effect on platforms other than MS-DOS and
 MS-Windows.
 .TP
-.B \-u, --unix-byte-offsets
+.BR \-u ", " \-\^\-unix-byte-offsets
 Report Unix-style byte offsets.  This switch causes
 .B grep
 to report byte offsets as if the file were Unix-style text file, i.e. with
@ -272,13 +318,46 @@ CR characters stripped off.  This will produce results identical to running
 .B grep
 on a Unix machine.  This option has no effect unless
 .B \-b
-option is also used; it is only supported on MS-DOS and MS-Windows.
-.PD
+option is also used;
+it has no effect on platforms other than MS-DOS and MS-Windows.
+.TP
+.B \-\^\-mmap
+If possible, use the
+.BR mmap (2)
+system call to read input, instead of
+the default
+.BR read (2)
+system call.  In some situations,
+.B \-\^\-mmap
+yields better performance.  However,
+.B \-\^\-mmap
+can cause undefined behavior (including core dumps)
+if an input file shrinks while
+.B grep
+is operating, or if an I/O error occurs.
+.TP
+.BR \-Z ", " \-\^\-null
+Output a zero byte (the \s-1ASCII\s0
+.B NUL
+character) instead of the character that normally follows a file name.
+For example,
+.B "grep \-lZ"
+outputs a zero byte after each file name instead of the usual newline.
+This option makes the output unambiguous, even in the presence of file
+names containing unusual characters like newlines.  This option can be
+used with commands like
+.BR "find \-print0" ,
+.BR "perl \-0" ,
+.BR "sort \-z" ,
+and
+.B "xargs \-0"
+to process arbitrary file names,
+even those that contain newline characters.
 .LP
 Following option is only available if compiled with zlib(3) library:
 .PD 0
 .TP
-.B \-Z, --decompress
+.BR \-z ", " \-\^\-decompress
 Decompress the input data before searching.
 .PD
 .SH "REGULAR EXPRESSIONS"
@ -289,8 +368,8 @@ expressions, by using various operators to combine smaller expressions.
 .PP
 .B grep
 understands two different versions of regular expression syntax:
-``basic'' and ``extended.''  In
-.RB "GNU\ " grep ,
+\*(lqbasic\*(rq and \*(lqextended.\*(rq  In
+.RB "\s-1GNU\s0\ " grep ,
 there is no difference in available functionality using either syntax.
 In other implementations, basic regular expressions are less powerful.
 The following description applies to extended regular expressions;
@ -402,11 +481,6 @@ The preceding item is matched
 .I n
 or more times.
 .TP
-.BI {, m }
-The preceding item is optional and is matched at most
-.I m
-times.
-.TP
 .BI { n , m }
 The preceding item is matched at least
 .I n
@ -456,12 +530,35 @@ versions
 and
 .BR \e) .
 .PP
-In
+Traditional
 .B egrep
-the metacharacter
+did not support the
 .B {
-loses its special meaning; instead use
-.BR \e{ .
+metacharacter, and some
+.B egrep
+implementations support
+.B \e{
+instead, so portable scripts should avoid
+.B {
+in
+.B egrep
+patterns and should use
+.B [{]
+to match a literal
+.BR { .
+.PP
+\s-1GNU\s0
+.B egrep
+attempts to support traditional usage by assuming that
+.B {
+is not special if it would be the start of an invalid interval
+specification.  For example, the shell command
+.B "egrep '{1'"
+searches for the two-character string
+.B {1
+instead of reporting a syntax error in the regular expression.
+\s-1POSIX.2\s0 allows this behavior as an extension, but portable scripts
+should avoid it.
 .SH ENVIRONMENT
 The environment variable
 .B GREP_OPTIONS
@ -483,7 +580,8 @@ other system errors.
 .PP
 Email bug reports to
 .BR bug-gnu-utils@gnu.org .
-Be sure to include the word ``grep'' somewhere in the ``Subject:'' field.
+Be sure to include the word \*(lqgrep\*(rq somewhere in the
+\*(lqSubject:\*(rq field.
 .PP
 Large repetition counts in the
 .BI { m , n }
@ -495,3 +593,5 @@ and space, and may cause
 to run out of memory.
 .PP
 Backreferences are very slow, and may require exponential time.
+.\" Work around problems with some troff -man implementations.
+.br
--- a/gnu/usr.bin/grep/grep.c
+++ b/gnu/usr.bin/grep/grep.c
@ -58,6 +58,17 @@ static int show_help;
 /* If non-zero, print the version on standard output and exit.  */
 static int show_version;

+/* If nonzero, use mmap if possible.  */
+static int mmap_option;
+
+/* Short options.  */
+static char const short_options[] =
+#if HAVE_LIBZ > 0
+"0123456789A:B:C::EFGHUVX:abcd:e:f:hiLlnqrsuvwxyZz";
+#else
+"0123456789A:B:C::EFGHUVX:abcd:e:f:hiLlnqrsuvwxyZz";
+#endif
+
 /* Long options equivalences. */
 static struct option long_options[] =
 {
@ -78,18 +89,23 @@ static struct option long_options[] =
  {"ignore-case", no_argument, NULL, 'i'},
  {"line-number", no_argument, NULL, 'n'},
  {"line-regexp", no_argument, NULL, 'x'},
+  {"mmap", no_argument, &mmap_option, 1},
  {"no-filename", no_argument, NULL, 'h'},
  {"no-messages", no_argument, NULL, 's'},
+#if HAVE_LIBZ > 0
+  {"null", no_argument, NULL, /*'Z'*/ 1},
+#else
+  {"null", no_argument, NULL, 'Z'},
+#endif
+  {"null-data", no_argument, NULL, 'z'},
  {"quiet", no_argument, NULL, 'q'},
  {"recursive", no_argument, NULL, 'r'},
  {"regexp", required_argument, NULL, 'e'},
-  {"revert-match", no_argument, NULL, 'v'},
+  {"invert-match", no_argument, NULL, 'v'},
  {"silent", no_argument, NULL, 'q'},
  {"text", no_argument, NULL, 'a'},
-#if O_BINARY
  {"binary", no_argument, NULL, 'U'},
  {"unix-byte-offsets", no_argument, NULL, 'u'},
-#endif
  {"version", no_argument, NULL, 'V'},
  {"with-filename", no_argument, NULL, 'H'},
  {"word-regexp", no_argument, NULL, 'w'},
@ -100,10 +116,13 @@ static struct option long_options[] =
 };

 /* Define flags declared in grep.h. */
+/* I do not know why we need this decl, while if you build GNU grep 2.4 by
+   hand you don't... */
 char const *matcher;
 int match_icase;
 int match_words;
 int match_lines;
+unsigned char eolbyte;

 /* For error messages. */
 static char *prog;
@ -121,7 +140,10 @@ static enum
 static int  ck_atoi PARAMS ((char const *, int *));
 static void usage PARAMS ((int)) __attribute__((noreturn));
 static void error PARAMS ((const char *, int));
-static int  setmatcher PARAMS ((char const *));
+static void setmatcher PARAMS ((char const *));
+static int  install_matcher PARAMS ((char const *));
+static int  prepend_args PARAMS ((char const *, char *, char **));
+static void prepend_default_options PARAMS ((char const *, int *, char ***));
 static char *page_alloc PARAMS ((size_t, char **));
 static int  reset PARAMS ((int, char const *, struct stats *));
 static int  fillbuf PARAMS ((size_t, struct stats *));
@ -221,14 +243,15 @@ static char *ubuffer;		/* Unaligned base of buffer. */
 static char *buffer;		/* Base of buffer. */
 static size_t bufsalloc;	/* Allocated size of buffer save region. */
 static size_t bufalloc;		/* Total buffer size. */
+#define PREFERRED_SAVE_FACTOR 5	/* Preferred value of bufalloc / bufsalloc.  */
 static int bufdesc;		/* File descriptor. */
 static char *bufbeg;		/* Beginning of user-visible stuff. */
 static char *buflim;		/* Limit of user-visible stuff. */
 static size_t pagesize;		/* alignment of memory pages */
+static off_t bufoffset;		/* Read offset; defined on regular files.  */

 #if defined(HAVE_MMAP)
-static int bufmapped;		/* True for ordinary files. */
-static off_t bufoffset;		/* What read() normally remembers. */
+static int bufmapped;		/* True if buffer is memory-mapped.  */
 static off_t initial_bufoffset;	/* Initial value of bufoffset. */
 #endif

@ -245,32 +268,26 @@ static int Zflag;		/* uncompress before searching. */
   ? (val) \
   : (val) + ((alignment) - (size_t) (val) % (alignment)))

-/* Return the address of a new page-aligned buffer of size SIZE.  Set
-   *UP to the newly allocated (but possibly unaligned) buffer used to
-   *build the aligned buffer.  To free the buffer, free (*UP).  */
+/* Return the address of a page-aligned buffer of size SIZE,
+   reallocating it from *UP.  Set *UP to the newly allocated (but
+   possibly unaligned) buffer used to build the aligned buffer.  To
+   free the buffer, free (*UP).  */
 static char *
 page_alloc (size, up)
     size_t size;
     char **up;
 {
-  /* HAVE_WORKING_VALLOC means that valloc is properly declared, and
-     you can free the result of valloc.  This symbol is not (yet)
-     autoconfigured.  It can be useful to define HAVE_WORKING_VALLOC
-     while debugging, since some debugging memory allocators might
-     catch more bugs if this symbol is enabled.  */
-#if HAVE_WORKING_VALLOC
-  *up = valloc (size);
-  return *up;
-#else
  size_t asize = size + pagesize - 1;
  if (size <= asize)
    {
-      *up = malloc (asize);
-      if (*up)
-	return ALIGN_TO (*up, pagesize);
+      char *p = *up ? realloc (*up, asize) : malloc (asize);
+      if (p)
+	{
+	  *up = p;
+	  return ALIGN_TO (p, pagesize);
+	}
    }
  return NULL;
-#endif
 }

 /* Reset the buffer for a new file, returning zero if we should skip it.
@ -281,7 +298,9 @@ reset (fd, file, stats)
     char const *file;
     struct stats *stats;
 {
-  if (pagesize == 0)
+  if (pagesize)
+    bufsalloc = ALIGN_TO (bufalloc / PREFERRED_SAVE_FACTOR, pagesize);
+  else
    {
      size_t ubufsalloc;
      pagesize = getpagesize ();
@ -293,162 +312,212 @@ reset (fd, file, stats)
      ubufsalloc = BUFSALLOC;
 #endif
      bufsalloc = ALIGN_TO (ubufsalloc, pagesize);
-      bufalloc = 5 * bufsalloc;
+      bufalloc = PREFERRED_SAVE_FACTOR * bufsalloc;
      /* The 1 byte of overflow is a kludge for dfaexec(), which
 	 inserts a sentinel newline at the end of the buffer
 	 being searched.  There's gotta be a better way... */
      if (bufsalloc < ubufsalloc
-	  || bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc
+	  || bufalloc / PREFERRED_SAVE_FACTOR != bufsalloc
+	  || bufalloc + 1 < bufalloc
 	  || ! (buffer = page_alloc (bufalloc + 1, &ubuffer)))
 	fatal (_("memory exhausted"), 0);
-      bufbeg = buffer;
-      buflim = buffer;
    }
 #if HAVE_LIBZ > 0
-  if (Zflag) {
+  if (Zflag)
+    {
    gzbufdesc = gzdopen(fd, "r");
    if (gzbufdesc == NULL)
      fatal(_("memory exhausted"), 0);
-  }
+    }
 #endif
+
+  buflim = buffer;
  bufdesc = fd;

-  if (
-#if defined(HAVE_MMAP)
-      1
-#else
-      directories != READ_DIRECTORIES
-#endif
-      )
-    if (fstat (fd, &stats->stat) != 0)
-      {
-	error ("fstat", errno);
-	return 0;
-      }
+  if (fstat (fd, &stats->stat) != 0)
+    {
+      error ("fstat", errno);
+      return 0;
+    }
  if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
    return 0;
-#if defined(HAVE_MMAP)
  if (
 #if HAVE_LIBZ > 0
      Zflag ||
 #endif
-      !S_ISREG (stats->stat.st_mode))
-    bufmapped = 0;
+      S_ISREG (stats->stat.st_mode))
+    {
+      if (file)
+	bufoffset = 0;
+      else
+	{
+	  bufoffset = lseek (fd, 0, SEEK_CUR);
+	  if (bufoffset < 0)
+	    {
+	      error ("lseek", errno);
+	      return 0;
+	    }
+	}
+#ifdef HAVE_MMAP
+      initial_bufoffset = bufoffset;
+      bufmapped = mmap_option && bufoffset % pagesize == 0;
+#endif
+    }
  else
    {
-      bufmapped = 1;
-      bufoffset = initial_bufoffset = file ? 0 : lseek (fd, 0, 1);
-    }
+#ifdef HAVE_MMAP
+      bufmapped = 0;
 #endif
+    }
  return 1;
 }

 /* Read new stuff into the buffer, saving the specified
   amount of old stuff.  When we're done, 'bufbeg' points
   to the beginning of the buffer contents, and 'buflim'
-   points just after the end.  Return count of new stuff. */
+   points just after the end.  Return zero if there's an error.  */
 static int
 fillbuf (save, stats)
     size_t save;
     struct stats *stats;
 {
-  int cc;
-#if defined(HAVE_MMAP)
-  caddr_t maddr;
-#endif
+  size_t fillsize = 0;
+  int cc = 1;
+  size_t readsize;

-  if (save > bufsalloc)
+  /* Offset from start of unaligned buffer to start of old stuff
+     that we want to save.  */
+  size_t saved_offset = buflim - ubuffer - save;
+
+  if (bufsalloc < save)
    {
-      char *nubuffer;
-      char *nbuffer;
+      size_t aligned_save = ALIGN_TO (save, pagesize);
+      size_t maxalloc = (size_t) -1;
+      size_t newalloc;

-      while (save > bufsalloc)
-	bufsalloc *= 2;
-      bufalloc = 5 * bufsalloc;
-      if (bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc
-	  || ! (nbuffer = page_alloc (bufalloc + 1, &nubuffer)))
+      if (S_ISREG (stats->stat.st_mode))
+	{
+	  /* Calculate an upper bound on how much memory we should allocate.
+	     We can't use ALIGN_TO here, since off_t might be longer than
+	     size_t.  Watch out for arithmetic overflow.  */
+	  off_t to_be_read = stats->stat.st_size - bufoffset;
+	  size_t slop = to_be_read % pagesize;
+	  off_t aligned_to_be_read = to_be_read + (slop ? pagesize - slop : 0);
+	  off_t maxalloc_off = aligned_save + aligned_to_be_read;
+	  if (0 <= maxalloc_off && maxalloc_off == (size_t) maxalloc_off)
+	    maxalloc = maxalloc_off;
+	}
+
+      /* Grow bufsalloc until it is at least as great as `save'; but
+	 if there is an overflow, just grow it to the next page boundary.  */
+      while (bufsalloc < save)
+	if (bufsalloc < bufsalloc * 2)
+	  bufsalloc *= 2;
+	else
+	  {
+	    bufsalloc = aligned_save;
+	    break;
+	  }
+
+      /* Grow the buffer size to be PREFERRED_SAVE_FACTOR times
+	 bufsalloc....  */
+      newalloc = PREFERRED_SAVE_FACTOR * bufsalloc;
+      if (maxalloc < newalloc)
+	{
+	  /* ... except don't grow it more than a pagesize past the
+	     file size, as that might cause unnecessary memory
+	     exhaustion if the file is large.  */
+	  newalloc = maxalloc;
+	  bufsalloc = aligned_save;
+	}
+
+      /* Check that the above calculations made progress, which might
+         not occur if there is arithmetic overflow.  If there's no
+	 progress, or if the new buffer size is larger than the old
+	 and buffer reallocation fails, report memory exhaustion.  */
+      if (bufsalloc < save || newalloc < save
+	  || (newalloc == save && newalloc != maxalloc)
+	  || (bufalloc < newalloc
+	      && ! (buffer
+		    = page_alloc ((bufalloc = newalloc) + 1, &ubuffer))))
 	fatal (_("memory exhausted"), 0);
+    }

-      bufbeg = nbuffer + bufsalloc - save;
-      memcpy (bufbeg, buflim - save, save);
-      free (ubuffer);
-      ubuffer = nubuffer;
-      buffer = nbuffer;
-    }
-  else
-    {
-      bufbeg = buffer + bufsalloc - save;
-      memcpy (bufbeg, buflim - save, save);
-    }
+  bufbeg = buffer + bufsalloc - save;
+  memmove (bufbeg, ubuffer + saved_offset, save);
+  readsize = bufalloc - bufsalloc;

 #if defined(HAVE_MMAP)
-  if (bufmapped && bufoffset % pagesize == 0
-      && stats->stat.st_size - bufoffset >= bufalloc - bufsalloc)
+  if (bufmapped)
    {
-      maddr = buffer + bufsalloc;
-      maddr = mmap (maddr, bufalloc - bufsalloc, PROT_READ | PROT_WRITE,
-		   MAP_PRIVATE | MAP_FIXED, bufdesc, bufoffset);
-      if (maddr == (caddr_t) -1)
+      size_t mmapsize = readsize;
+
+      /* Don't mmap past the end of the file; some hosts don't allow this.
+	 Use `read' on the last page.  */
+      if (stats->stat.st_size - bufoffset < mmapsize)
 	{
-          /* This used to issue a warning, but on some hosts
-             (e.g. Solaris 2.5) mmap can fail merely because some
-             other process has an advisory read lock on the file.
-             There's no point alarming the user about this misfeature.  */
-#if 0
-	  fprintf (stderr, _("%s: warning: %s: %s\n"), prog, filename,
-		  strerror (errno));
-#endif
-	  goto tryread;
+	  mmapsize = stats->stat.st_size - bufoffset;
+	  mmapsize -= mmapsize % pagesize;
 	}
-#if 0
-      /* You might thing this (or MADV_WILLNEED) would help,
-	 but it doesn't, at least not on a Sun running 4.1.
-	 In fact, it actually slows us down about 30%! */
-      madvise (maddr, bufalloc - bufsalloc, MADV_SEQUENTIAL);
-#endif
-      cc = bufalloc - bufsalloc;
-      bufoffset += cc;
-    }
-  else
-    {
-    tryread:
-      /* We come here when we're not going to use mmap() any more.
-	 Note that we need to synchronize the file offset the
-	 first time through. */
-      if (bufmapped)
+
+      if (mmapsize
+	  && (mmap ((caddr_t) (buffer + bufsalloc), mmapsize,
+		    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED,
+		    bufdesc, bufoffset)
+	      != (caddr_t) -1))
 	{
-	  bufmapped = 0;
-	  if (bufoffset != initial_bufoffset)
-	    lseek (bufdesc, bufoffset, 0);
+	  /* Do not bother to use madvise with MADV_SEQUENTIAL or
+	     MADV_WILLNEED on the mmapped memory.  One might think it
+	     would help, but it slows us down about 30% on SunOS 4.1.  */
+	  fillsize = mmapsize;
 	}
-#if HAVE_LIBZ > 0
-      if (Zflag)
-        cc = gzread (gzbufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
      else
-#endif
-      cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
+	{
+	  /* Stop using mmap on this file.  Synchronize the file
+	     offset.  Do not warn about mmap failures.  On some hosts
+	     (e.g. Solaris 2.5) mmap can fail merely because some
+	     other process has an advisory read lock on the file.
+	     There's no point alarming the user about this misfeature.  */
+	  bufmapped = 0;
+	  if (bufoffset != initial_bufoffset
+	      && lseek (bufdesc, bufoffset, SEEK_SET) < 0)
+	    {
+	      error ("lseek", errno);
+	      cc = 0;
+	    }
+	}
    }
-#else
-#if HAVE_LIBZ > 0
-  if (Zflag)
-    cc = gzread (gzbufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
-  else
-#endif
-  cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
 #endif /*HAVE_MMAP*/
-#if O_BINARY
-  if (cc > 0)
-    cc = undossify_input (buffer + bufsalloc, cc);
+
+  if (! fillsize)
+    {
+      ssize_t bytesread;
+      do
+#if HAVE_LIBZ > 0
+	if (Zflag)
+	  bytesread = gzread (gzbufdesc, buffer + bufsalloc, readsize);
+	else
 #endif
-  if (cc > 0)
-    buflim = buffer + bufsalloc + cc;
-  else
-    buflim = buffer + bufsalloc;
+	  bytesread = read (bufdesc, buffer + bufsalloc, readsize);
+      while (bytesread < 0 && errno == EINTR);
+      if (bytesread < 0)
+	cc = 0;
+      else
+	fillsize = bytesread;
+    }
+
+  bufoffset += fillsize;
+#if O_BINARY
+  if (fillsize)
+    fillsize = undossify_input (buffer + bufsalloc, fillsize);
+#endif
+  buflim = buffer + bufsalloc + fillsize;
  return cc;
 }

 /* Flags controlling the style of output. */
 static int always_text;		/* Assume the input is always text. */
+static int filename_mask;	/* If zero, output nulls after filenames.  */
 static int out_quiet;		/* Suppress all normal output. */
 static int out_invert;		/* Print nonmatching stuff. */
 static int out_file;		/* Print filenames. */
@ -480,11 +549,9 @@ nlscan (lim)
     char *lim;
 {
  char *beg;
-
-  for (beg = lastnl; beg < lim; ++beg)
-    if (*beg == '\n')
-      ++totalnl;
-  lastnl = beg;
+  for (beg = lastnl;  (beg = memchr (beg, eolbyte, lim - beg));  beg++)
+    totalnl++;
+  lastnl = lim;
 }

 static void
@ -513,7 +580,7 @@ prline (beg, lim, sep)
     int sep;
 {
  if (out_file)
-    printf ("%s%c", filename, sep);
+    printf ("%s%c", filename, sep & filename_mask);
  if (out_line)
    {
      nlscan (beg);
@ -546,7 +613,7 @@ prpending (lim)
  while (pending > 0 && lastout < lim)
    {
      --pending;
-      if ((nl = memchr (lastout, '\n', lim - lastout)) != 0)
+      if ((nl = memchr (lastout, eolbyte, lim - lastout)) != 0)
 	++nl;
      else
 	nl = lim;
@ -564,6 +631,7 @@ prtext (beg, lim, nlinesp)
 {
  static int used;		/* avoid printing "--" before any output */
  char *bp, *p, *nl;
+  char eol = eolbyte;
  int i, n;

  if (!out_quiet && pending > 0)
@ -580,7 +648,7 @@ prtext (beg, lim, nlinesp)
 	if (p > bp)
 	  do
 	    --p;
-	  while (p > bp && p[-1] != '\n');
+	  while (p > bp && p[-1] != eol);

      /* We only print the "--" separator if our output is
 	 discontiguous from the last output in the file. */
@ -589,7 +657,7 @@ prtext (beg, lim, nlinesp)

      while (p < beg)
 	{
-	  nl = memchr (p, '\n', beg - p);
+	  nl = memchr (p, eol, beg - p);
 	  prline (p, nl + 1, '-');
 	  p = nl + 1;
 	}
@ -600,7 +668,7 @@ prtext (beg, lim, nlinesp)
      /* Caller wants a line count. */
      for (n = 0; p < lim; ++n)
 	{
-	  if ((nl = memchr (p, '\n', lim - p)) != 0)
+	  if ((nl = memchr (p, eol, lim - p)) != 0)
 	    ++nl;
 	  else
 	    nl = lim;
@ -614,7 +682,7 @@ prtext (beg, lim, nlinesp)
    if (!out_quiet)
      prline (beg, lim, ':');

-  pending = out_after;
+  pending = out_quiet ? 0 : out_after;
  used = 1;
 }

@ -629,13 +697,14 @@ grepbuf (beg, lim)
  int nlines, n;
  register char *p, *b;
  char *endp;
+  char eol = eolbyte;

  nlines = 0;
  p = beg;
  while ((b = (*execute)(p, lim - p, &endp)) != 0)
    {
      /* Avoid matching the empty line at the end of the buffer. */
-      if (b == lim && ((b > beg && b[-1] == '\n') || b == beg))
+      if (b == lim && ((b > beg && b[-1] == eol) || b == beg))
 	break;
      if (!out_invert)
 	{
@ -672,6 +741,7 @@ grep (fd, file, stats)
  int not_text;
  size_t residue, save;
  char *beg, *lim;
+  char eol = eolbyte;

  if (!reset (fd, file, stats))
    return 0;
@ -700,7 +770,7 @@ grep (fd, file, stats)
  residue = 0;
  save = 0;

-  if (fillbuf (save, stats) < 0)
+  if (! fillbuf (save, stats))
    {
      if (! (is_EISDIR (errno, file) && suppress_errors))
 	error (filename, errno);
@ -708,7 +778,7 @@ grep (fd, file, stats)
    }

  not_text = (! (always_text | out_quiet)
-	      && memchr (bufbeg, '\0', buflim - bufbeg));
+	      && memchr (bufbeg, eol ? '\0' : '\200', buflim - bufbeg));
  done_on_match += not_text;
  out_quiet += not_text;

@ -720,7 +790,7 @@ grep (fd, file, stats)
      if (buflim - bufbeg == save)
 	break;
      beg = bufbeg + save - residue;
-      for (lim = buflim; lim > beg && lim[-1] != '\n'; --lim)
+      for (lim = buflim; lim > beg && lim[-1] != eol; --lim)
 	;
      residue = buflim - lim;
      if (beg < lim)
@ -738,7 +808,7 @@ grep (fd, file, stats)
 	  ++i;
 	  do
 	    --beg;
-	  while (beg > bufbeg && beg[-1] != '\n');
+	  while (beg > bufbeg && beg[-1] != eol);
 	}
      if (beg != lastout)
 	lastout = 0;
@ -746,7 +816,7 @@ grep (fd, file, stats)
      totalcc += buflim - bufbeg - save;
      if (out_line)
 	nlscan (beg);
-      if (fillbuf (save, stats) < 0)
+      if (! fillbuf (save, stats))
 	{
 	  if (! (is_EISDIR (errno, file) && suppress_errors))
 	    error (filename, errno);
@ -784,7 +854,8 @@ grepfile (file, stats)
    }
  else
    {
-      desc = open (file, O_RDONLY);
+      while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR)
+	continue;

      if (desc < 0)
 	{
@ -843,30 +914,26 @@ grepfile (file, stats)
      if (count_matches)
 	{
 	  if (out_file)
-	    printf ("%s:", filename);
+	    printf ("%s%c", filename, ':' & filename_mask);
 	  printf ("%d\n", count);
 	}

-      if (count)
-	{
-	  status = 0;
-	  if (list_files == 1)
-	    printf ("%s\n", filename);
-	}
-      else
-	{
-	  status = 1;
-	  if (list_files == -1)
-	    printf ("%s\n", filename);
-	}
+      status = !count;
+      if (list_files == 1 - 2 * status)
+	printf ("%s%c", filename, '\n' & filename_mask);

 #if HAVE_LIBZ > 0
      if (Zflag)
 	gzclose(gzbufdesc);
      else
 #endif
-      if (file && close (desc) != 0)
-	error (file, errno);
+      if (file)
+	while (close (desc) != 0)
+	  if (errno != EINTR)
+	    {
+	      error (file, errno);
+	      break;
+	    }
    }

  return status;
@ -882,8 +949,8 @@ grepdir (dir, stats)
  char *name_space;

  for (ancestor = stats;  (ancestor = ancestor->parent) != 0;  )
-    if (! ((ancestor->stat.st_ino ^ stats->stat.st_ino)
-	   | (ancestor->stat.st_dev ^ stats->stat.st_dev)))
+    if (ancestor->stat.st_ino == stats->stat.st_ino
+	&& ancestor->stat.st_dev == stats->stat.st_dev)
      {
 	if (!suppress_errors)
 	  fprintf (stderr, _("%s: warning: %s: %s\n"), prog, dir,
@ -946,24 +1013,29 @@ int status;
      printf (_("Usage: %s [OPTION]... PATTERN [FILE] ...\n"), prog);
      printf (_("\
 Search for PATTERN in each FILE or standard input.\n\
+Example: %s -i 'hello.*world' menu.h main.c\n\
 \n\
-Regexp selection and interpretation:\n\
+Regexp selection and interpretation:\n"), prog);
+      printf (_("\
  -E, --extended-regexp     PATTERN is an extended regular expression\n\
-  -F, --fixed-regexp        PATTERN is a fixed string separated by newlines\n\
-  -G, --basic-regexp        PATTERN is a basic regular expression\n\
+  -F, --fixed-strings       PATTERN is a set of newline-separated strings\n\
+  -G, --basic-regexp        PATTERN is a basic regular expression\n"));
+      printf (_("\
  -e, --regexp=PATTERN      use PATTERN as a regular expression\n\
  -f, --file=FILE           obtain PATTERN from FILE\n\
  -i, --ignore-case         ignore case distinctions\n\
  -w, --word-regexp         force PATTERN to match only whole words\n\
-  -x, --line-regexp         force PATTERN to match only whole lines\n"));
+  -x, --line-regexp         force PATTERN to match only whole lines\n\
+  -z, --null-data           a data line ends in 0 byte, not newline\n"));
      printf (_("\
 \n\
 Miscellaneous:\n\
  -s, --no-messages         suppress error messages\n\
-  -v, --revert-match        select non-matching lines\n\
+  -v, --invert-match        select non-matching lines\n\
  -V, --version             print version information and exit\n\
+      --help                display this help and exit\n\
  -Z, --decompress          decompress input before searching (HAVE_LIBZ=1)\n\
-      --help                display this help and exit\n"));
+      --mmap                use memory-mapped input if possible\n"));
      printf (_("\
 \n\
 Output control:\n\
@ -978,31 +1050,42 @@ Output control:\n\
  -r, --recursive           equivalent to --directories=recurse.\n\
  -L, --files-without-match only print FILE names containing no match\n\
  -l, --files-with-matches  only print FILE names containing matches\n\
-  -c, --count               only print a count of matching lines per FILE\n"));
+  -c, --count               only print a count of matching lines per FILE\n\
+      --null                print 0 byte after FILE name\n"));
      printf (_("\
 \n\
 Context control:\n\
  -B, --before-context=NUM  print NUM lines of leading context\n\
  -A, --after-context=NUM   print NUM lines of trailing context\n\
  -C, --context[=NUM]       print NUM (default 2) lines of output context\n\
-                            unless overriden by -A or -B\n\
+                            unless overridden by -A or -B\n\
  -NUM                      same as --context=NUM\n\
  -U, --binary              do not strip CR characters at EOL (MSDOS)\n\
  -u, --unix-byte-offsets   report offsets as if CRs were not there (MSDOS)\n\
 \n\
-If no -[GEF], then `egrep' assumes -E, `fgrep' -F, else -G.\n\
-With no FILE, or when FILE is -, read standard input. If less than\n\
-two FILEs given, assume -h. Exit with 0 if matches, with 1 if none.\n\
-Exit with 2 if syntax errors or system errors.\n"));
+`egrep' means `grep -E'.  `fgrep' means `grep -F'.\n\
+With no FILE, or when FILE is -, read standard input.  If less than\n\
+two FILEs given, assume -h.  Exit status is 0 if match, 1 if no match,\n\
+and 2 if trouble.\n"));
      printf (_("\nReport bugs to <bug-gnu-utils@gnu.org>.\n"));
    }
  exit (status);
 }

+/* Set the global matcher name to M; a different, already-chosen matcher is reported via fatal.  */
+static void
+setmatcher (m)
+     char const *m;  /* Matcher name: "egrep", "fgrep", "grep", or the -X option argument.  */
+{
+  if (matcher && strcmp (matcher, m) != 0)  /* Repeating the same matcher is not a conflict.  */
+    fatal (_("conflicting matchers specified"), 0);
+  matcher = m;  /* Stores the pointer itself; M is not copied.  */
+}
+
 /* Go through the matchers vector and look for the specified matcher.
   If we find it, install it in compile and execute, and return 1.  */
 static int
-setmatcher (name)
+install_matcher (name)
     char const *name;
 {
  int i;
@ -1158,7 +1241,8 @@ main (argc, argv)
  keys = NULL;
  keycc = 0;
  with_filenames = 0;
-  matcher = NULL;
+  eolbyte = '\n';
+  filename_mask = ~0;

  /* The value -1 means to use DEFAULT_CONTEXT. */
  out_after = out_before = -1;
@ -1179,15 +1263,8 @@ main (argc, argv)

  prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);

-  while ((opt = getopt_long (argc, argv,
-#if O_BINARY
-         "0123456789A:B:C::EFGHVX:abcd:e:f:hiLlnqrsvwxyUu",
-#elif HAVE_LIBZ > 0
-         "0123456789A:B:C::EFGHRVX:Zabcd:e:f:hiLlnqrsvwxy",
-#else
-         "0123456789A:B:C::EFGHRVX:abcd:e:f:hiLlnqrsvwxy",
-#endif
-         long_options, NULL)) != EOF)
+  while ((opt = getopt_long (argc, argv, short_options, long_options, NULL))
+	 != -1)
    switch (opt)
      {
      case '0':
@ -1229,44 +1306,33 @@ main (argc, argv)
 	  default_context = 2;
 	break;
      case 'E':
-	if (matcher && strcmp (matcher, "posix-egrep") != 0)
-	  fatal (_("you may specify only one of -E, -F, or -G"), 0);
-	matcher = "posix-egrep";
+	setmatcher ("egrep");
 	break;
      case 'F':
-	if (matcher && strcmp(matcher, "fgrep") != 0)
-	  fatal(_("you may specify only one of -E, -F, or -G"), 0);;
-	matcher = "fgrep";
+	setmatcher ("fgrep");
 	break;
      case 'G':
-	if (matcher && strcmp (matcher, "grep") != 0)
-	  fatal (_("you may specify only one of -E, -F, or -G"), 0);
-	matcher = "grep";
+	setmatcher ("grep");
 	break;
      case 'H':
 	with_filenames = 1;
 	break;
-#if O_BINARY
      case 'U':
+#if O_BINARY
 	dos_use_file_type = DOS_BINARY;
+#endif
 	break;
      case 'u':
+#if O_BINARY
 	dos_report_unix_offset = 1;
-	break;
 #endif
+	break;
      case 'V':
 	show_version = 1;
 	break;
      case 'X':
-	if (matcher)
-	  fatal (_("matcher already specified"), 0);
-	matcher = optarg;
+	setmatcher (optarg);
 	break;
-#if HAVE_LIBZ > 0
-      case 'Z':
-	Zflag = 1;
-	break;
-#endif
      case 'a':
 	always_text = 1;
 	break;
@ -1357,6 +1423,16 @@ main (argc, argv)
      case 'x':
 	match_lines = 1;
 	break;
+      case 'Z':
+#if HAVE_LIBZ > 0
+	Zflag = 1;
+#else
+	filename_mask = 0;
+#endif
+	break;
+      case 'z':
+	eolbyte = '\0';
+	break;
      case 0:
 	/* long options */
 	break;
@ -1370,9 +1446,12 @@ main (argc, argv)
  if (out_before < 0)
    out_before = default_context;

+  if (! matcher)
+    matcher = prog;
+
  if (show_version)
    {
-      printf (_("grep (GNU grep) %s\n"), VERSION);
+      printf (_("%s (GNU grep) %s\n"), matcher, VERSION);
      printf ("\n");
      printf (_("\
 Copyright (C) 1988, 1992-1998, 1999 Free Software Foundation, Inc.\n"));
@ -1404,10 +1483,7 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"))
    else
      usage (2);

-  if (! matcher)
-    matcher = prog;
-
-  if (!setmatcher (matcher) && !setmatcher ("default"))
+  if (!install_matcher (matcher) && !install_matcher ("default"))
    abort ();

  (*compile)(keys, keycc);
--- a/gnu/usr.bin/grep/grep.h
+++ b/gnu/usr.bin/grep/grep.h
@ -37,14 +37,12 @@ extern struct matcher
  char *(*execute) PARAMS ((char *, size_t, char **));
 } matchers[];

-/* Exported from grep.c. */
-extern char const *matcher;
-
 /* Exported from fgrepmat.c, egrepmat.c, grepmat.c.  */
-extern char const default_matcher[];
+extern char const *matcher;

 /* The following flags are exported from grep for the matchers
   to look at. */
 extern int match_icase;		/* -i */
 extern int match_words;		/* -w */
 extern int match_lines;		/* -x */
+extern unsigned char eolbyte;	/* -z */
--- a/gnu/usr.bin/grep/search.c
+++ b/gnu/usr.bin/grep/search.c
@ -48,7 +48,6 @@ struct matcher matchers[] = {
  { "default", Gcompile, EGexecute },
  { "grep", Gcompile, EGexecute },
  { "egrep", Ecompile, EGexecute },
-  { "posix-egrep", Ecompile, EGexecute },
  { "awk", Ecompile, EGexecute },
  { "fgrep", Fcompile, Fexecute },
  { 0, 0, 0 },
@ -61,7 +60,7 @@ struct matcher matchers[] = {
 static struct dfa dfa;

 /* Regex compiled regexp. */
-static struct re_pattern_buffer regex;
+static struct re_pattern_buffer regexbuf;

 /* KWset compiled pattern.  For Ecompile and Gcompile, we compile
   a list of strings, at least one of which is known to occur in
@ -140,9 +139,9 @@ Gcompile(pattern, size)
  const char *err;

  re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
-  dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase);
+  dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);

-  if ((err = re_compile_pattern(pattern, size, &regex)) != 0)
+  if ((err = re_compile_pattern(pattern, size, &regexbuf)) != 0)
    fatal(err, 0);

  /* In the match_words and match_lines cases, we use a different pattern
@ -155,7 +154,8 @@ Gcompile(pattern, size)
 	 (^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
 	 In the whole-line case, we use the pattern:
 	 ^(userpattern)$.
-	 BUG: Using [A-Za-z_] is locale-dependent!  */
+	 BUG: Using [A-Za-z_] is locale-dependent!
+	 To avoid that, the patterns below use [:alnum:] instead.  */

      char *n = malloc(size + 50);
      int i = 0;
@ -165,14 +165,14 @@ Gcompile(pattern, size)
      if (match_lines)
 	strcpy(n, "^\\(");
      if (match_words)
-	strcpy(n, "\\(^\\|[^0-9A-Za-z_]\\)\\(");
+	strcpy(n, "\\(^\\|[^[:alnum:]_]\\)\\(");

      i = strlen(n);
      memcpy(n + i, pattern, size);
      i += size;

      if (match_words)
-	strcpy(n + i, "\\)\\([^0-9A-Za-z_]\\|$\\)");
+	strcpy(n + i, "\\)\\([^[:alnum:]_]\\|$\\)");
      if (match_lines)
 	strcpy(n + i, "\\)$");

@ -192,23 +192,18 @@ Ecompile(pattern, size)
 {
  const char *err;

-  if (strcmp(matcher, "posix-egrep") == 0)
-    {
-      re_set_syntax(RE_SYNTAX_POSIX_EGREP);
-      dfasyntax(RE_SYNTAX_POSIX_EGREP, match_icase);
-    }
-  else if (strcmp(matcher, "awk") == 0)
+  if (strcmp(matcher, "awk") == 0)
    {
      re_set_syntax(RE_SYNTAX_AWK);
-      dfasyntax(RE_SYNTAX_AWK, match_icase);
+      dfasyntax(RE_SYNTAX_AWK, match_icase, eolbyte);
    }
  else
    {
-      re_set_syntax(RE_SYNTAX_EGREP);
-      dfasyntax(RE_SYNTAX_EGREP, match_icase);
+      re_set_syntax (RE_SYNTAX_POSIX_EGREP);
+      dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
    }

-  if ((err = re_compile_pattern(pattern, size, &regex)) != 0)
+  if ((err = re_compile_pattern(pattern, size, &regexbuf)) != 0)
    fatal(err, 0);

  /* In the match_words and match_lines cases, we use a different pattern
@ -221,7 +216,8 @@ Ecompile(pattern, size)
 	 (^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
 	 In the whole-line case, we use the pattern:
 	 ^(userpattern)$.
-	 BUG: Using [A-Za-z_] is locale-dependent!  */
+	 BUG: Using [A-Za-z_] is locale-dependent!
+	 To avoid that, the patterns below use the [:alnum:] character class.  */

      char *n = malloc(size + 50);
      int i = 0;
@ -231,14 +227,14 @@ Ecompile(pattern, size)
      if (match_lines)
 	strcpy(n, "^(");
      if (match_words)
-	strcpy(n, "(^|[^0-9A-Za-z_])(");
+	strcpy(n, "(^|[^[:alnum:]_])(");

      i = strlen(n);
      memcpy(n + i, pattern, size);
      i += size;

      if (match_words)
-	strcpy(n + i, ")([^0-9A-Za-z_]|$)");
+	strcpy(n + i, ")([^[:alnum:]_]|$)");
      if (match_lines)
 	strcpy(n + i, ")$");

@ -258,6 +254,7 @@ EGexecute(buf, size, endp)
     char **endp;
 {
  register char *buflim, *beg, *end, save;
+  char eol = eolbyte;
  int backref, start, len;
  struct kwsmatch kwsm;
  static struct re_registers regs; /* This is static on account of a BRAIN-DEAD
@ -275,10 +272,10 @@ EGexecute(buf, size, endp)
 	    goto failure;
 	  /* Narrow down to the line containing the candidate, and
 	     run it through DFA. */
-	  end = memchr(beg, '\n', buflim - beg);
+	  end = memchr(beg, eol, buflim - beg);
 	  if (!end)
 	    end = buflim;
-	  while (beg > buf && beg[-1] != '\n')
+	  while (beg > buf && beg[-1] != eol)
 	    --beg;
 	  save = *end;
 	  if (kwsm.index < lastexact)
@ -302,10 +299,10 @@ EGexecute(buf, size, endp)
 	  if (!beg)
 	    goto failure;
 	  /* Narrow down to the line we've found. */
-	  end = memchr(beg, '\n', buflim - beg);
+	  end = memchr(beg, eol, buflim - beg);
 	  if (!end)
 	    end = buflim;
-	  while (beg > buf && beg[-1] != '\n')
+	  while (beg > buf && beg[-1] != eol)
 	    --beg;
 	  /* Successful, no backreferences encountered! */
 	  if (!backref)
@ -313,8 +310,8 @@ EGexecute(buf, size, endp)
 	}
      /* If we've made it to this point, this means DFA has seen
 	 a probable match, and we need to run it through Regex. */
-      regex.not_eol = 0;
-      if ((start = re_search(&regex, beg, end - beg, 0, end - beg, &regs)) >= 0)
+      regexbuf.not_eol = 0;
+      if ((start = re_search(&regexbuf, beg, end - beg, 0, end - beg, &regs)) >= 0)
 	{
 	  len = regs.end[0] - start;
 	  if ((!match_lines && !match_words)
@ -337,8 +334,8 @@ EGexecute(buf, size, endp)
 		  {
 		    /* Try a shorter length anchored at the same place. */
 		    --len;
-		    regex.not_eol = 1;
-		    len = re_match(&regex, beg, start + len, start, &regs);
+		    regexbuf.not_eol = 1;
+		    len = re_match(&regexbuf, beg, start + len, start, &regs);
 		  }
 		if (len <= 0)
 		  {
@ -346,8 +343,8 @@ EGexecute(buf, size, endp)
 		    if (start == end - beg)
 		      break;
 		    ++start;
-		    regex.not_eol = 0;
-		    start = re_search(&regex, beg, end - beg,
+		    regexbuf.not_eol = 0;
+		    start = re_search(&regexbuf, beg, end - beg,
 				      start, end - beg - start, &regs);
 		    len = regs.end[0] - start;
 		  }
@ -396,6 +393,7 @@ Fexecute(buf, size, endp)
 {
  register char *beg, *try, *end;
  register size_t len;
+  char eol = eolbyte;
  struct kwsmatch kwsmatch;

  for (beg = buf; beg <= buf + size; ++beg)
@ -405,9 +403,9 @@ Fexecute(buf, size, endp)
      len = kwsmatch.size[0];
      if (match_lines)
 	{
-	  if (beg > buf && beg[-1] != '\n')
+	  if (beg > buf && beg[-1] != eol)
 	    continue;
-	  if (beg + len < buf + size && beg[len] != '\n')
+	  if (beg + len < buf + size && beg[len] != eol)
 	    continue;
 	  goto success;
 	}
@ -431,7 +429,7 @@ Fexecute(buf, size, endp)
  return 0;

 success:
-  if ((end = memchr(beg + len, '\n', (buf + size) - (beg + len))) != 0)
+  if ((end = memchr(beg + len, eol, (buf + size) - (beg + len))) != 0)
    ++end;
  else
    end = buf + size;