Merge FreeBSD changes into GNU grep 2.4.

This commit is contained in:
David E. O'Brien 2000-01-04 03:25:40 +00:00
parent e3bfb27984
commit 7e5b33c6cd
6 changed files with 558 additions and 347 deletions

View File

@ -353,15 +353,20 @@ static reg_syntax_t syntax_bits, syntax_bits_set;
/* Flag for case-folding letters into sets. */
static int case_fold;
/* End-of-line byte in data. */
static unsigned char eolbyte;
/* Entry point to set syntax options.
   BITS is stored in syntax_bits, FOLD in case_fold, and EOL in eolbyte
   (the end-of-line byte); syntax_bits_set is raised to record that the
   options have been supplied.
   NOTE(review): two parameter-list lines appear below because this view
   shows the removed two-argument line next to its three-argument
   replacement. */
void
dfasyntax(bits, fold)
dfasyntax(bits, fold, eol)
reg_syntax_t bits;
int fold;
int eol;
{
syntax_bits_set = 1;
syntax_bits = bits;
case_fold = fold;
/* EOL arrives as an int and is stored into the unsigned char eolbyte. */
eolbyte = eol;
}
/* Lexical analyzer. All the dross that deals with the obnoxious
@ -580,11 +585,32 @@ lex()
goto normal_char;
if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
goto normal_char;
minrep = maxrep = 0;
if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
goto normal_char;
if (syntax_bits & RE_NO_BK_BRACES)
{
/* Scan ahead for a valid interval; if it's not valid,
treat it as a literal '{'. */
int lo = -1, hi = -1;
char const *p = lexptr;
char const *lim = p + lexleft;
for (; p != lim && ISDIGIT (*p); p++)
lo = (lo < 0 ? 0 : lo * 10) + *p - '0';
if (p != lim && *p == ',')
while (++p != lim && ISDIGIT (*p))
hi = (hi < 0 ? 0 : hi * 10) + *p - '0';
else
hi = lo;
if (p == lim || *p != '}'
|| lo < 0 || RE_DUP_MAX < hi || (0 <= hi && hi < lo))
goto normal_char;
}
minrep = 0;
/* Cases:
{M} - exact count
{M,} - minimum count, maximum is infinity
{,M} - 0 through M
{M,N} - M through N */
FETCH(c, _("unfinished repeat count"));
if (ISDIGIT(c))
@ -598,16 +624,27 @@ lex()
minrep = 10 * minrep + c - '0';
}
}
else if (c != ',')
else
dfaerror(_("malformed repeat count"));
if (c == ',')
for (;;)
{
FETCH(c, _("unfinished repeat count"));
if (!ISDIGIT(c))
break;
maxrep = 10 * maxrep + c - '0';
}
{
FETCH (c, _("unfinished repeat count"));
if (! ISDIGIT (c))
maxrep = -1;
else
{
maxrep = c - '0';
for (;;)
{
FETCH (c, _("unfinished repeat count"));
if (! ISDIGIT (c))
break;
maxrep = 10 * maxrep + c - '0';
}
if (0 <= maxrep && maxrep < minrep)
dfaerror (_("malformed repeat count"));
}
}
else
maxrep = minrep;
if (!(syntax_bits & RE_NO_BK_BRACES))
@ -659,7 +696,7 @@ lex()
zeroset(ccl);
notset(ccl);
if (!(syntax_bits & RE_DOT_NEWLINE))
clrbit('\n', ccl);
clrbit(eolbyte, ccl);
if (syntax_bits & RE_DOT_NOT_NULL)
clrbit('\0', ccl);
laststart = 0;
@ -776,7 +813,7 @@ lex()
{
notset(ccl);
if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
clrbit('\n', ccl);
clrbit(eolbyte, ccl);
}
laststart = 0;
return lasttok = CSET + charclass_index(ccl);
@ -942,7 +979,7 @@ closure()
{
ntokens = nsubtoks(dfa->tindex);
tindex = dfa->tindex - ntokens;
if (maxrep == 0)
if (maxrep < 0)
addtok(PLUS);
if (minrep == 0)
addtok(QMARK);
@ -1605,7 +1642,7 @@ dfastate(s, d, trans)
for (i = 0; i < NOTCHAR; ++i)
if (IS_WORD_CONSTITUENT(i))
setbit(i, letters);
setbit('\n', newline);
setbit(eolbyte, newline);
}
zeroset(matches);
@ -1626,7 +1663,7 @@ dfastate(s, d, trans)
{
if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
d->states[s].newline, 1))
clrbit('\n', matches);
clrbit(eolbyte, matches);
if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
d->states[s].newline, 0))
for (j = 0; j < CHARCLASS_INTS; ++j)
@ -1737,7 +1774,7 @@ dfastate(s, d, trans)
state_letter = state;
for (i = 0; i < NOTCHAR; ++i)
trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state;
trans['\n'] = state_newline;
trans[eolbyte] = state_newline;
}
else
for (i = 0; i < NOTCHAR; ++i)
@ -1761,7 +1798,7 @@ dfastate(s, d, trans)
/* Find out if the new state will want any context information. */
wants_newline = 0;
if (tstbit('\n', labels[i]))
if (tstbit(eolbyte, labels[i]))
for (j = 0; j < follows.nelem; ++j)
if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
wants_newline = 1;
@ -1793,7 +1830,7 @@ dfastate(s, d, trans)
{
int c = j * INTBITS + k;
if (c == '\n')
if (c == eolbyte)
trans[c] = state_newline;
else if (IS_WORD_CONSTITUENT(c))
trans[c] = state_letter;
@ -1884,8 +1921,8 @@ build_state(s, d)
/* Keep the newline transition in a special place so we can use it as
a sentinel. */
d->newlines[s] = trans['\n'];
trans['\n'] = -1;
d->newlines[s] = trans[eolbyte];
trans[eolbyte] = -1;
if (ACCEPTING(s, *d))
d->fails[s] = trans;
@ -1933,6 +1970,7 @@ dfaexec(d, begin, end, newline, count, backref)
register unsigned char *p; /* Current input character. */
register int **trans, *t; /* Copy of d->trans so it can be optimized
into a register. */
register unsigned char eol = eolbyte; /* Likewise for eolbyte. */
static int sbit[NOTCHAR]; /* Table for anding with d->success. */
static int sbit_init;
@ -1943,7 +1981,7 @@ dfaexec(d, begin, end, newline, count, backref)
sbit_init = 1;
for (i = 0; i < NOTCHAR; ++i)
sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
sbit['\n'] = 4;
sbit[eol] = 4;
}
if (! d->tralloc)
@ -1952,7 +1990,7 @@ dfaexec(d, begin, end, newline, count, backref)
s = s1 = 0;
p = (unsigned char *) begin;
trans = d->trans;
*end = '\n';
*end = eol;
for (;;)
{
@ -1980,7 +2018,7 @@ dfaexec(d, begin, end, newline, count, backref)
}
/* If the previous character was a newline, count it. */
if (count && (char *) p <= end && p[-1] == '\n')
if (count && (char *) p <= end && p[-1] == eol)
++*count;
/* Check if we've run off the end of the buffer. */
@ -1994,7 +2032,7 @@ dfaexec(d, begin, end, newline, count, backref)
continue;
}
if (p[-1] == '\n' && newline)
if (p[-1] == eol && newline)
{
s = d->newlines[s1];
continue;

View File

@ -322,9 +322,10 @@ struct dfa
/* Entry points. */
/* dfasyntax() takes two arguments; the first sets the syntax bits described
earlier in this file, and the second sets the case-folding flag. */
extern void dfasyntax PARAMS ((reg_syntax_t, int));
/* dfasyntax() takes three arguments; the first sets the syntax bits described
earlier in this file, the second sets the case-folding flag, and the
third specifies the line terminator. */
extern void dfasyntax PARAMS ((reg_syntax_t, int, int));
/* Compile the given string of the given length into the given struct dfa.
Final argument is a flag specifying whether to build a searching or an

View File

@ -1,26 +1,68 @@
.\" grep man page
.\" $FreeBSD$
.if !\n(.g \{\
. if !\w|\*(lq| \{\
. ds lq ``
. if \w'\(lq' .ds lq "\(lq
. \}
. if !\w|\*(rq| \{\
. ds rq ''
. if \w'\(rq' .ds rq "\(rq
. \}
.\}
.de Id
.ds Dt \\$4
..
.Id $Id: grep.1,v 1.1 1998/11/22 06:45:20 alainm Exp $
.Id $Id: grep.1,v 1.7 1999/10/12 20:41:01 alainm Exp $
.TH GREP 1 \*(Dt "GNU Project"
.SH NAME
grep, egrep, fgrep, zgrep \- print lines matching a pattern
.SH SYNOPSIS
.B grep
[-[AB] NUM] [-CEFGVZabchiLlnqrsvwxyUu] [-e PATTERN | -f FILE]
[-d ACTION] [--directories=ACTION]
[--extended-regexp] [--fixed-strings] [--basic-regexp]
[--regexp=PATTERN] [--file=FILE] [--ignore-case] [--word-regexp]
[--line-regexp] [--line-regexp] [--no-messages] [--revert-match]
[--version] [--help] [--byte-offset] [--line-number]
[--with-filename] [--no-filename] [--quiet] [--silent] [--text]
[--files-without-match] [--files-with-matcces] [--count]
[--before-context=NUM] [--after-context=NUM] [--context]
[--binary] [--unix-byte-offsets] [--recursive]
[--decompress]
.I files...
.RB [ \- [ ABC ]
.IR NUM ]
.RB [ \-EFGHLUVZabchilnqrsuvwxyuz ]
.RB [ \-e
.I PATTERN
|
.B \-f
.IR FILE ]
.RB [ \-d
.IR ACTION ]
.RB [ \-\^\-directories=\fIACTION\fP ]
.RB [ \-\^\-extended-regexp ]
.RB [ \-\^\-fixed-strings ]
.RB [ \-\^\-basic-regexp ]
.RB [ \-\^\-regexp=\fIPATTERN\fP ]
.RB [ \-\^\-file=\fIFILE\fP ]
.RB [ \-\^\-ignore-case ]
.RB [ \-\^\-word-regexp ]
.RB [ \-\^\-line-regexp ]
.RB [ \-\^\-line-regexp ]
.RB [ \-\^\-no-messages ]
.RB [ \-\^\-invert-match ]
.RB [ \-\^\-version ]
.RB [ \-\^\-help ]
.RB [ \-\^\-byte-offset ]
.RB [ \-\^\-line-number ]
.RB [ \-\^\-with-filename ]
.RB [ \-\^\-no-filename ]
.RB [ \-\^\-quiet ]
.RB [ \-\^\-silent ]
.RB [ \-\^\-text ]
.RB [ \-\^\-files-without-match ]
.RB [ \-\^\-files-with-matches ]
.RB [ \-\^\-count ]
.RB [ \-\^\-before-context=\fINUM\fP ]
.RB [ \-\^\-after-context=\fINUM\fP ]
.RB [ \-\^\-context [ =\fINUM\fP ]]
.RB [ \-\^\-binary ]
.RB [ \-\^\-unix-byte-offsets ]
.RB [ \-\^\-mmap ]
.RB [ \-\^\-null ]
.RB [ \-\^\-recursive ]
.RB [ \-\^\-decompress ]
.RI [ file .\|.\|.]
.SH DESCRIPTION
.PP
.B grep
@ -41,83 +83,83 @@ There are three major variants of
controlled by the following options.
.PD 0
.TP
.B \-G, --basic-regexp
.BR \-G ", " \-\^\-basic-regexp
Interpret
.I pattern
as a basic regular expression (see below). This is the default.
.TP
.B \-E, --extended-regexp
.BR \-E ", " \-\^\-extended-regexp
Interpret
.I pattern
as an extended regular expression (see below).
.TP
.B \-F, --fixed-strings
.BR \-F ", " \-\^\-fixed-strings
Interpret
.I pattern
as a list of fixed strings, separated by newlines,
any of which is to be matched.
.LP
.PP
In addition, two variant programs
.B egrep
and
.B fgrep
are available.
.B egrep
is similar (but not identical) to
.BR "grep\ \-E" ,
and is compatible with the historical Unix
.BR egrep .
is the same as
.BR "grep\ \-E" .
.B fgrep
is the same as
.BR "grep\ \-F" .
.B zgrep
is the same as
.BR "grep\ \-Z" .
.BR "grep\ \-z" .
.PD
.LP
.PP
All variants of
.B grep
understand the following options:
.PD 0
.TP
.BI \-A " NUM" ", --after-context=" NUM
.BI \-A " NUM" "\fR,\fP \-\^\-after-context=" NUM
Print
.I NUM
lines of trailing context after matching lines.
.TP
.BI \-B " NUM" ", --before-context=" NUM
.BI \-B " NUM" "\fR,\fP \-\^\-before-context=" NUM
Print
.I NUM
lines of leading context before matching lines.
.TP
.BI \-C ,\ --context"[=NUM]"
Print
.BI \-C " \fR[\fPNUM\fR]\fP" "\fR,\fP \-\^\-context\fR[\fP=" NUM\fR]\fP
Print
.I NUM
lines (default 2) of output context.
.TP
.BI \- NUM \
Same as --context=NUM lines of leading and trailing context. However,
.BI \- NUM
Same as
.BI \-\^\-context= NUM
lines of leading and trailing context. However,
.B grep
will never print any given line more than once.
.TP
.B \-V, --version
.BR \-V ", " \-\^\-version
Print the version number of
.B grep
to standard error. This version number should
be included in all bug reports (see below).
.TP
.B \-b, --byte-offset
.BR \-b ", " \-\^\-byte-offset
Print the byte offset within the input file before
each line of output.
.TP
.B \-c, --count
.BR \-c ", " \-\^\-count
Suppress normal output; instead print a count of
matching lines for each input file.
With the
.B \-v, --revert-match
.BR \-v ", " \-\^\-invert-match
option (see below), count non-matching lines.
.TP
.BI \-d " ACTION" ", --directories=" ACTION
.BI \-d " ACTION" "\fR,\fP \-\^\-directories=" ACTION
If an input file is a directory, use
.I ACTION
to process it. By default,
@ -140,75 +182,78 @@ this is equivalent to the
.B \-r
option.
.TP
.BI \-e " PATTERN" ", --regexp=" PATTERN
.BI \-e " PATTERN" "\fR,\fP \-\^\-regexp=" PATTERN
Use
.I PATTERN
as the pattern; useful to protect patterns beginning with
.BR \- .
.TP
.BI \-f " FILE" ", --file=" FILE
.BI \-f " FILE" "\fR,\fP \-\^\-file=" FILE
Obtain patterns from
.IR FILE ,
one per line.
The empty file contains zero patterns, and therefore matches nothing.
.TP
.B \-h, --no-filename
.BR \-H ", " \-\^\-with-filename
Print the filename for each match.
.TP
.BR \-h ", " \-\^\-no-filename
Suppress the prefixing of filenames on output
when multiple files are searched.
.TP
.B \-i, --ignore-case
.BR \-i ", " \-\^\-ignore-case
Ignore case distinctions in both the
.I pattern
and the input files.
.TP
.B \-L, --files-without-match
.BR \-L ", " \-\^\-files-without-match
Suppress normal output; instead print the name
of each input file from which no output would
normally have been printed. The scanning will stop
normally have been printed. The scanning will stop
on the first match.
.TP
.B \-l, --files-with-matches
.BR \-l ", " \-\^\-files-with-matches
Suppress normal output; instead print
the name of each input file from which output
would normally have been printed. The scanning will
would normally have been printed. The scanning will
stop on the first match.
.TP
.B \-n, --line-number
.BR \-n ", " \-\^\-line-number
Prefix each line of output with the line number
within its input file.
.TP
.B \-q, --quiet, --silent
Quiet; suppress normal output. The scanning will stop
.BR \-q ", " \-\^\-quiet ", " \-\^\-silent
Quiet; suppress normal output. The scanning will stop
on the first match.
Also see the
.B \-s
or
.B --no-messages
.B \-\^\-no-messages
option below.
.TP
.B \-r, --recursive
.BR \-r ", " \-\^\-recursive
Read all files under each directory, recursively;
this is equivalent to the
.B "\-d recurse"
option.
.TP
.B \-s, --no-messages
.BR \-s ", " \-\^\-no-messages
Suppress error messages about nonexistent or unreadable files.
Portability note: unlike GNU
Portability note: unlike \s-1GNU\s0
.BR grep ,
BSD
traditional
.B grep
does not comply with POSIX.2, because BSD
did not conform to \s-1POSIX.2\s0, because traditional
.B grep
lacks a
lacked a
.B \-q
option and its
.B \-s
option behaves like GNU
option behaved like \s-1GNU\s0
.BR grep 's
.B \-q
option.
Shell scripts intended to be portable to BSD
Shell scripts intended to be portable to traditional
.B grep
should avoid both
.B \-q
@ -216,7 +261,7 @@ and
.B \-s
and should redirect output to /dev/null instead.
.TP
.B \-a, --text
.BR \-a ", " \-\^\-text
Do not suppress output lines that contain binary data.
Normally, if the first few bytes of a file indicate that
the file contains binary data,
@ -227,10 +272,10 @@ This option causes
to act as if the file is a text file,
even if it would otherwise be treated as binary.
.TP
.B \-v, --revert-match
.BR \-v ", " \-\^\-invert-match
Invert the sense of matching, to select non-matching lines.
.TP
.B \-w, --word-regexp
.BR \-w ", " \-\^\-word-regexp
Select only those lines containing matches that form whole words.
The test is that the matching substring must either be at the
beginning of the line, or preceded by a non-word constituent
@ -238,14 +283,14 @@ character. Similarly, it must be either at the end of the line
or followed by a non-word constituent character. Word-constituent
characters are letters, digits, and the underscore.
.TP
.B \-x, --line-regexp
.BR \-x ", " \-\^\-line-regexp
Select only those matches that exactly match the whole line.
.TP
.B \-y
Obsolete synonym for
.BR \-i .
.TP
.B \-U, --binary
.BR \-U ", " \-\^\-binary
Treat the file(s) as binary. By default, under MS-DOS and MS-Windows,
.BR grep
guesses the file type by looking at the contents of the first 32KB
@ -261,10 +306,11 @@ work correctly). Specifying
overrules this guesswork, causing all files to be read and passed to the
matching mechanism verbatim; if the file is a text file with CR/LF
pairs at the end of each line, this will cause some regular
expressions to fail. This option is only supported on MS-DOS and
expressions to fail.
This option has no effect on platforms other than MS-DOS and
MS-Windows.
.TP
.B \-u, --unix-byte-offsets
.BR \-u ", " \-\^\-unix-byte-offsets
Report Unix-style byte offsets. This switch causes
.B grep
to report byte offsets as if the file were a Unix-style text file, i.e. with
@ -272,13 +318,46 @@ CR characters stripped off. This will produce results identical to running
.B grep
on a Unix machine. This option has no effect unless
.B \-b
option is also used; it is only supported on MS-DOS and MS-Windows.
.PD
option is also used;
it has no effect on platforms other than MS-DOS and MS-Windows.
.TP
.B \-\^\-mmap
If possible, use the
.BR mmap (2)
system call to read input, instead of
the default
.BR read (2)
system call. In some situations,
.B -\^-mmap
yields better performance. However,
.B -\^-mmap
can cause undefined behavior (including core dumps)
if an input file shrinks while
.B grep
is operating, or if an I/O error occurs.
.TP
.BR \-Z ", " \-\^\-null
Output a zero byte (the \s-1ASCII\s0
.B NUL
character) instead of the character that normally follows a file name.
For example,
.B "grep \-lZ"
outputs a zero byte after each file name instead of the usual newline.
This option makes the output unambiguous, even in the presence of file
names containing unusual characters like newlines. This option can be
used with commands like
.BR "find \-print0" ,
.BR "perl \-0" ,
.BR "sort \-z" ,
and
.B "xargs \-0"
to process arbitrary file names,
even those that contain newline characters.
.LP
The following option is only available if compiled with the zlib(3) library:
.PD 0
.TP
.B \-Z, --decompress
.B \-z, --decompress
Decompress the input data before searching.
.PD
.SH "REGULAR EXPRESSIONS"
@ -289,8 +368,8 @@ expressions, by using various operators to combine smaller expressions.
.PP
.B grep
understands two different versions of regular expression syntax:
``basic'' and ``extended.'' In
.RB "GNU\ " grep ,
\*(lqbasic\*(rq and \*(lqextended.\*(rq In
.RB "\s-1GNU\s0\ " grep ,
there is no difference in available functionality using either syntax.
In other implementations, basic regular expressions are less powerful.
The following description applies to extended regular expressions;
@ -402,11 +481,6 @@ The preceding item is matched
.I n
or more times.
.TP
.BI {, m }
The preceding item is optional and is matched at most
.I m
times.
.TP
.BI { n , m }
The preceding item is matched at least
.I n
@ -456,12 +530,35 @@ versions
and
.BR \e) .
.PP
In
Traditional
.B egrep
the metacharacter
did not support the
.B {
loses its special meaning; instead use
.BR \e{ .
metacharacter, and some
.B egrep
implementations support
.B \e{
instead, so portable scripts should avoid
.B {
in
.B egrep
patterns and should use
.B [{]
to match a literal
.BR { .
.PP
\s-1GNU\s0
.B egrep
attempts to support traditional usage by assuming that
.B {
is not special if it would be the start of an invalid interval
specification. For example, the shell command
.B "egrep '{1'"
searches for the two-character string
.B {1
instead of reporting a syntax error in the regular expression.
\s-1POSIX.2\s0 allows this behavior as an extension, but portable scripts
should avoid it.
.SH ENVIRONMENT
The environment variable
.B GREP_OPTIONS
@ -483,7 +580,8 @@ other system errors.
.PP
Email bug reports to
.BR bug-gnu-utils@gnu.org .
Be sure to include the word ``grep'' somewhere in the ``Subject:'' field.
Be sure to include the word \*(lqgrep\*(rq somewhere in the
\*(lqSubject:\*(rq field.
.PP
Large repetition counts in the
.BI { m , n }
@ -495,3 +593,5 @@ and space, and may cause
to run out of memory.
.PP
Backreferences are very slow, and may require exponential time.
.\" Work around problems with some troff -man implementations.
.br

View File

@ -58,6 +58,17 @@ static int show_help;
/* If non-zero, print the version on standard output and exit. */
static int show_version;
/* If nonzero, use mmap if possible. */
static int mmap_option;
/* Short options.
   Presumably the option-letter specification handed to the getopt-style
   parser alongside long_options -- TODO confirm at the call site.
   NOTE(review): both HAVE_LIBZ branches below hold the same string, so
   the conditional currently has no effect; confirm whether the two
   option sets were meant to differ. */
static char const short_options[] =
#if HAVE_LIBZ > 0
"0123456789A:B:C::EFGHUVX:abcd:e:f:hiLlnqrsuvwxyZz";
#else
"0123456789A:B:C::EFGHUVX:abcd:e:f:hiLlnqrsuvwxyZz";
#endif
/* Long options equivalences. */
static struct option long_options[] =
{
@ -78,18 +89,23 @@ static struct option long_options[] =
{"ignore-case", no_argument, NULL, 'i'},
{"line-number", no_argument, NULL, 'n'},
{"line-regexp", no_argument, NULL, 'x'},
{"mmap", no_argument, &mmap_option, 1},
{"no-filename", no_argument, NULL, 'h'},
{"no-messages", no_argument, NULL, 's'},
#if HAVE_LIBZ > 0
{"null", no_argument, NULL, /*'Z'*/ 1},
#else
{"null", no_argument, NULL, 'Z'},
#endif
{"null-data", no_argument, NULL, 'z'},
{"quiet", no_argument, NULL, 'q'},
{"recursive", no_argument, NULL, 'r'},
{"regexp", required_argument, NULL, 'e'},
{"revert-match", no_argument, NULL, 'v'},
{"invert-match", no_argument, NULL, 'v'},
{"silent", no_argument, NULL, 'q'},
{"text", no_argument, NULL, 'a'},
#if O_BINARY
{"binary", no_argument, NULL, 'U'},
{"unix-byte-offsets", no_argument, NULL, 'u'},
#endif
{"version", no_argument, NULL, 'V'},
{"with-filename", no_argument, NULL, 'H'},
{"word-regexp", no_argument, NULL, 'w'},
@ -100,10 +116,13 @@ static struct option long_options[] =
};
/* Define flags declared in grep.h. */
/* I do not know why we need this decl, while if you build GNU grep 2.4 by
hand you don't... */
char const *matcher;
int match_icase;
int match_words;
int match_lines;
unsigned char eolbyte;
/* For error messages. */
static char *prog;
@ -121,7 +140,10 @@ static enum
static int ck_atoi PARAMS ((char const *, int *));
static void usage PARAMS ((int)) __attribute__((noreturn));
static void error PARAMS ((const char *, int));
static int setmatcher PARAMS ((char const *));
static void setmatcher PARAMS ((char const *));
static int install_matcher PARAMS ((char const *));
static int prepend_args PARAMS ((char const *, char *, char **));
static void prepend_default_options PARAMS ((char const *, int *, char ***));
static char *page_alloc PARAMS ((size_t, char **));
static int reset PARAMS ((int, char const *, struct stats *));
static int fillbuf PARAMS ((size_t, struct stats *));
@ -221,14 +243,15 @@ static char *ubuffer; /* Unaligned base of buffer. */
static char *buffer; /* Base of buffer. */
static size_t bufsalloc; /* Allocated size of buffer save region. */
static size_t bufalloc; /* Total buffer size. */
#define PREFERRED_SAVE_FACTOR 5 /* Preferred value of bufalloc / bufsalloc. */
static int bufdesc; /* File descriptor. */
static char *bufbeg; /* Beginning of user-visible stuff. */
static char *buflim; /* Limit of user-visible stuff. */
static size_t pagesize; /* alignment of memory pages */
static off_t bufoffset; /* Read offset; defined on regular files. */
#if defined(HAVE_MMAP)
static int bufmapped; /* True for ordinary files. */
static off_t bufoffset; /* What read() normally remembers. */
static int bufmapped; /* True if buffer is memory-mapped. */
static off_t initial_bufoffset; /* Initial value of bufoffset. */
#endif
@ -245,32 +268,26 @@ static int Zflag; /* uncompress before searching. */
? (val) \
: (val) + ((alignment) - (size_t) (val) % (alignment)))
/* Return the address of a new page-aligned buffer of size SIZE. Set
*UP to the newly allocated (but possibly unaligned) buffer used to
*build the aligned buffer. To free the buffer, free (*UP). */
/* Return the address of a page-aligned buffer of size SIZE,
reallocating it from *UP. Set *UP to the newly allocated (but
possibly unaligned) buffer used to build the aligned buffer. To
free the buffer, free (*UP).
Returns NULL when the padded size wraps around or the allocator fails.
NOTE(review): the body below shows removed lines (the valloc branch and
the plain malloc call) next to the lines that replace them. */
static char *
page_alloc (size, up)
size_t size;
char **up;
{
/* HAVE_WORKING_VALLOC means that valloc is properly declared, and
you can free the result of valloc. This symbol is not (yet)
autoconfigured. It can be useful to define HAVE_WORKING_VALLOC
while debugging, since some debugging memory allocators might
catch more bugs if this symbol is enabled. */
#if HAVE_WORKING_VALLOC
*up = valloc (size);
return *up;
#else
/* Ask for pagesize - 1 extra bytes so that rounding the returned
pointer up to a page boundary still leaves SIZE usable bytes. */
size_t asize = size + pagesize - 1;
/* Skip the allocation if the addition above wrapped around. */
if (size <= asize)
{
*up = malloc (asize);
if (*up)
return ALIGN_TO (*up, pagesize);
/* Grow an existing allocation when *UP is non-null, otherwise allocate
afresh; the result is held in P so *UP is only assigned on success. */
char *p = *up ? realloc (*up, asize) : malloc (asize);
if (p)
{
*up = p;
return ALIGN_TO (p, pagesize);
}
}
return NULL;
#endif
}
/* Reset the buffer for a new file, returning zero if we should skip it.
@ -281,7 +298,9 @@ reset (fd, file, stats)
char const *file;
struct stats *stats;
{
if (pagesize == 0)
if (pagesize)
bufsalloc = ALIGN_TO (bufalloc / PREFERRED_SAVE_FACTOR, pagesize);
else
{
size_t ubufsalloc;
pagesize = getpagesize ();
@ -293,162 +312,212 @@ reset (fd, file, stats)
ubufsalloc = BUFSALLOC;
#endif
bufsalloc = ALIGN_TO (ubufsalloc, pagesize);
bufalloc = 5 * bufsalloc;
bufalloc = PREFERRED_SAVE_FACTOR * bufsalloc;
/* The 1 byte of overflow is a kludge for dfaexec(), which
inserts a sentinel newline at the end of the buffer
being searched. There's gotta be a better way... */
if (bufsalloc < ubufsalloc
|| bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc
|| bufalloc / PREFERRED_SAVE_FACTOR != bufsalloc
|| bufalloc + 1 < bufalloc
|| ! (buffer = page_alloc (bufalloc + 1, &ubuffer)))
fatal (_("memory exhausted"), 0);
bufbeg = buffer;
buflim = buffer;
}
#if HAVE_LIBZ > 0
if (Zflag) {
if (Zflag)
{
gzbufdesc = gzdopen(fd, "r");
if (gzbufdesc == NULL)
fatal(_("memory exhausted"), 0);
}
}
#endif
buflim = buffer;
bufdesc = fd;
if (
#if defined(HAVE_MMAP)
1
#else
directories != READ_DIRECTORIES
#endif
)
if (fstat (fd, &stats->stat) != 0)
{
error ("fstat", errno);
return 0;
}
if (fstat (fd, &stats->stat) != 0)
{
error ("fstat", errno);
return 0;
}
if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
return 0;
#if defined(HAVE_MMAP)
if (
#if HAVE_LIBZ > 0
Zflag ||
#endif
!S_ISREG (stats->stat.st_mode))
bufmapped = 0;
S_ISREG (stats->stat.st_mode))
{
if (file)
bufoffset = 0;
else
{
bufoffset = lseek (fd, 0, SEEK_CUR);
if (bufoffset < 0)
{
error ("lseek", errno);
return 0;
}
}
#ifdef HAVE_MMAP
initial_bufoffset = bufoffset;
bufmapped = mmap_option && bufoffset % pagesize == 0;
#endif
}
else
{
bufmapped = 1;
bufoffset = initial_bufoffset = file ? 0 : lseek (fd, 0, 1);
}
#ifdef HAVE_MMAP
bufmapped = 0;
#endif
}
return 1;
}
/* Read new stuff into the buffer, saving the specified
amount of old stuff. When we're done, 'bufbeg' points
to the beginning of the buffer contents, and 'buflim'
points just after the end. Return count of new stuff. */
points just after the end. Return zero if there's an error. */
static int
fillbuf (save, stats)
size_t save;
struct stats *stats;
{
int cc;
#if defined(HAVE_MMAP)
caddr_t maddr;
#endif
size_t fillsize = 0;
int cc = 1;
size_t readsize;
if (save > bufsalloc)
/* Offset from start of unaligned buffer to start of old stuff
that we want to save. */
size_t saved_offset = buflim - ubuffer - save;
if (bufsalloc < save)
{
char *nubuffer;
char *nbuffer;
size_t aligned_save = ALIGN_TO (save, pagesize);
size_t maxalloc = (size_t) -1;
size_t newalloc;
while (save > bufsalloc)
bufsalloc *= 2;
bufalloc = 5 * bufsalloc;
if (bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc
|| ! (nbuffer = page_alloc (bufalloc + 1, &nubuffer)))
if (S_ISREG (stats->stat.st_mode))
{
/* Calculate an upper bound on how much memory we should allocate.
We can't use ALIGN_TO here, since off_t might be longer than
size_t. Watch out for arithmetic overflow. */
off_t to_be_read = stats->stat.st_size - bufoffset;
size_t slop = to_be_read % pagesize;
off_t aligned_to_be_read = to_be_read + (slop ? pagesize - slop : 0);
off_t maxalloc_off = aligned_save + aligned_to_be_read;
if (0 <= maxalloc_off && maxalloc_off == (size_t) maxalloc_off)
maxalloc = maxalloc_off;
}
/* Grow bufsalloc until it is at least as great as `save'; but
if there is an overflow, just grow it to the next page boundary. */
while (bufsalloc < save)
if (bufsalloc < bufsalloc * 2)
bufsalloc *= 2;
else
{
bufsalloc = aligned_save;
break;
}
/* Grow the buffer size to be PREFERRED_SAVE_FACTOR times
bufsalloc.... */
newalloc = PREFERRED_SAVE_FACTOR * bufsalloc;
if (maxalloc < newalloc)
{
/* ... except don't grow it more than a pagesize past the
file size, as that might cause unnecessary memory
exhaustion if the file is large. */
newalloc = maxalloc;
bufsalloc = aligned_save;
}
/* Check that the above calculations made progress, which might
not occur if there is arithmetic overflow. If there's no
progress, or if the new buffer size is larger than the old
and buffer reallocation fails, report memory exhaustion. */
if (bufsalloc < save || newalloc < save
|| (newalloc == save && newalloc != maxalloc)
|| (bufalloc < newalloc
&& ! (buffer
= page_alloc ((bufalloc = newalloc) + 1, &ubuffer))))
fatal (_("memory exhausted"), 0);
}
bufbeg = nbuffer + bufsalloc - save;
memcpy (bufbeg, buflim - save, save);
free (ubuffer);
ubuffer = nubuffer;
buffer = nbuffer;
}
else
{
bufbeg = buffer + bufsalloc - save;
memcpy (bufbeg, buflim - save, save);
}
bufbeg = buffer + bufsalloc - save;
memmove (bufbeg, ubuffer + saved_offset, save);
readsize = bufalloc - bufsalloc;
#if defined(HAVE_MMAP)
if (bufmapped && bufoffset % pagesize == 0
&& stats->stat.st_size - bufoffset >= bufalloc - bufsalloc)
if (bufmapped)
{
maddr = buffer + bufsalloc;
maddr = mmap (maddr, bufalloc - bufsalloc, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_FIXED, bufdesc, bufoffset);
if (maddr == (caddr_t) -1)
size_t mmapsize = readsize;
/* Don't mmap past the end of the file; some hosts don't allow this.
Use `read' on the last page. */
if (stats->stat.st_size - bufoffset < mmapsize)
{
/* This used to issue a warning, but on some hosts
(e.g. Solaris 2.5) mmap can fail merely because some
other process has an advisory read lock on the file.
There's no point alarming the user about this misfeature. */
#if 0
fprintf (stderr, _("%s: warning: %s: %s\n"), prog, filename,
strerror (errno));
#endif
goto tryread;
mmapsize = stats->stat.st_size - bufoffset;
mmapsize -= mmapsize % pagesize;
}
#if 0
/* You might thing this (or MADV_WILLNEED) would help,
but it doesn't, at least not on a Sun running 4.1.
In fact, it actually slows us down about 30%! */
madvise (maddr, bufalloc - bufsalloc, MADV_SEQUENTIAL);
#endif
cc = bufalloc - bufsalloc;
bufoffset += cc;
}
else
{
tryread:
/* We come here when we're not going to use mmap() any more.
Note that we need to synchronize the file offset the
first time through. */
if (bufmapped)
if (mmapsize
&& (mmap ((caddr_t) (buffer + bufsalloc), mmapsize,
PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED,
bufdesc, bufoffset)
!= (caddr_t) -1))
{
bufmapped = 0;
if (bufoffset != initial_bufoffset)
lseek (bufdesc, bufoffset, 0);
/* Do not bother to use madvise with MADV_SEQUENTIAL or
MADV_WILLNEED on the mmapped memory. One might think it
would help, but it slows us down about 30% on SunOS 4.1. */
fillsize = mmapsize;
}
#if HAVE_LIBZ > 0
if (Zflag)
cc = gzread (gzbufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
else
#endif
cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
{
/* Stop using mmap on this file. Synchronize the file
offset. Do not warn about mmap failures. On some hosts
(e.g. Solaris 2.5) mmap can fail merely because some
other process has an advisory read lock on the file.
There's no point alarming the user about this misfeature. */
bufmapped = 0;
if (bufoffset != initial_bufoffset
&& lseek (bufdesc, bufoffset, SEEK_SET) < 0)
{
error ("lseek", errno);
cc = 0;
}
}
}
#else
#if HAVE_LIBZ > 0
if (Zflag)
cc = gzread (gzbufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
else
#endif
cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
#endif /*HAVE_MMAP*/
#if O_BINARY
if (cc > 0)
cc = undossify_input (buffer + bufsalloc, cc);
if (! fillsize)
{
ssize_t bytesread;
do
#if HAVE_LIBZ > 0
if (Zflag)
bytesread = gzread (gzbufdesc, buffer + bufsalloc, readsize);
else
#endif
if (cc > 0)
buflim = buffer + bufsalloc + cc;
else
buflim = buffer + bufsalloc;
bytesread = read (bufdesc, buffer + bufsalloc, readsize);
while (bytesread < 0 && errno == EINTR);
if (bytesread < 0)
cc = 0;
else
fillsize = bytesread;
}
bufoffset += fillsize;
#if O_BINARY
if (fillsize)
fillsize = undossify_input (buffer + bufsalloc, fillsize);
#endif
buflim = buffer + bufsalloc + fillsize;
return cc;
}
/* Flags controlling the style of output. */
static int always_text; /* Assume the input is always text. */
static int filename_mask; /* If zero, output nulls after filenames. */
static int out_quiet; /* Suppress all normal output. */
static int out_invert; /* Print nonmatching stuff. */
static int out_file; /* Print filenames. */
@ -480,11 +549,9 @@ nlscan (lim)
char *lim;
{
char *beg;
for (beg = lastnl; beg < lim; ++beg)
if (*beg == '\n')
++totalnl;
lastnl = beg;
for (beg = lastnl; (beg = memchr (beg, eolbyte, lim - beg)); beg++)
totalnl++;
lastnl = lim;
}
static void
@ -513,7 +580,7 @@ prline (beg, lim, sep)
int sep;
{
if (out_file)
printf ("%s%c", filename, sep);
printf ("%s%c", filename, sep & filename_mask);
if (out_line)
{
nlscan (beg);
@ -546,7 +613,7 @@ prpending (lim)
while (pending > 0 && lastout < lim)
{
--pending;
if ((nl = memchr (lastout, '\n', lim - lastout)) != 0)
if ((nl = memchr (lastout, eolbyte, lim - lastout)) != 0)
++nl;
else
nl = lim;
@ -564,6 +631,7 @@ prtext (beg, lim, nlinesp)
{
static int used; /* avoid printing "--" before any output */
char *bp, *p, *nl;
char eol = eolbyte;
int i, n;
if (!out_quiet && pending > 0)
@ -580,7 +648,7 @@ prtext (beg, lim, nlinesp)
if (p > bp)
do
--p;
while (p > bp && p[-1] != '\n');
while (p > bp && p[-1] != eol);
/* We only print the "--" separator if our output is
discontiguous from the last output in the file. */
@ -589,7 +657,7 @@ prtext (beg, lim, nlinesp)
while (p < beg)
{
nl = memchr (p, '\n', beg - p);
nl = memchr (p, eol, beg - p);
prline (p, nl + 1, '-');
p = nl + 1;
}
@ -600,7 +668,7 @@ prtext (beg, lim, nlinesp)
/* Caller wants a line count. */
for (n = 0; p < lim; ++n)
{
if ((nl = memchr (p, '\n', lim - p)) != 0)
if ((nl = memchr (p, eol, lim - p)) != 0)
++nl;
else
nl = lim;
@ -614,7 +682,7 @@ prtext (beg, lim, nlinesp)
if (!out_quiet)
prline (beg, lim, ':');
pending = out_after;
pending = out_quiet ? 0 : out_after;
used = 1;
}
@ -629,13 +697,14 @@ grepbuf (beg, lim)
int nlines, n;
register char *p, *b;
char *endp;
char eol = eolbyte;
nlines = 0;
p = beg;
while ((b = (*execute)(p, lim - p, &endp)) != 0)
{
/* Avoid matching the empty line at the end of the buffer. */
if (b == lim && ((b > beg && b[-1] == '\n') || b == beg))
if (b == lim && ((b > beg && b[-1] == eol) || b == beg))
break;
if (!out_invert)
{
@ -672,6 +741,7 @@ grep (fd, file, stats)
int not_text;
size_t residue, save;
char *beg, *lim;
char eol = eolbyte;
if (!reset (fd, file, stats))
return 0;
@ -700,7 +770,7 @@ grep (fd, file, stats)
residue = 0;
save = 0;
if (fillbuf (save, stats) < 0)
if (! fillbuf (save, stats))
{
if (! (is_EISDIR (errno, file) && suppress_errors))
error (filename, errno);
@ -708,7 +778,7 @@ grep (fd, file, stats)
}
not_text = (! (always_text | out_quiet)
&& memchr (bufbeg, '\0', buflim - bufbeg));
&& memchr (bufbeg, eol ? '\0' : '\200', buflim - bufbeg));
done_on_match += not_text;
out_quiet += not_text;
@ -720,7 +790,7 @@ grep (fd, file, stats)
if (buflim - bufbeg == save)
break;
beg = bufbeg + save - residue;
for (lim = buflim; lim > beg && lim[-1] != '\n'; --lim)
for (lim = buflim; lim > beg && lim[-1] != eol; --lim)
;
residue = buflim - lim;
if (beg < lim)
@ -738,7 +808,7 @@ grep (fd, file, stats)
++i;
do
--beg;
while (beg > bufbeg && beg[-1] != '\n');
while (beg > bufbeg && beg[-1] != eol);
}
if (beg != lastout)
lastout = 0;
@ -746,7 +816,7 @@ grep (fd, file, stats)
totalcc += buflim - bufbeg - save;
if (out_line)
nlscan (beg);
if (fillbuf (save, stats) < 0)
if (! fillbuf (save, stats))
{
if (! (is_EISDIR (errno, file) && suppress_errors))
error (filename, errno);
@ -784,7 +854,8 @@ grepfile (file, stats)
}
else
{
desc = open (file, O_RDONLY);
while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR)
continue;
if (desc < 0)
{
@ -843,30 +914,26 @@ grepfile (file, stats)
if (count_matches)
{
if (out_file)
printf ("%s:", filename);
printf ("%s%c", filename, ':' & filename_mask);
printf ("%d\n", count);
}
if (count)
{
status = 0;
if (list_files == 1)
printf ("%s\n", filename);
}
else
{
status = 1;
if (list_files == -1)
printf ("%s\n", filename);
}
status = !count;
if (list_files == 1 - 2 * status)
printf ("%s%c", filename, '\n' & filename_mask);
#if HAVE_LIBZ > 0
if (Zflag)
gzclose(gzbufdesc);
else
#endif
if (file && close (desc) != 0)
error (file, errno);
if (file)
while (close (desc) != 0)
if (errno != EINTR)
{
error (file, errno);
break;
}
}
return status;
@ -882,8 +949,8 @@ grepdir (dir, stats)
char *name_space;
for (ancestor = stats; (ancestor = ancestor->parent) != 0; )
if (! ((ancestor->stat.st_ino ^ stats->stat.st_ino)
| (ancestor->stat.st_dev ^ stats->stat.st_dev)))
if (ancestor->stat.st_ino == stats->stat.st_ino
&& ancestor->stat.st_dev == stats->stat.st_dev)
{
if (!suppress_errors)
fprintf (stderr, _("%s: warning: %s: %s\n"), prog, dir,
@ -946,24 +1013,29 @@ int status;
printf (_("Usage: %s [OPTION]... PATTERN [FILE] ...\n"), prog);
printf (_("\
Search for PATTERN in each FILE or standard input.\n\
Example: %s -i 'hello.*world' menu.h main.c\n\
\n\
Regexp selection and interpretation:\n\
Regexp selection and interpretation:\n"), prog);
printf (_("\
-E, --extended-regexp PATTERN is an extended regular expression\n\
-F, --fixed-regexp PATTERN is a fixed string separated by newlines\n\
-G, --basic-regexp PATTERN is a basic regular expression\n\
-F, --fixed-strings PATTERN is a set of newline-separated strings\n\
-G, --basic-regexp PATTERN is a basic regular expression\n"));
printf (_("\
-e, --regexp=PATTERN use PATTERN as a regular expression\n\
-f, --file=FILE obtain PATTERN from FILE\n\
-i, --ignore-case ignore case distinctions\n\
-w, --word-regexp force PATTERN to match only whole words\n\
-x, --line-regexp force PATTERN to match only whole lines\n"));
-x, --line-regexp force PATTERN to match only whole lines\n\
-z, --null-data a data line ends in 0 byte, not newline\n"));
printf (_("\
\n\
Miscellaneous:\n\
-s, --no-messages suppress error messages\n\
-v, --revert-match select non-matching lines\n\
-v, --invert-match select non-matching lines\n\
-V, --version print version information and exit\n\
--help display this help and exit\n\
-Z, --decompress decompress input before searching (HAVE_LIBZ=1)\n\
--help display this help and exit\n"));
--mmap use memory-mapped input if possible\n"));
printf (_("\
\n\
Output control:\n\
@ -978,31 +1050,42 @@ Output control:\n\
-r, --recursive equivalent to --directories=recurse.\n\
-L, --files-without-match only print FILE names containing no match\n\
-l, --files-with-matches only print FILE names containing matches\n\
-c, --count only print a count of matching lines per FILE\n"));
-c, --count only print a count of matching lines per FILE\n\
--null print 0 byte after FILE name\n"));
printf (_("\
\n\
Context control:\n\
-B, --before-context=NUM print NUM lines of leading context\n\
-A, --after-context=NUM print NUM lines of trailing context\n\
-C, --context[=NUM] print NUM (default 2) lines of output context\n\
unless overriden by -A or -B\n\
unless overridden by -A or -B\n\
-NUM same as --context=NUM\n\
-U, --binary do not strip CR characters at EOL (MSDOS)\n\
-u, --unix-byte-offsets report offsets as if CRs were not there (MSDOS)\n\
\n\
If no -[GEF], then `egrep' assumes -E, `fgrep' -F, else -G.\n\
With no FILE, or when FILE is -, read standard input. If less than\n\
two FILEs given, assume -h. Exit with 0 if matches, with 1 if none.\n\
Exit with 2 if syntax errors or system errors.\n"));
`egrep' means `grep -E'. `fgrep' means `grep -F'.\n\
With no FILE, or when FILE is -, read standard input. If less than\n\
two FILEs given, assume -h. Exit status is 0 if match, 1 if no match,\n\
and 2 if trouble.\n"));
printf (_("\nReport bugs to <bug-gnu-utils@gnu.org>.\n"));
}
exit (status);
}
/* Record M as the matcher to use.  Naming the same matcher again is
   accepted; naming a different one than was already chosen is
   reported through fatal.  */
static void
setmatcher (m)
     char const *m;
{
  int conflict = matcher != 0 && strcmp (matcher, m) != 0;

  if (conflict)
    fatal (_("conflicting matchers specified"), 0);
  matcher = m;
}
/* Go through the matchers vector and look for the specified matcher.
If we find it, install it in compile and execute, and return 1. */
static int
setmatcher (name)
install_matcher (name)
char const *name;
{
int i;
@ -1158,7 +1241,8 @@ main (argc, argv)
keys = NULL;
keycc = 0;
with_filenames = 0;
matcher = NULL;
eolbyte = '\n';
filename_mask = ~0;
/* The value -1 means to use DEFAULT_CONTEXT. */
out_after = out_before = -1;
@ -1179,15 +1263,8 @@ main (argc, argv)
prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
while ((opt = getopt_long (argc, argv,
#if O_BINARY
"0123456789A:B:C::EFGHVX:abcd:e:f:hiLlnqrsvwxyUu",
#elif HAVE_LIBZ > 0
"0123456789A:B:C::EFGHRVX:Zabcd:e:f:hiLlnqrsvwxy",
#else
"0123456789A:B:C::EFGHRVX:abcd:e:f:hiLlnqrsvwxy",
#endif
long_options, NULL)) != EOF)
while ((opt = getopt_long (argc, argv, short_options, long_options, NULL))
!= -1)
switch (opt)
{
case '0':
@ -1229,44 +1306,33 @@ main (argc, argv)
default_context = 2;
break;
case 'E':
if (matcher && strcmp (matcher, "posix-egrep") != 0)
fatal (_("you may specify only one of -E, -F, or -G"), 0);
matcher = "posix-egrep";
setmatcher ("egrep");
break;
case 'F':
if (matcher && strcmp(matcher, "fgrep") != 0)
fatal(_("you may specify only one of -E, -F, or -G"), 0);;
matcher = "fgrep";
setmatcher ("fgrep");
break;
case 'G':
if (matcher && strcmp (matcher, "grep") != 0)
fatal (_("you may specify only one of -E, -F, or -G"), 0);
matcher = "grep";
setmatcher ("grep");
break;
case 'H':
with_filenames = 1;
break;
#if O_BINARY
case 'U':
#if O_BINARY
dos_use_file_type = DOS_BINARY;
#endif
break;
case 'u':
#if O_BINARY
dos_report_unix_offset = 1;
break;
#endif
break;
case 'V':
show_version = 1;
break;
case 'X':
if (matcher)
fatal (_("matcher already specified"), 0);
matcher = optarg;
setmatcher (optarg);
break;
#if HAVE_LIBZ > 0
case 'Z':
Zflag = 1;
break;
#endif
case 'a':
always_text = 1;
break;
@ -1357,6 +1423,16 @@ main (argc, argv)
case 'x':
match_lines = 1;
break;
case 'Z':
#if HAVE_LIBZ > 0
Zflag = 1;
#else
filename_mask = 0;
#endif
break;
case 'z':
eolbyte = '\0';
break;
case 0:
/* long options */
break;
@ -1370,9 +1446,12 @@ main (argc, argv)
if (out_before < 0)
out_before = default_context;
if (! matcher)
matcher = prog;
if (show_version)
{
printf (_("grep (GNU grep) %s\n"), VERSION);
printf (_("%s (GNU grep) %s\n"), matcher, VERSION);
printf ("\n");
printf (_("\
Copyright (C) 1988, 1992-1998, 1999 Free Software Foundation, Inc.\n"));
@ -1404,10 +1483,7 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"))
else
usage (2);
if (! matcher)
matcher = prog;
if (!setmatcher (matcher) && !setmatcher ("default"))
if (!install_matcher (matcher) && !install_matcher ("default"))
abort ();
(*compile)(keys, keycc);

View File

@ -37,14 +37,12 @@ extern struct matcher
char *(*execute) PARAMS ((char *, size_t, char **));
} matchers[];
/* Exported from grep.c. */
extern char const *matcher;
/* Exported from fgrepmat.c, egrepmat.c, grepmat.c. */
extern char const default_matcher[];
extern char const *matcher;
/* The following flags are exported from grep for the matchers
to look at. */
extern int match_icase; /* -i */
extern int match_words; /* -w */
extern int match_lines; /* -x */
extern unsigned char eolbyte; /* -z */

View File

@ -48,7 +48,6 @@ struct matcher matchers[] = {
{ "default", Gcompile, EGexecute },
{ "grep", Gcompile, EGexecute },
{ "egrep", Ecompile, EGexecute },
{ "posix-egrep", Ecompile, EGexecute },
{ "awk", Ecompile, EGexecute },
{ "fgrep", Fcompile, Fexecute },
{ 0, 0, 0 },
@ -61,7 +60,7 @@ struct matcher matchers[] = {
static struct dfa dfa;
/* Regex compiled regexp. */
static struct re_pattern_buffer regex;
static struct re_pattern_buffer regexbuf;
/* KWset compiled pattern. For Ecompile and Gcompile, we compile
a list of strings, at least one of which is known to occur in
@ -140,9 +139,9 @@ Gcompile(pattern, size)
const char *err;
re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase);
dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
if ((err = re_compile_pattern(pattern, size, &regex)) != 0)
if ((err = re_compile_pattern(pattern, size, &regexbuf)) != 0)
fatal(err, 0);
/* In the match_words and match_lines cases, we use a different pattern
@ -155,7 +154,8 @@ Gcompile(pattern, size)
(^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
In the whole-line case, we use the pattern:
^(userpattern)$.
BUG: Using [A-Za-z_] is locale-dependent! */
BUG: Using [A-Za-z_] is locale-dependent!
So we use [:alnum:] instead. */
char *n = malloc(size + 50);
int i = 0;
@ -165,14 +165,14 @@ Gcompile(pattern, size)
if (match_lines)
strcpy(n, "^\\(");
if (match_words)
strcpy(n, "\\(^\\|[^0-9A-Za-z_]\\)\\(");
strcpy(n, "\\(^\\|[^[:alnum:]_]\\)\\(");
i = strlen(n);
memcpy(n + i, pattern, size);
i += size;
if (match_words)
strcpy(n + i, "\\)\\([^0-9A-Za-z_]\\|$\\)");
strcpy(n + i, "\\)\\([^[:alnum:]_]\\|$\\)");
if (match_lines)
strcpy(n + i, "\\)$");
@ -192,23 +192,18 @@ Ecompile(pattern, size)
{
const char *err;
if (strcmp(matcher, "posix-egrep") == 0)
{
re_set_syntax(RE_SYNTAX_POSIX_EGREP);
dfasyntax(RE_SYNTAX_POSIX_EGREP, match_icase);
}
else if (strcmp(matcher, "awk") == 0)
if (strcmp(matcher, "awk") == 0)
{
re_set_syntax(RE_SYNTAX_AWK);
dfasyntax(RE_SYNTAX_AWK, match_icase);
dfasyntax(RE_SYNTAX_AWK, match_icase, eolbyte);
}
else
{
re_set_syntax(RE_SYNTAX_EGREP);
dfasyntax(RE_SYNTAX_EGREP, match_icase);
re_set_syntax (RE_SYNTAX_POSIX_EGREP);
dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
}
if ((err = re_compile_pattern(pattern, size, &regex)) != 0)
if ((err = re_compile_pattern(pattern, size, &regexbuf)) != 0)
fatal(err, 0);
/* In the match_words and match_lines cases, we use a different pattern
@ -221,7 +216,8 @@ Ecompile(pattern, size)
(^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
In the whole-line case, we use the pattern:
^(userpattern)$.
BUG: Using [A-Za-z_] is locale-dependent! */
BUG: Using [A-Za-z_] is locale-dependent!
So we use the [:alnum:] character class instead. */
char *n = malloc(size + 50);
int i = 0;
@ -231,14 +227,14 @@ Ecompile(pattern, size)
if (match_lines)
strcpy(n, "^(");
if (match_words)
strcpy(n, "(^|[^0-9A-Za-z_])(");
strcpy(n, "(^|[^[:alnum:]_])(");
i = strlen(n);
memcpy(n + i, pattern, size);
i += size;
if (match_words)
strcpy(n + i, ")([^0-9A-Za-z_]|$)");
strcpy(n + i, ")([^[:alnum:]_]|$)");
if (match_lines)
strcpy(n + i, ")$");
@ -258,6 +254,7 @@ EGexecute(buf, size, endp)
char **endp;
{
register char *buflim, *beg, *end, save;
char eol = eolbyte;
int backref, start, len;
struct kwsmatch kwsm;
static struct re_registers regs; /* This is static on account of a BRAIN-DEAD
@ -275,10 +272,10 @@ EGexecute(buf, size, endp)
goto failure;
/* Narrow down to the line containing the candidate, and
run it through DFA. */
end = memchr(beg, '\n', buflim - beg);
end = memchr(beg, eol, buflim - beg);
if (!end)
end = buflim;
while (beg > buf && beg[-1] != '\n')
while (beg > buf && beg[-1] != eol)
--beg;
save = *end;
if (kwsm.index < lastexact)
@ -302,10 +299,10 @@ EGexecute(buf, size, endp)
if (!beg)
goto failure;
/* Narrow down to the line we've found. */
end = memchr(beg, '\n', buflim - beg);
end = memchr(beg, eol, buflim - beg);
if (!end)
end = buflim;
while (beg > buf && beg[-1] != '\n')
while (beg > buf && beg[-1] != eol)
--beg;
/* Successful, no backreferences encountered! */
if (!backref)
@ -313,8 +310,8 @@ EGexecute(buf, size, endp)
}
/* If we've made it to this point, this means DFA has seen
a probable match, and we need to run it through Regex. */
regex.not_eol = 0;
if ((start = re_search(&regex, beg, end - beg, 0, end - beg, &regs)) >= 0)
regexbuf.not_eol = 0;
if ((start = re_search(&regexbuf, beg, end - beg, 0, end - beg, &regs)) >= 0)
{
len = regs.end[0] - start;
if ((!match_lines && !match_words)
@ -337,8 +334,8 @@ EGexecute(buf, size, endp)
{
/* Try a shorter length anchored at the same place. */
--len;
regex.not_eol = 1;
len = re_match(&regex, beg, start + len, start, &regs);
regexbuf.not_eol = 1;
len = re_match(&regexbuf, beg, start + len, start, &regs);
}
if (len <= 0)
{
@ -346,8 +343,8 @@ EGexecute(buf, size, endp)
if (start == end - beg)
break;
++start;
regex.not_eol = 0;
start = re_search(&regex, beg, end - beg,
regexbuf.not_eol = 0;
start = re_search(&regexbuf, beg, end - beg,
start, end - beg - start, &regs);
len = regs.end[0] - start;
}
@ -396,6 +393,7 @@ Fexecute(buf, size, endp)
{
register char *beg, *try, *end;
register size_t len;
char eol = eolbyte;
struct kwsmatch kwsmatch;
for (beg = buf; beg <= buf + size; ++beg)
@ -405,9 +403,9 @@ Fexecute(buf, size, endp)
len = kwsmatch.size[0];
if (match_lines)
{
if (beg > buf && beg[-1] != '\n')
if (beg > buf && beg[-1] != eol)
continue;
if (beg + len < buf + size && beg[len] != '\n')
if (beg + len < buf + size && beg[len] != eol)
continue;
goto success;
}
@ -431,7 +429,7 @@ Fexecute(buf, size, endp)
return 0;
success:
if ((end = memchr(beg + len, '\n', (buf + size) - (beg + len))) != 0)
if ((end = memchr(beg + len, eol, (buf + size) - (beg + len))) != 0)
++end;
else
end = buf + size;