Merge FreeBSD changes into GNU grep 2.4.

This commit is contained in:
David E. O'Brien 2000-01-04 03:25:40 +00:00
parent e3bfb27984
commit 7e5b33c6cd
6 changed files with 558 additions and 347 deletions

View File

@ -353,15 +353,20 @@ static reg_syntax_t syntax_bits, syntax_bits_set;
/* Flag for case-folding letters into sets. */ /* Flag for case-folding letters into sets. */
static int case_fold; static int case_fold;
/* End-of-line byte in data. */
static unsigned char eolbyte;
/* Entry point to set syntax options. */ /* Entry point to set syntax options. */
void void
dfasyntax(bits, fold) dfasyntax(bits, fold, eol)
reg_syntax_t bits; reg_syntax_t bits;
int fold; int fold;
int eol;
{ {
syntax_bits_set = 1; syntax_bits_set = 1;
syntax_bits = bits; syntax_bits = bits;
case_fold = fold; case_fold = fold;
eolbyte = eol;
} }
/* Lexical analyzer. All the dross that deals with the obnoxious /* Lexical analyzer. All the dross that deals with the obnoxious
@ -580,11 +585,32 @@ lex()
goto normal_char; goto normal_char;
if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0)) if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
goto normal_char; goto normal_char;
minrep = maxrep = 0; if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
goto normal_char;
if (syntax_bits & RE_NO_BK_BRACES)
{
/* Scan ahead for a valid interval; if it's not valid,
treat it as a literal '{'. */
int lo = -1, hi = -1;
char const *p = lexptr;
char const *lim = p + lexleft;
for (; p != lim && ISDIGIT (*p); p++)
lo = (lo < 0 ? 0 : lo * 10) + *p - '0';
if (p != lim && *p == ',')
while (++p != lim && ISDIGIT (*p))
hi = (hi < 0 ? 0 : hi * 10) + *p - '0';
else
hi = lo;
if (p == lim || *p != '}'
|| lo < 0 || RE_DUP_MAX < hi || (0 <= hi && hi < lo))
goto normal_char;
}
minrep = 0;
/* Cases: /* Cases:
{M} - exact count {M} - exact count
{M,} - minimum count, maximum is infinity {M,} - minimum count, maximum is infinity
{,M} - 0 through M
{M,N} - M through N */ {M,N} - M through N */
FETCH(c, _("unfinished repeat count")); FETCH(c, _("unfinished repeat count"));
if (ISDIGIT(c)) if (ISDIGIT(c))
@ -598,16 +624,27 @@ lex()
minrep = 10 * minrep + c - '0'; minrep = 10 * minrep + c - '0';
} }
} }
else if (c != ',') else
dfaerror(_("malformed repeat count")); dfaerror(_("malformed repeat count"));
if (c == ',') if (c == ',')
for (;;) {
{ FETCH (c, _("unfinished repeat count"));
FETCH(c, _("unfinished repeat count")); if (! ISDIGIT (c))
if (!ISDIGIT(c)) maxrep = -1;
break; else
maxrep = 10 * maxrep + c - '0'; {
} maxrep = c - '0';
for (;;)
{
FETCH (c, _("unfinished repeat count"));
if (! ISDIGIT (c))
break;
maxrep = 10 * maxrep + c - '0';
}
if (0 <= maxrep && maxrep < minrep)
dfaerror (_("malformed repeat count"));
}
}
else else
maxrep = minrep; maxrep = minrep;
if (!(syntax_bits & RE_NO_BK_BRACES)) if (!(syntax_bits & RE_NO_BK_BRACES))
@ -659,7 +696,7 @@ lex()
zeroset(ccl); zeroset(ccl);
notset(ccl); notset(ccl);
if (!(syntax_bits & RE_DOT_NEWLINE)) if (!(syntax_bits & RE_DOT_NEWLINE))
clrbit('\n', ccl); clrbit(eolbyte, ccl);
if (syntax_bits & RE_DOT_NOT_NULL) if (syntax_bits & RE_DOT_NOT_NULL)
clrbit('\0', ccl); clrbit('\0', ccl);
laststart = 0; laststart = 0;
@ -776,7 +813,7 @@ lex()
{ {
notset(ccl); notset(ccl);
if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
clrbit('\n', ccl); clrbit(eolbyte, ccl);
} }
laststart = 0; laststart = 0;
return lasttok = CSET + charclass_index(ccl); return lasttok = CSET + charclass_index(ccl);
@ -942,7 +979,7 @@ closure()
{ {
ntokens = nsubtoks(dfa->tindex); ntokens = nsubtoks(dfa->tindex);
tindex = dfa->tindex - ntokens; tindex = dfa->tindex - ntokens;
if (maxrep == 0) if (maxrep < 0)
addtok(PLUS); addtok(PLUS);
if (minrep == 0) if (minrep == 0)
addtok(QMARK); addtok(QMARK);
@ -1605,7 +1642,7 @@ dfastate(s, d, trans)
for (i = 0; i < NOTCHAR; ++i) for (i = 0; i < NOTCHAR; ++i)
if (IS_WORD_CONSTITUENT(i)) if (IS_WORD_CONSTITUENT(i))
setbit(i, letters); setbit(i, letters);
setbit('\n', newline); setbit(eolbyte, newline);
} }
zeroset(matches); zeroset(matches);
@ -1626,7 +1663,7 @@ dfastate(s, d, trans)
{ {
if (! MATCHES_NEWLINE_CONTEXT(pos.constraint, if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
d->states[s].newline, 1)) d->states[s].newline, 1))
clrbit('\n', matches); clrbit(eolbyte, matches);
if (! MATCHES_NEWLINE_CONTEXT(pos.constraint, if (! MATCHES_NEWLINE_CONTEXT(pos.constraint,
d->states[s].newline, 0)) d->states[s].newline, 0))
for (j = 0; j < CHARCLASS_INTS; ++j) for (j = 0; j < CHARCLASS_INTS; ++j)
@ -1737,7 +1774,7 @@ dfastate(s, d, trans)
state_letter = state; state_letter = state;
for (i = 0; i < NOTCHAR; ++i) for (i = 0; i < NOTCHAR; ++i)
trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state; trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state;
trans['\n'] = state_newline; trans[eolbyte] = state_newline;
} }
else else
for (i = 0; i < NOTCHAR; ++i) for (i = 0; i < NOTCHAR; ++i)
@ -1761,7 +1798,7 @@ dfastate(s, d, trans)
/* Find out if the new state will want any context information. */ /* Find out if the new state will want any context information. */
wants_newline = 0; wants_newline = 0;
if (tstbit('\n', labels[i])) if (tstbit(eolbyte, labels[i]))
for (j = 0; j < follows.nelem; ++j) for (j = 0; j < follows.nelem; ++j)
if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint)) if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
wants_newline = 1; wants_newline = 1;
@ -1793,7 +1830,7 @@ dfastate(s, d, trans)
{ {
int c = j * INTBITS + k; int c = j * INTBITS + k;
if (c == '\n') if (c == eolbyte)
trans[c] = state_newline; trans[c] = state_newline;
else if (IS_WORD_CONSTITUENT(c)) else if (IS_WORD_CONSTITUENT(c))
trans[c] = state_letter; trans[c] = state_letter;
@ -1884,8 +1921,8 @@ build_state(s, d)
/* Keep the newline transition in a special place so we can use it as /* Keep the newline transition in a special place so we can use it as
a sentinel. */ a sentinel. */
d->newlines[s] = trans['\n']; d->newlines[s] = trans[eolbyte];
trans['\n'] = -1; trans[eolbyte] = -1;
if (ACCEPTING(s, *d)) if (ACCEPTING(s, *d))
d->fails[s] = trans; d->fails[s] = trans;
@ -1933,6 +1970,7 @@ dfaexec(d, begin, end, newline, count, backref)
register unsigned char *p; /* Current input character. */ register unsigned char *p; /* Current input character. */
register int **trans, *t; /* Copy of d->trans so it can be optimized register int **trans, *t; /* Copy of d->trans so it can be optimized
into a register. */ into a register. */
register unsigned char eol = eolbyte; /* Likewise for eolbyte. */
static int sbit[NOTCHAR]; /* Table for anding with d->success. */ static int sbit[NOTCHAR]; /* Table for anding with d->success. */
static int sbit_init; static int sbit_init;
@ -1943,7 +1981,7 @@ dfaexec(d, begin, end, newline, count, backref)
sbit_init = 1; sbit_init = 1;
for (i = 0; i < NOTCHAR; ++i) for (i = 0; i < NOTCHAR; ++i)
sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1; sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
sbit['\n'] = 4; sbit[eol] = 4;
} }
if (! d->tralloc) if (! d->tralloc)
@ -1952,7 +1990,7 @@ dfaexec(d, begin, end, newline, count, backref)
s = s1 = 0; s = s1 = 0;
p = (unsigned char *) begin; p = (unsigned char *) begin;
trans = d->trans; trans = d->trans;
*end = '\n'; *end = eol;
for (;;) for (;;)
{ {
@ -1980,7 +2018,7 @@ dfaexec(d, begin, end, newline, count, backref)
} }
/* If the previous character was a newline, count it. */ /* If the previous character was a newline, count it. */
if (count && (char *) p <= end && p[-1] == '\n') if (count && (char *) p <= end && p[-1] == eol)
++*count; ++*count;
/* Check if we've run off the end of the buffer. */ /* Check if we've run off the end of the buffer. */
@ -1994,7 +2032,7 @@ dfaexec(d, begin, end, newline, count, backref)
continue; continue;
} }
if (p[-1] == '\n' && newline) if (p[-1] == eol && newline)
{ {
s = d->newlines[s1]; s = d->newlines[s1];
continue; continue;

View File

@ -322,9 +322,10 @@ struct dfa
/* Entry points. */ /* Entry points. */
/* dfasyntax() takes two arguments; the first sets the syntax bits described /* dfasyntax() takes three arguments; the first sets the syntax bits described
earlier in this file, and the second sets the case-folding flag. */ earlier in this file, the second sets the case-folding flag, and the
extern void dfasyntax PARAMS ((reg_syntax_t, int)); third specifies the line terminator. */
extern void dfasyntax PARAMS ((reg_syntax_t, int, int));
/* Compile the given string of the given length into the given struct dfa. /* Compile the given string of the given length into the given struct dfa.
Final argument is a flag specifying whether to build a searching or an Final argument is a flag specifying whether to build a searching or an

View File

@ -1,26 +1,68 @@
.\" grep man page .\" grep man page
.\" $FreeBSD$ .\" $FreeBSD$
.if !\n(.g \{\
. if !\w|\*(lq| \{\
. ds lq ``
. if \w'\(lq' .ds lq "\(lq
. \}
. if !\w|\*(rq| \{\
. ds rq ''
. if \w'\(rq' .ds rq "\(rq
. \}
.\}
.de Id .de Id
.ds Dt \\$4 .ds Dt \\$4
.. ..
.Id $Id: grep.1,v 1.1 1998/11/22 06:45:20 alainm Exp $ .Id $Id: grep.1,v 1.7 1999/10/12 20:41:01 alainm Exp $
.TH GREP 1 \*(Dt "GNU Project" .TH GREP 1 \*(Dt "GNU Project"
.SH NAME .SH NAME
grep, egrep, fgrep, zgrep \- print lines matching a pattern grep, egrep, fgrep, zgrep \- print lines matching a pattern
.SH SYNOPSIS .SH SYNOPSIS
.B grep .B grep
[-[AB] NUM] [-CEFGVZabchiLlnqrsvwxyUu] [-e PATTERN | -f FILE] .RB [ \- [ ABC ]
[-d ACTION] [--directories=ACTION] .IR NUM ]
[--extended-regexp] [--fixed-strings] [--basic-regexp] .RB [ \-EFGHLUVZabchilnqrsuvwxyz ]
[--regexp=PATTERN] [--file=FILE] [--ignore-case] [--word-regexp] .RB [ \-e
[--line-regexp] [--line-regexp] [--no-messages] [--revert-match] .I PATTERN
[--version] [--help] [--byte-offset] [--line-number] |
[--with-filename] [--no-filename] [--quiet] [--silent] [--text] .B \-f
[--files-without-match] [--files-with-matcces] [--count] .IR FILE ]
[--before-context=NUM] [--after-context=NUM] [--context] .RB [ \-d
[--binary] [--unix-byte-offsets] [--recursive] .IR ACTION ]
[--decompress] .RB [ \-\^\-directories=\fIACTION\fP ]
.I files... .RB [ \-\^\-extended-regexp ]
.RB [ \-\^\-fixed-strings ]
.RB [ \-\^\-basic-regexp ]
.RB [ \-\^\-regexp=\fIPATTERN\fP ]
.RB [ \-\^\-file=\fIFILE\fP ]
.RB [ \-\^\-ignore-case ]
.RB [ \-\^\-word-regexp ]
.RB [ \-\^\-line-regexp ]
.RB [ \-\^\-line-regexp ]
.RB [ \-\^\-no-messages ]
.RB [ \-\^\-invert-match ]
.RB [ \-\^\-version ]
.RB [ \-\^\-help ]
.RB [ \-\^\-byte-offset ]
.RB [ \-\^\-line-number ]
.RB [ \-\^\-with-filename ]
.RB [ \-\^\-no-filename ]
.RB [ \-\^\-quiet ]
.RB [ \-\^\-silent ]
.RB [ \-\^\-text ]
.RB [ \-\^\-files-without-match ]
.RB [ \-\^\-files-with-matches ]
.RB [ \-\^\-count ]
.RB [ \-\^\-before-context=\fINUM\fP ]
.RB [ \-\^\-after-context=\fINUM\fP ]
.RB [ \-\^\-context [ =\fINUM\fP ]]
.RB [ \-\^\-binary ]
.RB [ \-\^\-unix-byte-offsets ]
.RB [ \-\^\-mmap ]
.RB [ \-\^\-null ]
.RB [ \-\^\-recursive ]
.RB [ \-\^\-decompress ]
.RI [ file .\|.\|.]
.SH DESCRIPTION .SH DESCRIPTION
.PP .PP
.B grep .B grep
@ -41,83 +83,83 @@ There are three major variants of
controlled by the following options. controlled by the following options.
.PD 0 .PD 0
.TP .TP
.B \-G, --basic-regexp .BR \-G ", " \-\^\-basic-regexp
Interpret Interpret
.I pattern .I pattern
as a basic regular expression (see below). This is the default. as a basic regular expression (see below). This is the default.
.TP .TP
.B \-E, --extended-regexp .BR \-E ", " \-\^\-extended-regexp
Interpret Interpret
.I pattern .I pattern
as an extended regular expression (see below). as an extended regular expression (see below).
.TP .TP
.B \-F, --fixed-strings .BR \-F ", " \-\^\-fixed-strings
Interpret Interpret
.I pattern .I pattern
as a list of fixed strings, separated by newlines, as a list of fixed strings, separated by newlines,
any of which is to be matched. any of which is to be matched.
.LP .PP
In addition, two variant programs In addition, two variant programs
.B egrep .B egrep
and and
.B fgrep .B fgrep
are available. are available.
.B egrep .B egrep
is similar (but not identical) to is the same as
.BR "grep\ \-E" , .BR "grep\ \-E" .
and is compatible with the historical Unix
.BR egrep .
.B fgrep .B fgrep
is the same as is the same as
.BR "grep\ \-F" . .BR "grep\ \-F" .
.B zgrep .B zgrep
is the same as is the same as
.BR "grep\ \-Z" . .BR "grep\ \-z" .
.PD .PD
.LP .PP
All variants of All variants of
.B grep .B grep
understand the following options: understand the following options:
.PD 0 .PD 0
.TP .TP
.BI \-A " NUM" ", --after-context=" NUM .BI \-A " NUM" "\fR,\fP \-\^\-after-context=" NUM
Print Print
.I NUM .I NUM
lines of trailing context after matching lines. lines of trailing context after matching lines.
.TP .TP
.BI \-B " NUM" ", --before-context=" NUM .BI \-B " NUM" "\fR,\fP \-\^\-before-context=" NUM
Print Print
.I NUM .I NUM
lines of leading context before matching lines. lines of leading context before matching lines.
.TP .TP
.BI \-C ,\ --context"[=NUM]" .BI \-C " \fR[\fPNUM\fR]\fP" "\fR,\fP \-\^\-context\fR[\fP=" NUM\fR]\fP
Print Print
.I NUM .I NUM
lines (default 2) of output context. lines (default 2) of output context.
.TP .TP
.BI \- NUM \ .BI \- NUM
Same as --context=NUM lines of leading and trailing context. However, Same as
.BI \-\^\-context= NUM
lines of leading and trailing context. However,
.B grep .B grep
will never print any given line more than once. will never print any given line more than once.
.TP .TP
.B \-V, --version .BR \-V ", " \-\^\-version
Print the version number of Print the version number of
.B grep .B grep
to standard error. This version number should to standard error. This version number should
be included in all bug reports (see below). be included in all bug reports (see below).
.TP .TP
.B \-b, --byte-offset .BR \-b ", " \-\^\-byte-offset
Print the byte offset within the input file before Print the byte offset within the input file before
each line of output. each line of output.
.TP .TP
.B \-c, --count .BR \-c ", " \-\^\-count
Suppress normal output; instead print a count of Suppress normal output; instead print a count of
matching lines for each input file. matching lines for each input file.
With the With the
.B \-v, --revert-match .BR \-v ", " \-\^\-invert-match
option (see below), count non-matching lines. option (see below), count non-matching lines.
.TP .TP
.BI \-d " ACTION" ", --directories=" ACTION .BI \-d " ACTION" "\fR,\fP \-\^\-directories=" ACTION
If an input file is a directory, use If an input file is a directory, use
.I ACTION .I ACTION
to process it. By default, to process it. By default,
@ -140,75 +182,78 @@ this is equivalent to the
.B \-r .B \-r
option. option.
.TP .TP
.BI \-e " PATTERN" ", --regexp=" PATTERN .BI \-e " PATTERN" "\fR,\fP \-\^\-regexp=" PATTERN
Use Use
.I PATTERN .I PATTERN
as the pattern; useful to protect patterns beginning with as the pattern; useful to protect patterns beginning with
.BR \- . .BR \- .
.TP .TP
.BI \-f " FILE" ", --file=" FILE .BI \-f " FILE" "\fR,\fP \-\^\-file=" FILE
Obtain patterns from Obtain patterns from
.IR FILE , .IR FILE ,
one per line. one per line.
The empty file contains zero patterns, and therefore matches nothing. The empty file contains zero patterns, and therefore matches nothing.
.TP .TP
.B \-h, --no-filename .BR \-H ", " \-\^\-with-filename
Print the filename for each match.
.TP
.BR \-h ", " \-\^\-no-filename
Suppress the prefixing of filenames on output Suppress the prefixing of filenames on output
when multiple files are searched. when multiple files are searched.
.TP .TP
.B \-i, --ignore-case .BR \-i ", " \-\^\-ignore-case
Ignore case distinctions in both the Ignore case distinctions in both the
.I pattern .I pattern
and the input files. and the input files.
.TP .TP
.B \-L, --files-without-match .BR \-L ", " \-\^\-files-without-match
Suppress normal output; instead print the name Suppress normal output; instead print the name
of each input file from which no output would of each input file from which no output would
normally have been printed. The scanning will stop normally have been printed. The scanning will stop
on the first match. on the first match.
.TP .TP
.B \-l, --files-with-matches .BR \-l ", " \-\^\-files-with-matches
Suppress normal output; instead print Suppress normal output; instead print
the name of each input file from which output the name of each input file from which output
would normally have been printed. The scanning will would normally have been printed. The scanning will
stop on the first match. stop on the first match.
.TP .TP
.B \-n, --line-number .BR \-n ", " \-\^\-line-number
Prefix each line of output with the line number Prefix each line of output with the line number
within its input file. within its input file.
.TP .TP
.B \-q, --quiet, --silent .BR \-q ", " \-\^\-quiet ", " \-\^\-silent
Quiet; suppress normal output. The scanning will stop Quiet; suppress normal output. The scanning will stop
on the first match. on the first match.
Also see the Also see the
.B \-s .B \-s
or or
.B --no-messages .B \-\^\-no-messages
option below. option below.
.TP .TP
.B \-r, --recursive .BR \-r ", " \-\^\-recursive
Read all files under each directory, recursively; Read all files under each directory, recursively;
this is equivalent to the this is equivalent to the
.B "\-d recurse" .B "\-d recurse"
option. option.
.TP .TP
.B \-s, --no-messages .BR \-s ", " \-\^\-no-messages
Suppress error messages about nonexistent or unreadable files. Suppress error messages about nonexistent or unreadable files.
Portability note: unlike GNU Portability note: unlike \s-1GNU\s0
.BR grep , .BR grep ,
BSD traditional
.B grep .B grep
does not comply with POSIX.2, because BSD did not conform to \s-1POSIX.2\s0, because traditional
.B grep .B grep
lacks a lacked a
.B \-q .B \-q
option and its option and its
.B \-s .B \-s
option behaves like GNU option behaved like \s-1GNU\s0
.BR grep 's .BR grep 's
.B \-q .B \-q
option. option.
Shell scripts intended to be portable to BSD Shell scripts intended to be portable to traditional
.B grep .B grep
should avoid both should avoid both
.B \-q .B \-q
@ -216,7 +261,7 @@ and
.B \-s .B \-s
and should redirect output to /dev/null instead. and should redirect output to /dev/null instead.
.TP .TP
.B \-a, --text .BR \-a ", " \-\^\-text
Do not suppress output lines that contain binary data. Do not suppress output lines that contain binary data.
Normally, if the first few bytes of a file indicate that Normally, if the first few bytes of a file indicate that
the file contains binary data, the file contains binary data,
@ -227,10 +272,10 @@ This option causes
to act as if the file is a text file, to act as if the file is a text file,
even if it would otherwise be treated as binary. even if it would otherwise be treated as binary.
.TP .TP
.B \-v, --revert-match .BR \-v ", " \-\^\-invert-match
Invert the sense of matching, to select non-matching lines. Invert the sense of matching, to select non-matching lines.
.TP .TP
.B \-w, --word-regexp .BR \-w ", " \-\^\-word-regexp
Select only those lines containing matches that form whole words. Select only those lines containing matches that form whole words.
The test is that the matching substring must either be at the The test is that the matching substring must either be at the
beginning of the line, or preceded by a non-word constituent beginning of the line, or preceded by a non-word constituent
@ -238,14 +283,14 @@ character. Similarly, it must be either at the end of the line
or followed by a non-word constituent character. Word-constituent or followed by a non-word constituent character. Word-constituent
characters are letters, digits, and the underscore. characters are letters, digits, and the underscore.
.TP .TP
.B \-x, --line-regexp .BR \-x ", " \-\^\-line-regexp
Select only those matches that exactly match the whole line. Select only those matches that exactly match the whole line.
.TP .TP
.B \-y .B \-y
Obsolete synonym for Obsolete synonym for
.BR \-i . .BR \-i .
.TP .TP
.B \-U, --binary .BR \-U ", " \-\^\-binary
Treat the file(s) as binary. By default, under MS-DOS and MS-Windows, Treat the file(s) as binary. By default, under MS-DOS and MS-Windows,
.BR grep .BR grep
guesses the file type by looking at the contents of the first 32KB guesses the file type by looking at the contents of the first 32KB
@ -261,10 +306,11 @@ work correctly). Specifying
overrules this guesswork, causing all files to be read and passed to the overrules this guesswork, causing all files to be read and passed to the
matching mechanism verbatim; if the file is a text file with CR/LF matching mechanism verbatim; if the file is a text file with CR/LF
pairs at the end of each line, this will cause some regular pairs at the end of each line, this will cause some regular
expressions to fail. This option is only supported on MS-DOS and expressions to fail.
This option has no effect on platforms other than MS-DOS and
MS-Windows. MS-Windows.
.TP .TP
.B \-u, --unix-byte-offsets .BR \-u ", " \-\^\-unix-byte-offsets
Report Unix-style byte offsets. This switch causes Report Unix-style byte offsets. This switch causes
.B grep .B grep
to report byte offsets as if the file were a Unix-style text file, i.e. with to report byte offsets as if the file were a Unix-style text file, i.e. with
@ -272,13 +318,46 @@ CR characters stripped off. This will produce results identical to running
.B grep .B grep
on a Unix machine. This option has no effect unless on a Unix machine. This option has no effect unless
.B \-b .B \-b
option is also used; it is only supported on MS-DOS and MS-Windows. option is also used;
.PD it has no effect on platforms other than MS-DOS and MS-Windows.
.TP
.B \-\^\-mmap
If possible, use the
.BR mmap (2)
system call to read input, instead of
the default
.BR read (2)
system call. In some situations,
.B -\^-mmap
yields better performance. However,
.B -\^-mmap
can cause undefined behavior (including core dumps)
if an input file shrinks while
.B grep
is operating, or if an I/O error occurs.
.TP
.BR \-Z ", " \-\^\-null
Output a zero byte (the \s-1ASCII\s0
.B NUL
character) instead of the character that normally follows a file name.
For example,
.B "grep \-lZ"
outputs a zero byte after each file name instead of the usual newline.
This option makes the output unambiguous, even in the presence of file
names containing unusual characters like newlines. This option can be
used with commands like
.BR "find \-print0" ,
.BR "perl \-0" ,
.BR "sort \-z" ,
and
.B "xargs \-0"
to process arbitrary file names,
even those that contain newline characters.
.LP .LP
The following option is only available if compiled with the zlib(3) library: The following option is only available if compiled with the zlib(3) library:
.PD 0 .PD 0
.TP .TP
.B \-Z, --decompress .B \-z, --decompress
Decompress the input data before searching. Decompress the input data before searching.
.PD .PD
.SH "REGULAR EXPRESSIONS" .SH "REGULAR EXPRESSIONS"
@ -289,8 +368,8 @@ expressions, by using various operators to combine smaller expressions.
.PP .PP
.B grep .B grep
understands two different versions of regular expression syntax: understands two different versions of regular expression syntax:
``basic'' and ``extended.'' In \*(lqbasic\*(rq and \*(lqextended.\*(rq In
.RB "GNU\ " grep , .RB "\s-1GNU\s0\ " grep ,
there is no difference in available functionality using either syntax. there is no difference in available functionality using either syntax.
In other implementations, basic regular expressions are less powerful. In other implementations, basic regular expressions are less powerful.
The following description applies to extended regular expressions; The following description applies to extended regular expressions;
@ -402,11 +481,6 @@ The preceding item is matched
.I n .I n
or more times. or more times.
.TP .TP
.BI {, m }
The preceding item is optional and is matched at most
.I m
times.
.TP
.BI { n , m } .BI { n , m }
The preceding item is matched at least The preceding item is matched at least
.I n .I n
@ -456,12 +530,35 @@ versions
and and
.BR \e) . .BR \e) .
.PP .PP
In Traditional
.B egrep .B egrep
the metacharacter did not support the
.B { .B {
loses its special meaning; instead use metacharacter, and some
.BR \e{ . .B egrep
implementations support
.B \e{
instead, so portable scripts should avoid
.B {
in
.B egrep
patterns and should use
.B [{]
to match a literal
.BR { .
.PP
\s-1GNU\s0
.B egrep
attempts to support traditional usage by assuming that
.B {
is not special if it would be the start of an invalid interval
specification. For example, the shell command
.B "egrep '{1'"
searches for the two-character string
.B {1
instead of reporting a syntax error in the regular expression.
\s-1POSIX.2\s0 allows this behavior as an extension, but portable scripts
should avoid it.
.SH ENVIRONMENT .SH ENVIRONMENT
The environment variable The environment variable
.B GREP_OPTIONS .B GREP_OPTIONS
@ -483,7 +580,8 @@ other system errors.
.PP .PP
Email bug reports to Email bug reports to
.BR bug-gnu-utils@gnu.org . .BR bug-gnu-utils@gnu.org .
Be sure to include the word ``grep'' somewhere in the ``Subject:'' field. Be sure to include the word \*(lqgrep\*(rq somewhere in the
\*(lqSubject:\*(rq field.
.PP .PP
Large repetition counts in the Large repetition counts in the
.BI { m , n } .BI { m , n }
@ -495,3 +593,5 @@ and space, and may cause
to run out of memory. to run out of memory.
.PP .PP
Backreferences are very slow, and may require exponential time. Backreferences are very slow, and may require exponential time.
.\" Work around problems with some troff -man implementations.
.br

View File

@ -58,6 +58,17 @@ static int show_help;
/* If non-zero, print the version on standard output and exit. */ /* If non-zero, print the version on standard output and exit. */
static int show_version; static int show_version;
/* If nonzero, use mmap if possible. */
static int mmap_option;
/* Short options. */
static char const short_options[] =
#if HAVE_LIBZ > 0
"0123456789A:B:C::EFGHUVX:abcd:e:f:hiLlnqrsuvwxyZz";
#else
"0123456789A:B:C::EFGHUVX:abcd:e:f:hiLlnqrsuvwxyZz";
#endif
/* Long options equivalences. */ /* Long options equivalences. */
static struct option long_options[] = static struct option long_options[] =
{ {
@ -78,18 +89,23 @@ static struct option long_options[] =
{"ignore-case", no_argument, NULL, 'i'}, {"ignore-case", no_argument, NULL, 'i'},
{"line-number", no_argument, NULL, 'n'}, {"line-number", no_argument, NULL, 'n'},
{"line-regexp", no_argument, NULL, 'x'}, {"line-regexp", no_argument, NULL, 'x'},
{"mmap", no_argument, &mmap_option, 1},
{"no-filename", no_argument, NULL, 'h'}, {"no-filename", no_argument, NULL, 'h'},
{"no-messages", no_argument, NULL, 's'}, {"no-messages", no_argument, NULL, 's'},
#if HAVE_LIBZ > 0
{"null", no_argument, NULL, /*'Z'*/ 1},
#else
{"null", no_argument, NULL, 'Z'},
#endif
{"null-data", no_argument, NULL, 'z'},
{"quiet", no_argument, NULL, 'q'}, {"quiet", no_argument, NULL, 'q'},
{"recursive", no_argument, NULL, 'r'}, {"recursive", no_argument, NULL, 'r'},
{"regexp", required_argument, NULL, 'e'}, {"regexp", required_argument, NULL, 'e'},
{"revert-match", no_argument, NULL, 'v'}, {"invert-match", no_argument, NULL, 'v'},
{"silent", no_argument, NULL, 'q'}, {"silent", no_argument, NULL, 'q'},
{"text", no_argument, NULL, 'a'}, {"text", no_argument, NULL, 'a'},
#if O_BINARY
{"binary", no_argument, NULL, 'U'}, {"binary", no_argument, NULL, 'U'},
{"unix-byte-offsets", no_argument, NULL, 'u'}, {"unix-byte-offsets", no_argument, NULL, 'u'},
#endif
{"version", no_argument, NULL, 'V'}, {"version", no_argument, NULL, 'V'},
{"with-filename", no_argument, NULL, 'H'}, {"with-filename", no_argument, NULL, 'H'},
{"word-regexp", no_argument, NULL, 'w'}, {"word-regexp", no_argument, NULL, 'w'},
@ -100,10 +116,13 @@ static struct option long_options[] =
}; };
/* Define flags declared in grep.h. */ /* Define flags declared in grep.h. */
/* I do not know why we need this decl, while if you build GNU grep 2.4 by
hand you don't... */
char const *matcher; char const *matcher;
int match_icase; int match_icase;
int match_words; int match_words;
int match_lines; int match_lines;
unsigned char eolbyte;
/* For error messages. */ /* For error messages. */
static char *prog; static char *prog;
@ -121,7 +140,10 @@ static enum
static int ck_atoi PARAMS ((char const *, int *)); static int ck_atoi PARAMS ((char const *, int *));
static void usage PARAMS ((int)) __attribute__((noreturn)); static void usage PARAMS ((int)) __attribute__((noreturn));
static void error PARAMS ((const char *, int)); static void error PARAMS ((const char *, int));
static int setmatcher PARAMS ((char const *)); static void setmatcher PARAMS ((char const *));
static int install_matcher PARAMS ((char const *));
static int prepend_args PARAMS ((char const *, char *, char **));
static void prepend_default_options PARAMS ((char const *, int *, char ***));
static char *page_alloc PARAMS ((size_t, char **)); static char *page_alloc PARAMS ((size_t, char **));
static int reset PARAMS ((int, char const *, struct stats *)); static int reset PARAMS ((int, char const *, struct stats *));
static int fillbuf PARAMS ((size_t, struct stats *)); static int fillbuf PARAMS ((size_t, struct stats *));
@ -221,14 +243,15 @@ static char *ubuffer; /* Unaligned base of buffer. */
static char *buffer; /* Base of buffer. */ static char *buffer; /* Base of buffer. */
static size_t bufsalloc; /* Allocated size of buffer save region. */ static size_t bufsalloc; /* Allocated size of buffer save region. */
static size_t bufalloc; /* Total buffer size. */ static size_t bufalloc; /* Total buffer size. */
#define PREFERRED_SAVE_FACTOR 5 /* Preferred value of bufalloc / bufsalloc. */
static int bufdesc; /* File descriptor. */ static int bufdesc; /* File descriptor. */
static char *bufbeg; /* Beginning of user-visible stuff. */ static char *bufbeg; /* Beginning of user-visible stuff. */
static char *buflim; /* Limit of user-visible stuff. */ static char *buflim; /* Limit of user-visible stuff. */
static size_t pagesize; /* alignment of memory pages */ static size_t pagesize; /* alignment of memory pages */
static off_t bufoffset; /* Read offset; defined on regular files. */
#if defined(HAVE_MMAP) #if defined(HAVE_MMAP)
static int bufmapped; /* True for ordinary files. */ static int bufmapped; /* True if buffer is memory-mapped. */
static off_t bufoffset; /* What read() normally remembers. */
static off_t initial_bufoffset; /* Initial value of bufoffset. */ static off_t initial_bufoffset; /* Initial value of bufoffset. */
#endif #endif
@ -245,32 +268,26 @@ static int Zflag; /* uncompress before searching. */
? (val) \ ? (val) \
: (val) + ((alignment) - (size_t) (val) % (alignment))) : (val) + ((alignment) - (size_t) (val) % (alignment)))
/* Return the address of a new page-aligned buffer of size SIZE. Set /* Return the address of a page-aligned buffer of size SIZE,
*UP to the newly allocated (but possibly unaligned) buffer used to reallocating it from *UP. Set *UP to the newly allocated (but
*build the aligned buffer. To free the buffer, free (*UP). */ possibly unaligned) buffer used to build the aligned buffer. To
free the buffer, free (*UP). */
static char * static char *
page_alloc (size, up) page_alloc (size, up)
size_t size; size_t size;
char **up; char **up;
{ {
/* HAVE_WORKING_VALLOC means that valloc is properly declared, and
you can free the result of valloc. This symbol is not (yet)
autoconfigured. It can be useful to define HAVE_WORKING_VALLOC
while debugging, since some debugging memory allocators might
catch more bugs if this symbol is enabled. */
#if HAVE_WORKING_VALLOC
*up = valloc (size);
return *up;
#else
size_t asize = size + pagesize - 1; size_t asize = size + pagesize - 1;
if (size <= asize) if (size <= asize)
{ {
*up = malloc (asize); char *p = *up ? realloc (*up, asize) : malloc (asize);
if (*up) if (p)
return ALIGN_TO (*up, pagesize); {
*up = p;
return ALIGN_TO (p, pagesize);
}
} }
return NULL; return NULL;
#endif
} }
/* Reset the buffer for a new file, returning zero if we should skip it. /* Reset the buffer for a new file, returning zero if we should skip it.
@ -281,7 +298,9 @@ reset (fd, file, stats)
char const *file; char const *file;
struct stats *stats; struct stats *stats;
{ {
if (pagesize == 0) if (pagesize)
bufsalloc = ALIGN_TO (bufalloc / PREFERRED_SAVE_FACTOR, pagesize);
else
{ {
size_t ubufsalloc; size_t ubufsalloc;
pagesize = getpagesize (); pagesize = getpagesize ();
@ -293,162 +312,212 @@ reset (fd, file, stats)
ubufsalloc = BUFSALLOC; ubufsalloc = BUFSALLOC;
#endif #endif
bufsalloc = ALIGN_TO (ubufsalloc, pagesize); bufsalloc = ALIGN_TO (ubufsalloc, pagesize);
bufalloc = 5 * bufsalloc; bufalloc = PREFERRED_SAVE_FACTOR * bufsalloc;
/* The 1 byte of overflow is a kludge for dfaexec(), which /* The 1 byte of overflow is a kludge for dfaexec(), which
inserts a sentinel newline at the end of the buffer inserts a sentinel newline at the end of the buffer
being searched. There's gotta be a better way... */ being searched. There's gotta be a better way... */
if (bufsalloc < ubufsalloc if (bufsalloc < ubufsalloc
|| bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc || bufalloc / PREFERRED_SAVE_FACTOR != bufsalloc
|| bufalloc + 1 < bufalloc
|| ! (buffer = page_alloc (bufalloc + 1, &ubuffer))) || ! (buffer = page_alloc (bufalloc + 1, &ubuffer)))
fatal (_("memory exhausted"), 0); fatal (_("memory exhausted"), 0);
bufbeg = buffer;
buflim = buffer;
} }
#if HAVE_LIBZ > 0 #if HAVE_LIBZ > 0
if (Zflag) { if (Zflag)
{
gzbufdesc = gzdopen(fd, "r"); gzbufdesc = gzdopen(fd, "r");
if (gzbufdesc == NULL) if (gzbufdesc == NULL)
fatal(_("memory exhausted"), 0); fatal(_("memory exhausted"), 0);
} }
#endif #endif
buflim = buffer;
bufdesc = fd; bufdesc = fd;
if ( if (fstat (fd, &stats->stat) != 0)
#if defined(HAVE_MMAP) {
1 error ("fstat", errno);
#else return 0;
directories != READ_DIRECTORIES }
#endif
)
if (fstat (fd, &stats->stat) != 0)
{
error ("fstat", errno);
return 0;
}
if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode)) if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
return 0; return 0;
#if defined(HAVE_MMAP)
if ( if (
#if HAVE_LIBZ > 0 #if HAVE_LIBZ > 0
Zflag || Zflag ||
#endif #endif
!S_ISREG (stats->stat.st_mode)) S_ISREG (stats->stat.st_mode))
bufmapped = 0; {
if (file)
bufoffset = 0;
else
{
bufoffset = lseek (fd, 0, SEEK_CUR);
if (bufoffset < 0)
{
error ("lseek", errno);
return 0;
}
}
#ifdef HAVE_MMAP
initial_bufoffset = bufoffset;
bufmapped = mmap_option && bufoffset % pagesize == 0;
#endif
}
else else
{ {
bufmapped = 1; #ifdef HAVE_MMAP
bufoffset = initial_bufoffset = file ? 0 : lseek (fd, 0, 1); bufmapped = 0;
}
#endif #endif
}
return 1; return 1;
} }
/* Read new stuff into the buffer, saving the specified /* Read new stuff into the buffer, saving the specified
amount of old stuff. When we're done, 'bufbeg' points amount of old stuff. When we're done, 'bufbeg' points
to the beginning of the buffer contents, and 'buflim' to the beginning of the buffer contents, and 'buflim'
points just after the end. Return count of new stuff. */ points just after the end. Return zero if there's an error. */
static int static int
fillbuf (save, stats) fillbuf (save, stats)
size_t save; size_t save;
struct stats *stats; struct stats *stats;
{ {
int cc; size_t fillsize = 0;
#if defined(HAVE_MMAP) int cc = 1;
caddr_t maddr; size_t readsize;
#endif
if (save > bufsalloc) /* Offset from start of unaligned buffer to start of old stuff
that we want to save. */
size_t saved_offset = buflim - ubuffer - save;
if (bufsalloc < save)
{ {
char *nubuffer; size_t aligned_save = ALIGN_TO (save, pagesize);
char *nbuffer; size_t maxalloc = (size_t) -1;
size_t newalloc;
while (save > bufsalloc) if (S_ISREG (stats->stat.st_mode))
bufsalloc *= 2; {
bufalloc = 5 * bufsalloc; /* Calculate an upper bound on how much memory we should allocate.
if (bufalloc / 5 != bufsalloc || bufalloc + 1 < bufalloc We can't use ALIGN_TO here, since off_t might be longer than
|| ! (nbuffer = page_alloc (bufalloc + 1, &nubuffer))) size_t. Watch out for arithmetic overflow. */
off_t to_be_read = stats->stat.st_size - bufoffset;
size_t slop = to_be_read % pagesize;
off_t aligned_to_be_read = to_be_read + (slop ? pagesize - slop : 0);
off_t maxalloc_off = aligned_save + aligned_to_be_read;
if (0 <= maxalloc_off && maxalloc_off == (size_t) maxalloc_off)
maxalloc = maxalloc_off;
}
/* Grow bufsalloc until it is at least as great as `save'; but
if there is an overflow, just grow it to the next page boundary. */
while (bufsalloc < save)
if (bufsalloc < bufsalloc * 2)
bufsalloc *= 2;
else
{
bufsalloc = aligned_save;
break;
}
/* Grow the buffer size to be PREFERRED_SAVE_FACTOR times
bufsalloc.... */
newalloc = PREFERRED_SAVE_FACTOR * bufsalloc;
if (maxalloc < newalloc)
{
/* ... except don't grow it more than a pagesize past the
file size, as that might cause unnecessary memory
exhaustion if the file is large. */
newalloc = maxalloc;
bufsalloc = aligned_save;
}
/* Check that the above calculations made progress, which might
not occur if there is arithmetic overflow. If there's no
progress, or if the new buffer size is larger than the old
and buffer reallocation fails, report memory exhaustion. */
if (bufsalloc < save || newalloc < save
|| (newalloc == save && newalloc != maxalloc)
|| (bufalloc < newalloc
&& ! (buffer
= page_alloc ((bufalloc = newalloc) + 1, &ubuffer))))
fatal (_("memory exhausted"), 0); fatal (_("memory exhausted"), 0);
}
bufbeg = nbuffer + bufsalloc - save; bufbeg = buffer + bufsalloc - save;
memcpy (bufbeg, buflim - save, save); memmove (bufbeg, ubuffer + saved_offset, save);
free (ubuffer); readsize = bufalloc - bufsalloc;
ubuffer = nubuffer;
buffer = nbuffer;
}
else
{
bufbeg = buffer + bufsalloc - save;
memcpy (bufbeg, buflim - save, save);
}
#if defined(HAVE_MMAP) #if defined(HAVE_MMAP)
if (bufmapped && bufoffset % pagesize == 0 if (bufmapped)
&& stats->stat.st_size - bufoffset >= bufalloc - bufsalloc)
{ {
maddr = buffer + bufsalloc; size_t mmapsize = readsize;
maddr = mmap (maddr, bufalloc - bufsalloc, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_FIXED, bufdesc, bufoffset); /* Don't mmap past the end of the file; some hosts don't allow this.
if (maddr == (caddr_t) -1) Use `read' on the last page. */
if (stats->stat.st_size - bufoffset < mmapsize)
{ {
/* This used to issue a warning, but on some hosts mmapsize = stats->stat.st_size - bufoffset;
(e.g. Solaris 2.5) mmap can fail merely because some mmapsize -= mmapsize % pagesize;
other process has an advisory read lock on the file.
There's no point alarming the user about this misfeature. */
#if 0
fprintf (stderr, _("%s: warning: %s: %s\n"), prog, filename,
strerror (errno));
#endif
goto tryread;
} }
#if 0
/* You might thing this (or MADV_WILLNEED) would help, if (mmapsize
but it doesn't, at least not on a Sun running 4.1. && (mmap ((caddr_t) (buffer + bufsalloc), mmapsize,
In fact, it actually slows us down about 30%! */ PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED,
madvise (maddr, bufalloc - bufsalloc, MADV_SEQUENTIAL); bufdesc, bufoffset)
#endif != (caddr_t) -1))
cc = bufalloc - bufsalloc;
bufoffset += cc;
}
else
{
tryread:
/* We come here when we're not going to use mmap() any more.
Note that we need to synchronize the file offset the
first time through. */
if (bufmapped)
{ {
bufmapped = 0; /* Do not bother to use madvise with MADV_SEQUENTIAL or
if (bufoffset != initial_bufoffset) MADV_WILLNEED on the mmapped memory. One might think it
lseek (bufdesc, bufoffset, 0); would help, but it slows us down about 30% on SunOS 4.1. */
fillsize = mmapsize;
} }
#if HAVE_LIBZ > 0
if (Zflag)
cc = gzread (gzbufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
else else
#endif {
cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc); /* Stop using mmap on this file. Synchronize the file
offset. Do not warn about mmap failures. On some hosts
(e.g. Solaris 2.5) mmap can fail merely because some
other process has an advisory read lock on the file.
There's no point alarming the user about this misfeature. */
bufmapped = 0;
if (bufoffset != initial_bufoffset
&& lseek (bufdesc, bufoffset, SEEK_SET) < 0)
{
error ("lseek", errno);
cc = 0;
}
}
} }
#else
#if HAVE_LIBZ > 0
if (Zflag)
cc = gzread (gzbufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
else
#endif
cc = read (bufdesc, buffer + bufsalloc, bufalloc - bufsalloc);
#endif /*HAVE_MMAP*/ #endif /*HAVE_MMAP*/
#if O_BINARY
if (cc > 0) if (! fillsize)
cc = undossify_input (buffer + bufsalloc, cc); {
ssize_t bytesread;
do
#if HAVE_LIBZ > 0
if (Zflag)
bytesread = gzread (gzbufdesc, buffer + bufsalloc, readsize);
else
#endif #endif
if (cc > 0) bytesread = read (bufdesc, buffer + bufsalloc, readsize);
buflim = buffer + bufsalloc + cc; while (bytesread < 0 && errno == EINTR);
else if (bytesread < 0)
buflim = buffer + bufsalloc; cc = 0;
else
fillsize = bytesread;
}
bufoffset += fillsize;
#if O_BINARY
if (fillsize)
fillsize = undossify_input (buffer + bufsalloc, fillsize);
#endif
buflim = buffer + bufsalloc + fillsize;
return cc; return cc;
} }
/* Flags controlling the style of output. */ /* Flags controlling the style of output. */
static int always_text; /* Assume the input is always text. */ static int always_text; /* Assume the input is always text. */
static int filename_mask; /* If zero, output nulls after filenames. */
static int out_quiet; /* Suppress all normal output. */ static int out_quiet; /* Suppress all normal output. */
static int out_invert; /* Print nonmatching stuff. */ static int out_invert; /* Print nonmatching stuff. */
static int out_file; /* Print filenames. */ static int out_file; /* Print filenames. */
@ -480,11 +549,9 @@ nlscan (lim)
char *lim; char *lim;
{ {
char *beg; char *beg;
for (beg = lastnl; (beg = memchr (beg, eolbyte, lim - beg)); beg++)
for (beg = lastnl; beg < lim; ++beg) totalnl++;
if (*beg == '\n') lastnl = lim;
++totalnl;
lastnl = beg;
} }
static void static void
@ -513,7 +580,7 @@ prline (beg, lim, sep)
int sep; int sep;
{ {
if (out_file) if (out_file)
printf ("%s%c", filename, sep); printf ("%s%c", filename, sep & filename_mask);
if (out_line) if (out_line)
{ {
nlscan (beg); nlscan (beg);
@ -546,7 +613,7 @@ prpending (lim)
while (pending > 0 && lastout < lim) while (pending > 0 && lastout < lim)
{ {
--pending; --pending;
if ((nl = memchr (lastout, '\n', lim - lastout)) != 0) if ((nl = memchr (lastout, eolbyte, lim - lastout)) != 0)
++nl; ++nl;
else else
nl = lim; nl = lim;
@ -564,6 +631,7 @@ prtext (beg, lim, nlinesp)
{ {
static int used; /* avoid printing "--" before any output */ static int used; /* avoid printing "--" before any output */
char *bp, *p, *nl; char *bp, *p, *nl;
char eol = eolbyte;
int i, n; int i, n;
if (!out_quiet && pending > 0) if (!out_quiet && pending > 0)
@ -580,7 +648,7 @@ prtext (beg, lim, nlinesp)
if (p > bp) if (p > bp)
do do
--p; --p;
while (p > bp && p[-1] != '\n'); while (p > bp && p[-1] != eol);
/* We only print the "--" separator if our output is /* We only print the "--" separator if our output is
discontiguous from the last output in the file. */ discontiguous from the last output in the file. */
@ -589,7 +657,7 @@ prtext (beg, lim, nlinesp)
while (p < beg) while (p < beg)
{ {
nl = memchr (p, '\n', beg - p); nl = memchr (p, eol, beg - p);
prline (p, nl + 1, '-'); prline (p, nl + 1, '-');
p = nl + 1; p = nl + 1;
} }
@ -600,7 +668,7 @@ prtext (beg, lim, nlinesp)
/* Caller wants a line count. */ /* Caller wants a line count. */
for (n = 0; p < lim; ++n) for (n = 0; p < lim; ++n)
{ {
if ((nl = memchr (p, '\n', lim - p)) != 0) if ((nl = memchr (p, eol, lim - p)) != 0)
++nl; ++nl;
else else
nl = lim; nl = lim;
@ -614,7 +682,7 @@ prtext (beg, lim, nlinesp)
if (!out_quiet) if (!out_quiet)
prline (beg, lim, ':'); prline (beg, lim, ':');
pending = out_after; pending = out_quiet ? 0 : out_after;
used = 1; used = 1;
} }
@ -629,13 +697,14 @@ grepbuf (beg, lim)
int nlines, n; int nlines, n;
register char *p, *b; register char *p, *b;
char *endp; char *endp;
char eol = eolbyte;
nlines = 0; nlines = 0;
p = beg; p = beg;
while ((b = (*execute)(p, lim - p, &endp)) != 0) while ((b = (*execute)(p, lim - p, &endp)) != 0)
{ {
/* Avoid matching the empty line at the end of the buffer. */ /* Avoid matching the empty line at the end of the buffer. */
if (b == lim && ((b > beg && b[-1] == '\n') || b == beg)) if (b == lim && ((b > beg && b[-1] == eol) || b == beg))
break; break;
if (!out_invert) if (!out_invert)
{ {
@ -672,6 +741,7 @@ grep (fd, file, stats)
int not_text; int not_text;
size_t residue, save; size_t residue, save;
char *beg, *lim; char *beg, *lim;
char eol = eolbyte;
if (!reset (fd, file, stats)) if (!reset (fd, file, stats))
return 0; return 0;
@ -700,7 +770,7 @@ grep (fd, file, stats)
residue = 0; residue = 0;
save = 0; save = 0;
if (fillbuf (save, stats) < 0) if (! fillbuf (save, stats))
{ {
if (! (is_EISDIR (errno, file) && suppress_errors)) if (! (is_EISDIR (errno, file) && suppress_errors))
error (filename, errno); error (filename, errno);
@ -708,7 +778,7 @@ grep (fd, file, stats)
} }
not_text = (! (always_text | out_quiet) not_text = (! (always_text | out_quiet)
&& memchr (bufbeg, '\0', buflim - bufbeg)); && memchr (bufbeg, eol ? '\0' : '\200', buflim - bufbeg));
done_on_match += not_text; done_on_match += not_text;
out_quiet += not_text; out_quiet += not_text;
@ -720,7 +790,7 @@ grep (fd, file, stats)
if (buflim - bufbeg == save) if (buflim - bufbeg == save)
break; break;
beg = bufbeg + save - residue; beg = bufbeg + save - residue;
for (lim = buflim; lim > beg && lim[-1] != '\n'; --lim) for (lim = buflim; lim > beg && lim[-1] != eol; --lim)
; ;
residue = buflim - lim; residue = buflim - lim;
if (beg < lim) if (beg < lim)
@ -738,7 +808,7 @@ grep (fd, file, stats)
++i; ++i;
do do
--beg; --beg;
while (beg > bufbeg && beg[-1] != '\n'); while (beg > bufbeg && beg[-1] != eol);
} }
if (beg != lastout) if (beg != lastout)
lastout = 0; lastout = 0;
@ -746,7 +816,7 @@ grep (fd, file, stats)
totalcc += buflim - bufbeg - save; totalcc += buflim - bufbeg - save;
if (out_line) if (out_line)
nlscan (beg); nlscan (beg);
if (fillbuf (save, stats) < 0) if (! fillbuf (save, stats))
{ {
if (! (is_EISDIR (errno, file) && suppress_errors)) if (! (is_EISDIR (errno, file) && suppress_errors))
error (filename, errno); error (filename, errno);
@ -784,7 +854,8 @@ grepfile (file, stats)
} }
else else
{ {
desc = open (file, O_RDONLY); while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR)
continue;
if (desc < 0) if (desc < 0)
{ {
@ -843,30 +914,26 @@ grepfile (file, stats)
if (count_matches) if (count_matches)
{ {
if (out_file) if (out_file)
printf ("%s:", filename); printf ("%s%c", filename, ':' & filename_mask);
printf ("%d\n", count); printf ("%d\n", count);
} }
if (count) status = !count;
{ if (list_files == 1 - 2 * status)
status = 0; printf ("%s%c", filename, '\n' & filename_mask);
if (list_files == 1)
printf ("%s\n", filename);
}
else
{
status = 1;
if (list_files == -1)
printf ("%s\n", filename);
}
#if HAVE_LIBZ > 0 #if HAVE_LIBZ > 0
if (Zflag) if (Zflag)
gzclose(gzbufdesc); gzclose(gzbufdesc);
else else
#endif #endif
if (file && close (desc) != 0) if (file)
error (file, errno); while (close (desc) != 0)
if (errno != EINTR)
{
error (file, errno);
break;
}
} }
return status; return status;
@ -882,8 +949,8 @@ grepdir (dir, stats)
char *name_space; char *name_space;
for (ancestor = stats; (ancestor = ancestor->parent) != 0; ) for (ancestor = stats; (ancestor = ancestor->parent) != 0; )
if (! ((ancestor->stat.st_ino ^ stats->stat.st_ino) if (ancestor->stat.st_ino == stats->stat.st_ino
| (ancestor->stat.st_dev ^ stats->stat.st_dev))) && ancestor->stat.st_dev == stats->stat.st_dev)
{ {
if (!suppress_errors) if (!suppress_errors)
fprintf (stderr, _("%s: warning: %s: %s\n"), prog, dir, fprintf (stderr, _("%s: warning: %s: %s\n"), prog, dir,
@ -946,24 +1013,29 @@ int status;
printf (_("Usage: %s [OPTION]... PATTERN [FILE] ...\n"), prog); printf (_("Usage: %s [OPTION]... PATTERN [FILE] ...\n"), prog);
printf (_("\ printf (_("\
Search for PATTERN in each FILE or standard input.\n\ Search for PATTERN in each FILE or standard input.\n\
Example: %s -i 'hello.*world' menu.h main.c\n\
\n\ \n\
Regexp selection and interpretation:\n\ Regexp selection and interpretation:\n"), prog);
printf (_("\
-E, --extended-regexp PATTERN is an extended regular expression\n\ -E, --extended-regexp PATTERN is an extended regular expression\n\
-F, --fixed-regexp PATTERN is a fixed string separated by newlines\n\ -F, --fixed-strings PATTERN is a set of newline-separated strings\n\
-G, --basic-regexp PATTERN is a basic regular expression\n\ -G, --basic-regexp PATTERN is a basic regular expression\n"));
printf (_("\
-e, --regexp=PATTERN use PATTERN as a regular expression\n\ -e, --regexp=PATTERN use PATTERN as a regular expression\n\
-f, --file=FILE obtain PATTERN from FILE\n\ -f, --file=FILE obtain PATTERN from FILE\n\
-i, --ignore-case ignore case distinctions\n\ -i, --ignore-case ignore case distinctions\n\
-w, --word-regexp force PATTERN to match only whole words\n\ -w, --word-regexp force PATTERN to match only whole words\n\
-x, --line-regexp force PATTERN to match only whole lines\n")); -x, --line-regexp force PATTERN to match only whole lines\n\
-z, --null-data a data line ends in 0 byte, not newline\n"));
printf (_("\ printf (_("\
\n\ \n\
Miscellaneous:\n\ Miscellaneous:\n\
-s, --no-messages suppress error messages\n\ -s, --no-messages suppress error messages\n\
-v, --revert-match select non-matching lines\n\ -v, --invert-match select non-matching lines\n\
-V, --version print version information and exit\n\ -V, --version print version information and exit\n\
--help display this help and exit\n\
-Z, --decompress decompress input before searching (HAVE_LIBZ=1)\n\ -Z, --decompress decompress input before searching (HAVE_LIBZ=1)\n\
--help display this help and exit\n")); --mmap use memory-mapped input if possible\n"));
printf (_("\ printf (_("\
\n\ \n\
Output control:\n\ Output control:\n\
@ -978,31 +1050,42 @@ Output control:\n\
-r, --recursive equivalent to --directories=recurse.\n\ -r, --recursive equivalent to --directories=recurse.\n\
-L, --files-without-match only print FILE names containing no match\n\ -L, --files-without-match only print FILE names containing no match\n\
-l, --files-with-matches only print FILE names containing matches\n\ -l, --files-with-matches only print FILE names containing matches\n\
-c, --count only print a count of matching lines per FILE\n")); -c, --count only print a count of matching lines per FILE\n\
--null print 0 byte after FILE name\n"));
printf (_("\ printf (_("\
\n\ \n\
Context control:\n\ Context control:\n\
-B, --before-context=NUM print NUM lines of leading context\n\ -B, --before-context=NUM print NUM lines of leading context\n\
-A, --after-context=NUM print NUM lines of trailing context\n\ -A, --after-context=NUM print NUM lines of trailing context\n\
-C, --context[=NUM] print NUM (default 2) lines of output context\n\ -C, --context[=NUM] print NUM (default 2) lines of output context\n\
unless overriden by -A or -B\n\ unless overridden by -A or -B\n\
-NUM same as --context=NUM\n\ -NUM same as --context=NUM\n\
-U, --binary do not strip CR characters at EOL (MSDOS)\n\ -U, --binary do not strip CR characters at EOL (MSDOS)\n\
-u, --unix-byte-offsets report offsets as if CRs were not there (MSDOS)\n\ -u, --unix-byte-offsets report offsets as if CRs were not there (MSDOS)\n\
\n\ \n\
If no -[GEF], then `egrep' assumes -E, `fgrep' -F, else -G.\n\ `egrep' means `grep -E'. `fgrep' means `grep -F'.\n\
With no FILE, or when FILE is -, read standard input. If less than\n\ With no FILE, or when FILE is -, read standard input. If less than\n\
two FILEs given, assume -h. Exit with 0 if matches, with 1 if none.\n\ two FILEs given, assume -h. Exit status is 0 if match, 1 if no match,\n\
Exit with 2 if syntax errors or system errors.\n")); and 2 if trouble.\n"));
printf (_("\nReport bugs to <bug-gnu-utils@gnu.org>.\n")); printf (_("\nReport bugs to <bug-gnu-utils@gnu.org>.\n"));
} }
exit (status); exit (status);
} }
/* Set the matcher to M, reporting any conflicts. */
static void
setmatcher (m)
char const *m;
{
if (matcher && strcmp (matcher, m) != 0)
fatal (_("conflicting matchers specified"), 0);
matcher = m;
}
/* Go through the matchers vector and look for the specified matcher. /* Go through the matchers vector and look for the specified matcher.
If we find it, install it in compile and execute, and return 1. */ If we find it, install it in compile and execute, and return 1. */
static int static int
setmatcher (name) install_matcher (name)
char const *name; char const *name;
{ {
int i; int i;
@ -1158,7 +1241,8 @@ main (argc, argv)
keys = NULL; keys = NULL;
keycc = 0; keycc = 0;
with_filenames = 0; with_filenames = 0;
matcher = NULL; eolbyte = '\n';
filename_mask = ~0;
/* The value -1 means to use DEFAULT_CONTEXT. */ /* The value -1 means to use DEFAULT_CONTEXT. */
out_after = out_before = -1; out_after = out_before = -1;
@ -1179,15 +1263,8 @@ main (argc, argv)
prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv); prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
while ((opt = getopt_long (argc, argv, while ((opt = getopt_long (argc, argv, short_options, long_options, NULL))
#if O_BINARY != -1)
"0123456789A:B:C::EFGHVX:abcd:e:f:hiLlnqrsvwxyUu",
#elif HAVE_LIBZ > 0
"0123456789A:B:C::EFGHRVX:Zabcd:e:f:hiLlnqrsvwxy",
#else
"0123456789A:B:C::EFGHRVX:abcd:e:f:hiLlnqrsvwxy",
#endif
long_options, NULL)) != EOF)
switch (opt) switch (opt)
{ {
case '0': case '0':
@ -1229,44 +1306,33 @@ main (argc, argv)
default_context = 2; default_context = 2;
break; break;
case 'E': case 'E':
if (matcher && strcmp (matcher, "posix-egrep") != 0) setmatcher ("egrep");
fatal (_("you may specify only one of -E, -F, or -G"), 0);
matcher = "posix-egrep";
break; break;
case 'F': case 'F':
if (matcher && strcmp(matcher, "fgrep") != 0) setmatcher ("fgrep");
fatal(_("you may specify only one of -E, -F, or -G"), 0);;
matcher = "fgrep";
break; break;
case 'G': case 'G':
if (matcher && strcmp (matcher, "grep") != 0) setmatcher ("grep");
fatal (_("you may specify only one of -E, -F, or -G"), 0);
matcher = "grep";
break; break;
case 'H': case 'H':
with_filenames = 1; with_filenames = 1;
break; break;
#if O_BINARY
case 'U': case 'U':
#if O_BINARY
dos_use_file_type = DOS_BINARY; dos_use_file_type = DOS_BINARY;
#endif
break; break;
case 'u': case 'u':
#if O_BINARY
dos_report_unix_offset = 1; dos_report_unix_offset = 1;
break;
#endif #endif
break;
case 'V': case 'V':
show_version = 1; show_version = 1;
break; break;
case 'X': case 'X':
if (matcher) setmatcher (optarg);
fatal (_("matcher already specified"), 0);
matcher = optarg;
break; break;
#if HAVE_LIBZ > 0
case 'Z':
Zflag = 1;
break;
#endif
case 'a': case 'a':
always_text = 1; always_text = 1;
break; break;
@ -1357,6 +1423,16 @@ main (argc, argv)
case 'x': case 'x':
match_lines = 1; match_lines = 1;
break; break;
case 'Z':
#if HAVE_LIBZ > 0
Zflag = 1;
#else
filename_mask = 0;
#endif
break;
case 'z':
eolbyte = '\0';
break;
case 0: case 0:
/* long options */ /* long options */
break; break;
@ -1370,9 +1446,12 @@ main (argc, argv)
if (out_before < 0) if (out_before < 0)
out_before = default_context; out_before = default_context;
if (! matcher)
matcher = prog;
if (show_version) if (show_version)
{ {
printf (_("grep (GNU grep) %s\n"), VERSION); printf (_("%s (GNU grep) %s\n"), matcher, VERSION);
printf ("\n"); printf ("\n");
printf (_("\ printf (_("\
Copyright (C) 1988, 1992-1998, 1999 Free Software Foundation, Inc.\n")); Copyright (C) 1988, 1992-1998, 1999 Free Software Foundation, Inc.\n"));
@ -1404,10 +1483,7 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"))
else else
usage (2); usage (2);
if (! matcher) if (!install_matcher (matcher) && !install_matcher ("default"))
matcher = prog;
if (!setmatcher (matcher) && !setmatcher ("default"))
abort (); abort ();
(*compile)(keys, keycc); (*compile)(keys, keycc);

View File

@ -37,14 +37,12 @@ extern struct matcher
char *(*execute) PARAMS ((char *, size_t, char **)); char *(*execute) PARAMS ((char *, size_t, char **));
} matchers[]; } matchers[];
/* Exported from grep.c. */
extern char const *matcher;
/* Exported from fgrepmat.c, egrepmat.c, grepmat.c. */ /* Exported from fgrepmat.c, egrepmat.c, grepmat.c. */
extern char const default_matcher[]; extern char const *matcher;
/* The following flags are exported from grep for the matchers /* The following flags are exported from grep for the matchers
to look at. */ to look at. */
extern int match_icase; /* -i */ extern int match_icase; /* -i */
extern int match_words; /* -w */ extern int match_words; /* -w */
extern int match_lines; /* -x */ extern int match_lines; /* -x */
extern unsigned char eolbyte; /* -z */

View File

@ -48,7 +48,6 @@ struct matcher matchers[] = {
{ "default", Gcompile, EGexecute }, { "default", Gcompile, EGexecute },
{ "grep", Gcompile, EGexecute }, { "grep", Gcompile, EGexecute },
{ "egrep", Ecompile, EGexecute }, { "egrep", Ecompile, EGexecute },
{ "posix-egrep", Ecompile, EGexecute },
{ "awk", Ecompile, EGexecute }, { "awk", Ecompile, EGexecute },
{ "fgrep", Fcompile, Fexecute }, { "fgrep", Fcompile, Fexecute },
{ 0, 0, 0 }, { 0, 0, 0 },
@ -61,7 +60,7 @@ struct matcher matchers[] = {
static struct dfa dfa; static struct dfa dfa;
/* Regex compiled regexp. */ /* Regex compiled regexp. */
static struct re_pattern_buffer regex; static struct re_pattern_buffer regexbuf;
/* KWset compiled pattern. For Ecompile and Gcompile, we compile /* KWset compiled pattern. For Ecompile and Gcompile, we compile
a list of strings, at least one of which is known to occur in a list of strings, at least one of which is known to occur in
@ -140,9 +139,9 @@ Gcompile(pattern, size)
const char *err; const char *err;
re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE); re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase); dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);
if ((err = re_compile_pattern(pattern, size, &regex)) != 0) if ((err = re_compile_pattern(pattern, size, &regexbuf)) != 0)
fatal(err, 0); fatal(err, 0);
/* In the match_words and match_lines cases, we use a different pattern /* In the match_words and match_lines cases, we use a different pattern
@ -155,7 +154,8 @@ Gcompile(pattern, size)
(^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$). (^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
In the whole-line case, we use the pattern: In the whole-line case, we use the pattern:
^(userpattern)$. ^(userpattern)$.
BUG: Using [A-Za-z_] is locale-dependent! */ BUG: Using [A-Za-z_] is locale-dependent!
So will use [:alnum:] */
char *n = malloc(size + 50); char *n = malloc(size + 50);
int i = 0; int i = 0;
@ -165,14 +165,14 @@ Gcompile(pattern, size)
if (match_lines) if (match_lines)
strcpy(n, "^\\("); strcpy(n, "^\\(");
if (match_words) if (match_words)
strcpy(n, "\\(^\\|[^0-9A-Za-z_]\\)\\("); strcpy(n, "\\(^\\|[^[:alnum:]_]\\)\\(");
i = strlen(n); i = strlen(n);
memcpy(n + i, pattern, size); memcpy(n + i, pattern, size);
i += size; i += size;
if (match_words) if (match_words)
strcpy(n + i, "\\)\\([^0-9A-Za-z_]\\|$\\)"); strcpy(n + i, "\\)\\([^[:alnum:]_]\\|$\\)");
if (match_lines) if (match_lines)
strcpy(n + i, "\\)$"); strcpy(n + i, "\\)$");
@ -192,23 +192,18 @@ Ecompile(pattern, size)
{ {
const char *err; const char *err;
if (strcmp(matcher, "posix-egrep") == 0) if (strcmp(matcher, "awk") == 0)
{
re_set_syntax(RE_SYNTAX_POSIX_EGREP);
dfasyntax(RE_SYNTAX_POSIX_EGREP, match_icase);
}
else if (strcmp(matcher, "awk") == 0)
{ {
re_set_syntax(RE_SYNTAX_AWK); re_set_syntax(RE_SYNTAX_AWK);
dfasyntax(RE_SYNTAX_AWK, match_icase); dfasyntax(RE_SYNTAX_AWK, match_icase, eolbyte);
} }
else else
{ {
re_set_syntax(RE_SYNTAX_EGREP); re_set_syntax (RE_SYNTAX_POSIX_EGREP);
dfasyntax(RE_SYNTAX_EGREP, match_icase); dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
} }
if ((err = re_compile_pattern(pattern, size, &regex)) != 0) if ((err = re_compile_pattern(pattern, size, &regexbuf)) != 0)
fatal(err, 0); fatal(err, 0);
/* In the match_words and match_lines cases, we use a different pattern /* In the match_words and match_lines cases, we use a different pattern
@ -221,7 +216,8 @@ Ecompile(pattern, size)
(^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$). (^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
In the whole-line case, we use the pattern: In the whole-line case, we use the pattern:
^(userpattern)$. ^(userpattern)$.
BUG: Using [A-Za-z_] is locale-dependent! */ BUG: Using [A-Za-z_] is locale-dependent!
so will use the char class */
char *n = malloc(size + 50); char *n = malloc(size + 50);
int i = 0; int i = 0;
@ -231,14 +227,14 @@ Ecompile(pattern, size)
if (match_lines) if (match_lines)
strcpy(n, "^("); strcpy(n, "^(");
if (match_words) if (match_words)
strcpy(n, "(^|[^0-9A-Za-z_])("); strcpy(n, "(^|[^[:alnum:]_])(");
i = strlen(n); i = strlen(n);
memcpy(n + i, pattern, size); memcpy(n + i, pattern, size);
i += size; i += size;
if (match_words) if (match_words)
strcpy(n + i, ")([^0-9A-Za-z_]|$)"); strcpy(n + i, ")([^[:alnum:]_]|$)");
if (match_lines) if (match_lines)
strcpy(n + i, ")$"); strcpy(n + i, ")$");
@ -258,6 +254,7 @@ EGexecute(buf, size, endp)
char **endp; char **endp;
{ {
register char *buflim, *beg, *end, save; register char *buflim, *beg, *end, save;
char eol = eolbyte;
int backref, start, len; int backref, start, len;
struct kwsmatch kwsm; struct kwsmatch kwsm;
static struct re_registers regs; /* This is static on account of a BRAIN-DEAD static struct re_registers regs; /* This is static on account of a BRAIN-DEAD
@ -275,10 +272,10 @@ EGexecute(buf, size, endp)
goto failure; goto failure;
/* Narrow down to the line containing the candidate, and /* Narrow down to the line containing the candidate, and
run it through DFA. */ run it through DFA. */
end = memchr(beg, '\n', buflim - beg); end = memchr(beg, eol, buflim - beg);
if (!end) if (!end)
end = buflim; end = buflim;
while (beg > buf && beg[-1] != '\n') while (beg > buf && beg[-1] != eol)
--beg; --beg;
save = *end; save = *end;
if (kwsm.index < lastexact) if (kwsm.index < lastexact)
@ -302,10 +299,10 @@ EGexecute(buf, size, endp)
if (!beg) if (!beg)
goto failure; goto failure;
/* Narrow down to the line we've found. */ /* Narrow down to the line we've found. */
end = memchr(beg, '\n', buflim - beg); end = memchr(beg, eol, buflim - beg);
if (!end) if (!end)
end = buflim; end = buflim;
while (beg > buf && beg[-1] != '\n') while (beg > buf && beg[-1] != eol)
--beg; --beg;
/* Successful, no backreferences encountered! */ /* Successful, no backreferences encountered! */
if (!backref) if (!backref)
@ -313,8 +310,8 @@ EGexecute(buf, size, endp)
} }
/* If we've made it to this point, this means DFA has seen /* If we've made it to this point, this means DFA has seen
a probable match, and we need to run it through Regex. */ a probable match, and we need to run it through Regex. */
regex.not_eol = 0; regexbuf.not_eol = 0;
if ((start = re_search(&regex, beg, end - beg, 0, end - beg, &regs)) >= 0) if ((start = re_search(&regexbuf, beg, end - beg, 0, end - beg, &regs)) >= 0)
{ {
len = regs.end[0] - start; len = regs.end[0] - start;
if ((!match_lines && !match_words) if ((!match_lines && !match_words)
@ -337,8 +334,8 @@ EGexecute(buf, size, endp)
{ {
/* Try a shorter length anchored at the same place. */ /* Try a shorter length anchored at the same place. */
--len; --len;
regex.not_eol = 1; regexbuf.not_eol = 1;
len = re_match(&regex, beg, start + len, start, &regs); len = re_match(&regexbuf, beg, start + len, start, &regs);
} }
if (len <= 0) if (len <= 0)
{ {
@ -346,8 +343,8 @@ EGexecute(buf, size, endp)
if (start == end - beg) if (start == end - beg)
break; break;
++start; ++start;
regex.not_eol = 0; regexbuf.not_eol = 0;
start = re_search(&regex, beg, end - beg, start = re_search(&regexbuf, beg, end - beg,
start, end - beg - start, &regs); start, end - beg - start, &regs);
len = regs.end[0] - start; len = regs.end[0] - start;
} }
@ -396,6 +393,7 @@ Fexecute(buf, size, endp)
{ {
register char *beg, *try, *end; register char *beg, *try, *end;
register size_t len; register size_t len;
char eol = eolbyte;
struct kwsmatch kwsmatch; struct kwsmatch kwsmatch;
for (beg = buf; beg <= buf + size; ++beg) for (beg = buf; beg <= buf + size; ++beg)
@ -405,9 +403,9 @@ Fexecute(buf, size, endp)
len = kwsmatch.size[0]; len = kwsmatch.size[0];
if (match_lines) if (match_lines)
{ {
if (beg > buf && beg[-1] != '\n') if (beg > buf && beg[-1] != eol)
continue; continue;
if (beg + len < buf + size && beg[len] != '\n') if (beg + len < buf + size && beg[len] != eol)
continue; continue;
goto success; goto success;
} }
@ -431,7 +429,7 @@ Fexecute(buf, size, endp)
return 0; return 0;
success: success:
if ((end = memchr(beg + len, '\n', (buf + size) - (beg + len))) != 0) if ((end = memchr(beg + len, eol, (buf + size) - (beg + len))) != 0)
++end; ++end;
else else
end = buf + size; end = buf + size;