Add BSD grep to the base system and make it our default grep.

Deliverables: Small and clean code (1,4 KSLOC vs GNU's 8,5 KSLOC),
              lower memory usage than GNU grep, GNU compatibility,
              BSD license.

TODO:         Performance is somewhat behind GNU grep but it is only
              significant for bigger searches.  The reason is complex, the
              most important factor is that GNU grep uses lots of
              optimizations to improve the speed of the regex library.
              First, we need a modern regex library (practically by adopting
              TRE), add support for GNU-style non-standard regexes and then
              reevaluate the performance issues and look for bottlenecks.  In
              the meantime, for those, who need better performance, it is
              possible to build GNU grep by setting WITH_GNU_GREP.

Approved by:            delphij (mentor)
Obtained from:          OpenBSD (http://www.openbsd.org/cgi-bin/cvsweb/src/usr.bin/grep/),
                        freegrep (http://github.com/howardjp/freegrep)
Sponsored by:           Google SoC 2008
Portbuild tests run by: kris, pav, erwin
Acknowledgements to:    fjoe (as SoC 2008 mentor),
                        everyone who helped in reviewing and testing
This commit is contained in:
Gabor Kovesdan 2010-07-22 19:11:57 +00:00
parent 582a734e13
commit 4dc88ebedf
19 changed files with 2583 additions and 3 deletions

View File

@ -22,6 +22,18 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 9.x IS SLOW:
machines to maximize performance. (To disable malloc debugging, run
ln -s aj /etc/malloc.conf.)
20100722:
BSD grep has been imported to the base system and it is built by
default. It is completely BSD licensed, highly GNU-compatible, uses
less memory than its GNU counterpart and has a small codebase.
However, it is slower than its GNU counterpart, which is mostly
noticeable for larger searches, for smaller ones it is measurable
but not significant. The reason is complex, the most important factor
is that we lack a modern and efficient regex library and GNU
overcomes this by optimizing the searches internally. Future work
on improving the regex performance is planned, for the meantime,
users that need better performance, can build GNU grep instead by
setting the WITH_GNU_GREP knob.
20100713:
Due to the import of powerpc64 support, all existing powerpc kernel

View File

@ -334,7 +334,6 @@ _clang_no=CLANG
GCOV \
GDB \
GNU \
GNU_GREP \
GPIB \
GROFF \
HTML \
@ -422,6 +421,7 @@ MK_${var}:= yes
BIND_XML \
${_clang_no} \
FDT \
GNU_GREP \
HESIOD \
IDEA
.if defined(WITH_${var}) && defined(WITHOUT_${var})

View File

@ -1,2 +0,0 @@
.\" $FreeBSD$
Set to not build GNU grep as a part of the base system.

View File

@ -0,0 +1,2 @@
.\" $FreeBSD$
Set to build the base system with GNU grep instead of BSD grep.

View File

@ -79,6 +79,7 @@ SUBDIR= alias \
getent \
getopt \
${_gprof} \
${_grep} \
gzip \
head \
${_hesinfo} \
@ -284,6 +285,10 @@ _calendar= calendar
_clang= clang
.endif
.if ${MK_GNU_GREP} != "yes"
_grep= grep
.endif
.if ${MK_HESIOD} != "no"
_hesinfo= hesinfo
.endif

35
usr.bin/grep/Makefile Normal file
View File

@ -0,0 +1,35 @@
# $FreeBSD$
# $OpenBSD: Makefile,v 1.6 2003/06/25 15:00:04 millert Exp $
PROG= grep
SRCS= fastgrep.c file.c grep.c queue.c util.c
# Alternate names under which the grep binary is installed.
# The last entry must not end in a backslash, otherwise the following
# MLINKS assignment is swallowed into this variable.
LINKS= ${BINDIR}/grep ${BINDIR}/egrep \
${BINDIR}/grep ${BINDIR}/fgrep \
${BINDIR}/grep ${BINDIR}/zgrep \
${BINDIR}/grep ${BINDIR}/zegrep \
${BINDIR}/grep ${BINDIR}/zfgrep
MLINKS= grep.1 egrep.1 \
grep.1 fgrep.1 \
grep.1 zgrep.1 \
grep.1 zegrep.1 \
grep.1 zfgrep.1
WARNS?= 6
LDADD= -lz -lbz2
DPADD= ${LIBZ} ${LIBBZ2}
.if !defined(WITHOUT_GNU_COMPAT)
CFLAGS+= -I/usr/include/gnu
LDADD+= -lgnuregex
DPADD+= ${LIBGNUREGEX}
.endif
.if !defined(WITHOUT_NLS)
.include "${.CURDIR}/nls/Makefile.inc"
.else
CFLAGS+= -DWITHOUT_NLS
.endif
.include <bsd.prog.mk>

333
usr.bin/grep/fastgrep.c Normal file
View File

@ -0,0 +1,333 @@
/* $OpenBSD: util.c,v 1.36 2007/10/02 17:59:18 otto Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* Copyright (C) 2008 Gabor Kovesdan <gabor@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* XXX: This file is a speed up for grep to cover the defects of the
* regex library. These optimizations should practically be implemented
* there keeping this code clean. This is a future TODO, but for the
* meantime, we need to use this workaround.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
#include "grep.h"
static int grep_cmp(const unsigned char *, const unsigned char *, size_t);
static void grep_revstr(unsigned char *, int);
/*
 * Prepares fg for fixed-string searching: stores a private copy of pat
 * and builds the per-byte shift table (qsBc) consulted by grep_search().
 */
void
fgrepcomp(fastgrep_t *fg, const char *pat)
{
	unsigned int byte, pos;

	/* Reset the descriptor; fixed strings carry no anchors. */
	fg->reversed = false;
	fg->eol = false;
	fg->bol = false;
	fg->len = strlen(pat);

	/* Keep our own NUL-terminated copy of the pattern. */
	fg->pattern = grep_malloc(fg->len + 1);
	memcpy(fg->pattern, pat, fg->len + 1);

	/* Default shift for every byte value is the whole pattern length. */
	byte = 0;
	while (byte <= UCHAR_MAX) {
		fg->qsBc[byte] = fg->len;
		byte++;
	}

	/*
	 * Bytes occurring in the pattern get a shorter shift.  Position 0
	 * is skipped: its shift would equal the default set above.
	 */
	pos = 1;
	while (pos < fg->len) {
		fg->qsBc[fg->pattern[pos]] = fg->len - pos;
		pos++;
	}
}
/*
 * Tries to turn the regular expression pat into a fast-search descriptor
 * so that the full regex engine can be avoided.  A leading '^', a
 * trailing '$' and an enclosing [[:<:]] ... [[:>:]] pair are stripped and
 * recorded (fg->bol, fg->eol, global wflag); the remainder is copied
 * into fg->pattern and the shift table fg->qsBc is built.
 *
 * Returns: -1 on failure (fg->pattern is freed and set to NULL),
 *           0 on success
 */
int
fastcomp(fastgrep_t *fg, const char *pat)
{
	unsigned int i;
	int firstHalfDot = -1;
	int firstLastHalfDot = -1;
	int hasDot = 0;
	int lastHalfDot = 0;
	int shiftPatternLen;
	size_t wordOff = 0;	/* bytes of "[[:<:]]" to skip when copying */
	bool bol = false;
	bool eol = false;

	/* Initialize. */
	fg->len = strlen(pat);
	fg->bol = false;
	fg->eol = false;
	fg->reversed = false;

	/* Remove end-of-line character ('$'). */
	if (fg->len > 0 && pat[fg->len - 1] == '$') {
		eol = true;
		fg->eol = true;
		fg->len--;
	}

	/* Remove beginning-of-line character ('^'). */
	if (pat[0] == '^') {
		bol = true;
		fg->bol = true;
		fg->len--;
	}

	/* Remove enclosing [[:<:]] and [[:>:]] (word match). */
	if (fg->len >= 14 &&
	    strncmp(pat + (fg->bol ? 1 : 0), "[[:<:]]", 7) == 0 &&
	    strncmp(pat + (fg->bol ? 1 : 0) + fg->len - 7, "[[:>:]]", 7) == 0) {
		fg->len -= 14;
		wordOff = 7;
		/* Word boundary is handled separately in util.c */
		wflag = true;
	}

	/*
	 * Copy pattern minus '^' and '$' characters as well as word
	 * match character classes at the beginning and ending of the
	 * string respectively.  The offset must be the length of the
	 * stripped "[[:<:]]" prefix, and only when it was really stripped
	 * here; wflag may also have been set from the command line.
	 */
	fg->pattern = grep_malloc(fg->len + 1);
	memcpy(fg->pattern, pat + (bol ? 1 : 0) + wordOff, fg->len);
	fg->pattern[fg->len] = '\0';

	/*
	 * Look for ways to cheat...er...avoid the full regex engine.
	 * NOTE(review): any character other than '.' makes us give up, so
	 * only patterns made entirely of dots take the fast path -- confirm
	 * whether plain literal characters were meant to be accepted too.
	 */
	for (i = 0; i < fg->len; i++) {
		/* Can still cheat? */
		if (fg->pattern[i] == '.') {
			hasDot = i;
			if (i < fg->len / 2) {
				if (firstHalfDot < 0)
					/* Closest dot to the beginning */
					firstHalfDot = i;
			} else {
				/* Closest dot to the end of the pattern. */
				lastHalfDot = i;
				if (firstLastHalfDot < 0)
					firstLastHalfDot = i;
			}
		} else {
			/* Free memory and let others know this is empty. */
			free(fg->pattern);
			fg->pattern = NULL;
			return (-1);
		}
	}

	/*
	 * Determine if a reverse search would be faster based on the placement
	 * of the dots.
	 */
	if ((!(lflag || cflag)) && ((!(bol || eol)) &&
	    ((lastHalfDot) && ((firstHalfDot < 0) ||
	    ((fg->len - (lastHalfDot + 1)) < (size_t)firstHalfDot)))) &&
	    !oflag && !color) {
		fg->reversed = true;
		hasDot = fg->len - (firstHalfDot < 0 ?
		    firstLastHalfDot : firstHalfDot) - 1;
		grep_revstr(fg->pattern, fg->len);
	}

	/*
	 * Normal Quick Search would require a shift based on the position the
	 * next character after the comparison is within the pattern.  With
	 * wildcards, the position of the last dot affects the maximum shift
	 * distance.
	 * The closer to the end the wild card is the slower the search.  A
	 * reverse version of this algorithm would be useful for wildcards near
	 * the end of the string.
	 *
	 * Examples:
	 * Pattern	Max shift
	 * -------	---------
	 * this		5
	 * .his		4
	 * t.is		3
	 * th.s		2
	 * thi.		1
	 */

	/* Adjust the shift based on location of the last dot ('.'). */
	shiftPatternLen = fg->len - hasDot;

	/* Preprocess pattern. */
	for (i = 0; i <= UCHAR_MAX; i++)
		fg->qsBc[i] = shiftPatternLen;
	for (i = hasDot + 1; i < fg->len; i++) {
		fg->qsBc[fg->pattern[i]] = fg->len - i;
	}

	/*
	 * Put pattern back to normal after pre-processing to allow for easy
	 * comparisons later.
	 */
	if (fg->reversed)
		grep_revstr(fg->pattern, fg->len);

	return (0);
}
/*
 * Searches the len bytes at data for the precompiled pattern fg.
 * For unanchored forward searches the scan starts at pmatch->rm_so.
 * On a match pmatch->rm_so / rm_eo are set to the matched byte range.
 *
 * Returns: 0 on match, REG_NOMATCH otherwise
 */
int
grep_search(fastgrep_t *fg, unsigned char *data, size_t len, regmatch_t *pmatch)
{
	unsigned int j;
	int ret = REG_NOMATCH;

	/* Nothing left to search once the start offset reached the end. */
	if (pmatch->rm_so == (ssize_t)len)
		return (ret);

	/* A '^' pattern can only match at offset 0; mark data as consumed. */
	if (fg->bol && pmatch->rm_so != 0) {
		pmatch->rm_so = len;
		pmatch->rm_eo = len;
		return (ret);
	}

	/* No point in going farther if we do not have enough data. */
	if (len < fg->len)
		return (ret);

	/* Only try once at the beginning or ending of the line. */
	if (fg->bol || fg->eol) {
		/* Simple text comparison. */
		/* Determine where in data to start search at. */
		j = fg->eol ? len - fg->len : 0;
		/* With both anchors the line must be exactly pattern-sized. */
		if (!((fg->bol && fg->eol) && (len != fg->len)))
			if (grep_cmp(fg->pattern, data + j,
			    fg->len) == -1) {
				pmatch->rm_so = j;
				pmatch->rm_eo = j + fg->len;
				ret = 0;
			}
	} else if (fg->reversed) {
		/* Quick Search algorithm, scanning from the end. */
		j = len;
		do {
			if (grep_cmp(fg->pattern, data + j - fg->len,
			    fg->len) == -1) {
				pmatch->rm_so = j - fg->len;
				pmatch->rm_eo = j;
				ret = 0;
				break;
			}
			/* Shift if within bounds, otherwise, we are done. */
			if (j == fg->len)
				break;
			j -= fg->qsBc[data[j - fg->len - 1]];
		} while (j >= fg->len);
	} else {
		/*
		 * Quick Search algorithm.  The window [j, j + fg->len) is
		 * checked against len before every comparison, including
		 * the first one: rm_so may point so close to the end that
		 * fewer than fg->len bytes remain.
		 */
		j = pmatch->rm_so;
		while (j <= (len - fg->len)) {
			if (grep_cmp(fg->pattern, data + j, fg->len) == -1) {
				pmatch->rm_so = j;
				pmatch->rm_eo = j + fg->len;
				ret = 0;
				break;
			}
			/* Shift if within bounds, otherwise, we are done. */
			if (j + fg->len == len)
				break;
			j += fg->qsBc[data[j + fg->len]];
		}
	}
	return (ret);
}
/*
 * Compares the first len positions of pat against data.  Unless fixed
 * string mode is active (grepbehave == GREP_FIXED), a '.' in pat matches
 * any character.  With iflag set both strings are converted to wide
 * characters and compared case-insensitively.
 *
 * Returns: i >= 0 on failure (position that it failed)
 *          -1 on success
 */
static int
grep_cmp(const unsigned char *pat, const unsigned char *data, size_t len)
{
	wchar_t *wdata, *wpat;
	size_t ndata, npat;
	unsigned int i;
	int ret = -1;

	if (!iflag) {
		for (i = 0; i < len; i++) {
			if ((pat[i] == data[i]) ||
			    ((grepbehave != GREP_FIXED) && pat[i] == '.'))
				continue;
			return (i);
		}
		return (-1);
	}

	/*
	 * An input that cannot be converted cannot be compared either;
	 * report a mismatch at position 0 instead of a bogus match.
	 */
	ndata = mbstowcs(NULL, (const char *)data, 0);
	npat = mbstowcs(NULL, (const char *)pat, 0);
	if (ndata == (size_t)-1 || npat == (size_t)-1)
		return (0);

	/* One extra slot each so the converted strings are terminated. */
	wdata = grep_malloc((ndata + 1) * sizeof(*wdata));
	wpat = grep_malloc((npat + 1) * sizeof(*wpat));
	if (mbstowcs(wdata, (const char *)data, ndata + 1) == (size_t)-1 ||
	    mbstowcs(wpat, (const char *)pat, npat + 1) == (size_t)-1) {
		ret = 0;
		goto out;
	}

	/*
	 * len counts bytes while the arrays hold characters, so never index
	 * beyond what was actually converted.
	 */
	for (i = 0; i < len && i < npat; i++) {
		if (i >= ndata) {
			/* Data ran out before the pattern did. */
			ret = i;
			break;
		}
		if ((towlower(wpat[i]) == towlower(wdata[i])) ||
		    ((grepbehave != GREP_FIXED) && wpat[i] == L'.'))
			continue;
		ret = i;
		break;
	}

out:
	/* Release the temporaries on every path, match or not. */
	free(wpat);
	free(wdata);
	return (ret);
}
/*
 * Reverses the first len bytes of str in place.
 * A len of one or less leaves the buffer untouched.
 */
static void
grep_revstr(unsigned char *str, int len)
{
	int head, tail;
	unsigned char saved;

	/* Walk inwards from both ends, swapping as we go. */
	for (head = 0, tail = len - 1; head < tail; head++, tail--) {
		saved = str[head];
		str[head] = str[tail];
		str[tail] = saved;
	}
}

255
usr.bin/grep/file.c Normal file
View File

@ -0,0 +1,255 @@
/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <bzlib.h>
#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
#include <zlib.h>
#include "grep.h"
static char fname[MAXPATHLEN]; /* file name */
#define MAXBUFSIZ (16 * 1024)
#define PREREAD_M 0.2
/* Some global variables for the buffering and reading. */
static char *lnbuf;
static size_t lnbuflen;
static unsigned char *binbuf;
static int binbufsiz;
unsigned char *binbufptr;
static int bzerr;
#define iswbinary(ch) (!iswspace((ch)) && iswcntrl((ch)) && \
(ch != L'\b') && (ch != L'\0'))
/*
 * Reads one character from f, using the reader that corresponds to the
 * global filebehave (plain stdio, gzip or bzip2).
 * Returns the character read, or -1 on failure / end of a bzip2 stream.
 * A bzip2 error other than BZ_SEQUENCE_ERROR terminates the program.
 */
int
grep_fgetc(struct file *f)
{
	unsigned char byte;

	if (filebehave == FILE_STDIO)
		return (fgetc(f->f));

	if (filebehave == FILE_GZIP)
		return (gzgetc(f->gzf));

	if (filebehave != FILE_BZIP)
		return (-1);

	/* bzip2: the status of the read is kept in bzerr for grep_feof(). */
	BZ2_bzRead(&bzerr, f->bzf, &byte, 1);
	if (bzerr == BZ_STREAM_END)
		return (-1);
	if (bzerr != BZ_OK && bzerr != BZ_SEQUENCE_ERROR)
		errx(2, "%s", getstr(2));
	return (byte);
}
/*
 * Tells whether the end of f has been reached.  For bzip2 input this is
 * derived from the status left behind by the last read (bzerr).
 * Returns non-zero at end of file and for an unknown filebehave.
 */
int
grep_feof(struct file *f)
{
	if (filebehave == FILE_STDIO)
		return (feof(f->f));

	if (filebehave == FILE_GZIP)
		return (gzeof(f->gzf));

	if (filebehave == FILE_BZIP)
		return (bzerr == BZ_STREAM_END);

	return (1);
}
/*
 * At the first call, fills in an internal buffer and checks if the given
 * file is a binary file and sets the binary flag accordingly.  Then returns
 * a single line and sets len to the length of the returned line.
 * At any other call returns a single line either from the internal buffer
 * or from the file if the buffer is exhausted and sets len to the length
 * of the line.
 *
 * The returned line is NUL-terminated, excludes the newline and lives in
 * a buffer that is reused by the next call.  Returns NULL when no more
 * input is available.
 */
char *
grep_fgetln(struct file *f, size_t *len)
{
	struct stat st;
	size_t bufsiz, i = 0;
	int ch = 0;

	/* Fill in the buffer if it is empty. */
	if (binbufptr == NULL) {
		/* Only pre-read to the buffer if we need the binary check. */
		if (binbehave != BINFILE_TEXT) {
			/* Standard input has no size; assume a full buffer. */
			if (f->stdin)
				st.st_size = MAXBUFSIZ;
			else if (stat(fname, &st) != 0)
				err(2, NULL);

			/* Small inputs pre-read half of the file only. */
			bufsiz = (MAXBUFSIZ > (st.st_size * PREREAD_M)) ?
			    (st.st_size / 2) : MAXBUFSIZ;

			binbuf = grep_malloc(sizeof(char) * bufsiz);

			while (i < bufsiz) {
				ch = grep_fgetc(f);
				if (ch == EOF)
					break;
				binbuf[i++] = ch;
			}

			/*
			 * When nothing was pre-read (empty or one-byte input)
			 * i - 1 would wrap around and make memchr() scan far
			 * beyond the buffer, so treat that case as text.
			 */
			f->binary = (i > 0) &&
			    memchr(binbuf, (filebehave != FILE_GZIP) ?
			    '\0' : '\200', i - 1) != NULL;
		}
		binbufsiz = i;
		binbufptr = binbuf;
	}

	/* Read a line whether from the buffer or from the file itself. */
	for (i = 0; !(grep_feof(f) &&
	    (binbufptr == &binbuf[binbufsiz])); i++) {
		if (binbufptr == &binbuf[binbufsiz]) {
			/* Pre-read data is used up; continue with the file. */
			ch = grep_fgetc(f);
		} else {
			ch = binbufptr[0];
			binbufptr++;
		}
		/* Grow the line buffer so that index i is always valid. */
		if (i >= lnbuflen) {
			lnbuflen *= 2;
			lnbuf = grep_realloc(lnbuf, ++lnbuflen);
		}
		if ((ch == '\n') || (ch == EOF)) {
			lnbuf[i] = '\0';
			break;
		} else
			lnbuf[i] = ch;
	}
	/* End of input and nothing collected: no more lines. */
	if (grep_feof(f) && (i == 0) && (ch != '\n'))
		return (NULL);
	*len = i;
	return (lnbuf);
}
/*
 * Wraps the standard input in a struct file.  The recorded file name is
 * the message returned by getstr(1).
 * Returns the new handle, or NULL if the descriptor cannot be opened.
 */
struct file *
grep_stdin_open(void)
{
	struct file *in;

	snprintf(fname, sizeof fname, "%s", getstr(1));

	in = grep_malloc(sizeof *in);
	in->f = fdopen(STDIN_FILENO, "r");
	if (in->f == NULL) {
		free(in);
		return (NULL);
	}

	in->stdin = true;
	return (in);
}
/*
 * Opens path with the reader selected by the global filebehave (plain
 * stdio, gzip or bzip2) and remembers the name in fname.
 * Returns the new handle, or NULL if the file cannot be opened.
 */
struct file *
grep_open(const char *path)
{
	struct file *fp;
	int opened = 0;

	snprintf(fname, sizeof fname, "%s", path);

	fp = grep_malloc(sizeof *fp);
	fp->stdin = false;

	if (filebehave == FILE_STDIO) {
		fp->f = fopen(path, "r");
		opened = (fp->f != NULL);
	} else if (filebehave == FILE_GZIP) {
		fp->gzf = gzopen(fname, "r");
		opened = (fp->gzf != NULL);
	} else if (filebehave == FILE_BZIP) {
		fp->bzf = BZ2_bzopen(fname, "r");
		opened = (fp->bzf != NULL);
	}

	if (opened)
		return (fp);

	/* Nothing could be opened; do not leak the descriptor. */
	free(fp);
	return (NULL);
}
/*
 * Closes a normal, a gzipped or a bzip2 compressed file and releases
 * the pre-read buffer that grep_fgetln() set up for it.
 */
void
grep_close(struct file *f)
{

	switch (filebehave) {
	case FILE_STDIO:
		fclose(f->f);
		break;
	case FILE_GZIP:
		gzclose(f->gzf);
		break;
	case FILE_BZIP:
		BZ2_bzclose(f->bzf);
		break;
	}

	/*
	 * Reset read buffer for the file we are closing.  binbuf is cleared
	 * as well so that no stale pointer survives: grep_fgetln() copies it
	 * into binbufptr for the next file even when it allocates nothing.
	 */
	binbufptr = NULL;
	free(binbuf);
	binbuf = NULL;
}

461
usr.bin/grep/grep.1 Normal file
View File

@ -0,0 +1,461 @@
.\" $FreeBSD$
.\" $OpenBSD: grep.1,v 1.38 2010/04/05 06:30:59 jmc Exp $
.\" Copyright (c) 1980, 1990, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)grep.1 8.3 (Berkeley) 4/18/94
.\"
.Dd 19 September, 2009
.Dt GREP 1
.Os
.Sh NAME
.Nm grep , egrep , fgrep ,
.Nm zgrep , zegrep , zfgrep
.Nd file pattern searcher
.Sh SYNOPSIS
.Nm grep
.Bk -words
.Op Fl abcdDEFGHhIiJLlmnOopqRSsUVvwxZ
.Op Fl A Ar num
.Op Fl B Ar num
.Op Fl C Ns Op Ar num
.Op Fl e Ar pattern
.Op Fl f Ar file
.Op Fl Fl binary-files Ns = Ns Ar value
.Op Fl Fl color Ns Op = Ns Ar when
.Op Fl Fl colour Ns Op = Ns Ar when
.Op Fl Fl context Ns Op = Ns Ar num
.Op Fl Fl label
.Op Fl Fl line-buffered
.Op Fl Fl null
.Op Ar pattern
.Op Ar
.Ek
.Sh DESCRIPTION
The
.Nm grep
utility searches any given input files,
selecting lines that match one or more patterns.
By default, a pattern matches an input line if the regular expression
(RE) in the pattern matches the input line
without its trailing newline.
An empty expression matches every line.
Each input line that matches at least one of the patterns is written
to the standard output.
.Pp
.Nm grep
is used for simple patterns and
basic regular expressions
.Pq BREs ;
.Nm egrep
can handle extended regular expressions
.Pq EREs .
See
.Xr re_format 7
for more information on regular expressions.
.Nm fgrep
is quicker than both
.Nm grep
and
.Nm egrep ,
but can only handle fixed patterns
(i.e. it does not interpret regular expressions).
Patterns may consist of one or more lines,
allowing any of the pattern lines to match a portion of the input.
.Pp
.Nm zgrep ,
.Nm zegrep ,
and
.Nm zfgrep
act like
.Nm grep ,
.Nm egrep ,
and
.Nm fgrep ,
respectively, but accept input files compressed with the
.Xr compress 1
or
.Xr gzip 1
compression utilities.
.Pp
The following options are available:
.Bl -tag -width indent
.It Fl A Ar num , Fl Fl after-context Ns = Ns Ar num
Print
.Ar num
lines of trailing context after each match.
See also the
.Fl B
and
.Fl C
options.
.It Fl a , Fl Fl text
Treat all files as ASCII text.
Normally
.Nm
will simply print
.Dq Binary file ... matches
if files contain binary characters.
Use of this option forces
.Nm
to output lines matching the specified pattern.
.It Fl B Ar num , Fl Fl before-context Ns = Ns Ar num
Print
.Ar num
lines of leading context before each match.
See also the
.Fl A
and
.Fl C
options.
.It Fl b , Fl Fl byte-offset
The offset in bytes of a matched pattern is
displayed in front of the respective matched line.
.It Fl C Ns Op Ar num , Fl Fl context Ns = Ns Ar num
Print
.Ar num
lines of leading and trailing context surrounding each match.
The default is 2 and is equivalent to
.Fl A
.Ar 2
.Fl B
.Ar 2 .
Note:
no whitespace may be given between the option and its argument.
.It Fl c , Fl Fl count
Only a count of selected lines is written to standard output.
.It Fl Fl colour Ns = Ns Op Ar when , Fl Fl color Ns = Ns Op Ar when
Mark up the matching text with the expression stored in
.Ev GREP_COLOR
environment variable.
The possible values of when can be `never', `always' or `auto'.
.It Fl D Ar action , Fl Fl devices Ns = Ns Ar action
Specify the demanded action for devices, FIFOs and sockets.
The default action is `read', which means, that they are read
as if they were normal files.
If the action is set to `skip', devices will be silently skipped.
.It Fl d Ar action , Fl Fl directories Ns = Ns Ar action
Specify the demanded action for directories.
It is `read' by default, which means that the directories
are read in the same manner as normal files.
Other possible values are `skip' to silently ignore the
directories, and `recurse' to read them recursively, which
has the same effect as the
.Fl R
and
.Fl r
option.
.It Fl E , Fl Fl extended-regexp
Interpret
.Ar pattern
as an extended regular expression
(i.e. force
.Nm grep
to behave as
.Nm egrep ) .
.It Fl e Ar pattern , Fl Fl regexp Ns = Ns Ar pattern
Specify a pattern used during the search of the input:
an input line is selected if it matches any of the specified patterns.
This option is most useful when multiple
.Fl e
options are used to specify multiple patterns,
or when a pattern begins with a dash
.Pq Sq - .
.It Fl Fl exclude
If
.Fl R
is specified, it excludes files matching the given
filename pattern.
.It Fl Fl exclude-dir
If
.Fl R
is specified, it excludes directories matching the
given filename pattern.
.It Fl F , Fl Fl fixed-strings
Interpret
.Ar pattern
as a set of fixed strings
(i.e. force
.Nm grep
to behave as
.Nm fgrep ) .
.It Fl f Ar file , Fl Fl file Ns = Ns Ar file
Read one or more newline separated patterns from
.Ar file .
Empty pattern lines match every input line.
Newlines are not considered part of a pattern.
If
.Ar file
is empty, nothing is matched.
.It Fl G , Fl Fl basic-regexp
Interpret
.Ar pattern
as a basic regular expression
(i.e. force
.Nm grep
to behave as traditional
.Nm grep ) .
.It Fl H
Always print filename headers with output lines.
.It Fl h , Fl Fl no-filename
Never print filename headers
.Pq i.e. filenames
with output lines.
.It Fl Fl help
Print a brief help message.
.It Fl I
Ignore binary files.
This option is equivalent to
.Fl Fl binary-files Ns = Ns Ar without-match
option.
.It Fl i , Fl Fl ignore-case
Perform case insensitive matching.
By default,
.Nm grep
is case sensitive.
.It Fl Fl include
If
.Fl R
is specified, it includes the files matching the
given filename pattern.
.It Fl Fl include-dir
If
.Fl R
is specified, it includes the directories matching the
given filename pattern.
.It Fl J , Fl Fl bz2decompress
Decompress the
.Xr bzip2 1
compressed file before looking for the text.
.It Fl L , Fl Fl files-without-match
Only the names of files not containing selected lines are written to
standard output.
Pathnames are listed once per file searched.
If the standard input is searched, the string
.Dq (standard input)
is written.
.It Fl l , Fl Fl files-with-matches
Only the names of files containing selected lines are written to
standard output.
.Nm grep
will only search a file until a match has been found,
making searches potentially less expensive.
Pathnames are listed once per file searched.
If the standard input is searched, the string
.Dq (standard input)
is written.
.It Fl Fl mmap
Use
.Xr mmap 2
instead of
.Xr read 2
to read input, which can result in better performance under some
circumstances but can cause undefined behaviour.
.It Fl m Ar num , Fl Fl max-count Ns = Ns Ar num
Stop reading the file after
.Ar num
matches.
.It Fl n , Fl Fl line-number
Each output line is preceded by its relative line number in the file,
starting at line 1.
The line number counter is reset for each file processed.
This option is ignored if
.Fl c ,
.Fl L ,
.Fl l ,
or
.Fl q
is
specified.
.It Fl Fl null
Prints a zero-byte after the file name.
.It Fl O
If
.Fl R
is specified, follow symbolic links only if they were explicitly listed
on the command line.
The default is not to follow symbolic links.
.It Fl o , Fl Fl only-matching
Prints only the matching part of the lines.
.It Fl p
If
.Fl R
is specified, no symbolic links are followed.
This is the default.
.It Fl q , Fl Fl quiet , Fl Fl silent
Quiet mode:
suppress normal output.
.Nm grep
will only search a file until a match has been found,
making searches potentially less expensive.
.It Fl R , Fl r , Fl Fl recursive
Recursively search subdirectories listed.
.It Fl S
If
.Fl R
is specified, all symbolic links are followed.
The default is not to follow symbolic links.
.It Fl s , Fl Fl no-messages
Silent mode.
Nonexistent and unreadable files are ignored
(i.e. their error messages are suppressed).
.It Fl U , Fl Fl binary
Search binary files, but do not attempt to print them.
.It Fl V , Fl Fl version
Display version information and exit.
.It Fl v , Fl Fl invert-match
Selected lines are those
.Em not
matching any of the specified patterns.
.It Fl w , Fl Fl word-regexp
The expression is searched for as a word (as if surrounded by
.Sq [[:<:]]
and
.Sq [[:>:]] ;
see
.Xr re_format 7 ) .
.It Fl x , Fl Fl line-regexp
Only input lines selected against an entire fixed string or regular
expression are considered to be matching lines.
.It Fl y
Equivalent to
.Fl i .
Obsoleted.
.It Fl Z , Fl z , Fl Fl decompress
Force
.Nm grep
to behave as
.Nm zgrep .
.It Fl Fl binary-files Ns = Ns Ar value
Controls searching and printing of binary files.
Options are
.Ar binary ,
the default: search binary files but do not print them;
.Ar without-match :
do not search binary files;
and
.Ar text :
treat all files as text.
.Sm off
.It Fl Fl context Op = Ar num
.Sm on
Print
.Ar num
lines of leading and trailing context.
The default is 2.
.It Fl Fl line-buffered
Force output to be line buffered.
By default, output is line buffered when standard output is a terminal
and block buffered otherwise.
.Pp
.El
If no file arguments are specified, the standard input is used.
.Sh RETURN VALUES
The
.Nm grep
utility exits with one of the following values:
.Pp
.Bl -tag -width flag -compact
.It Li 0
One or more lines were selected.
.It Li 1
No lines were selected.
.It Li \*(Gt1
An error occurred.
.El
.Sh EXAMPLES
To find all occurrences of the word
.Sq patricia
in a file:
.Pp
.Dl $ grep 'patricia' myfile
.Pp
To find all occurrences of the pattern
.Ql .Pp
at the beginning of a line:
.Pp
.Dl $ grep '^\e.Pp' myfile
.Pp
The apostrophes ensure the entire expression is evaluated by
.Nm grep
instead of by the user's shell.
The caret
.Ql ^
matches the null string at the beginning of a line,
and the
.Ql \e
escapes the
.Ql \&. ,
which would otherwise match any character.
.Pp
To find all lines in a file which do not contain the words
.Sq foo
or
.Sq bar :
.Pp
.Dl $ grep -v -e 'foo' -e 'bar' myfile
.Pp
A simple example of an extended regular expression:
.Pp
.Dl $ egrep '19|20|25' calendar
.Pp
Peruses the file
.Sq calendar
looking for either 19, 20, or 25.
.Sh SEE ALSO
.Xr ed 1 ,
.Xr ex 1 ,
.Xr gzip 1 ,
.Xr sed 1 ,
.Xr re_format 7
.Sh STANDARDS
The
.Nm
utility is compliant with the
.St -p1003.1-2008
specification.
.Pp
The flags
.Op Fl AaBbCDdGHhIJLmoPRSUVwZ
are extensions to that specification, and the behaviour of the
.Fl f
flag when used with an empty pattern file is left undefined.
.Pp
All long options are provided for compatibility with
GNU versions of this utility.
.Pp
Historic versions of the
.Nm grep
utility also supported the flags
.Op Fl ruy .
This implementation supports those options;
however, their use is strongly discouraged.
.Sh HISTORY
The
.Nm grep
command first appeared in
.At v6 .

667
usr.bin/grep/grep.c Normal file
View File

@ -0,0 +1,667 @@
/* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/stat.h>
#include <sys/types.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <libgen.h>
#include <locale.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "grep.h"
#ifndef WITHOUT_NLS
#include <nl_types.h>
nl_catd catalog;
#endif
/*
 * Default messages to use when NLS is disabled or no catalogue
 * is found.  The number in front of each entry is its index in
 * the array.
 */
const char *errstr[] = {
"",
/* 1*/ "(standard input)",
/* 2*/ "cannot read bzip2 compressed file",
/* 3*/ "unknown --color option",
/* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
/* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
/* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
/* 7*/ "\t[--null] [pattern] [file ...]\n",
/* 8*/ "unknown --binary-files option",
/* 9*/ "Binary file %s matches\n",
/*10*/ "%s (BSD grep) %s\n",
};
/* Flags passed to regcomp() and regexec() */
int cflags = 0;
int eflags = REG_STARTEND;
/* Shortcut for matching all cases like empty regex */
bool matchall;
/* Searching patterns */
unsigned int patterns, pattern_sz;
char **pattern;
regex_t *r_pattern;
fastgrep_t *fg_pattern;
/* Filename exclusion/inclusion patterns */
unsigned int epatterns, epattern_sz;
struct epat *epattern;
/* For regex errors */
char re_error[RE_ERROR_BUF + 1];
/* Command-line flags */
unsigned long long Aflag; /* -A x: print x lines trailing each match */
unsigned long long Bflag; /* -B x: print x lines leading each match */
bool Hflag; /* -H: always print file name */
bool Lflag; /* -L: only show names of files with no matches */
bool bflag; /* -b: show the byte offset of each match */
bool cflag; /* -c: only show a count of matching lines */
bool hflag; /* -h: don't print filename headers */
bool iflag; /* -i: ignore case */
bool lflag; /* -l: only show names of files with matches */
bool mflag; /* -m x: stop reading the files after x matches */
unsigned long long mcount; /* count for -m */
bool nflag; /* -n: show line numbers in front of matching lines */
bool oflag; /* -o: print only matching part */
bool qflag; /* -q: quiet mode (don't output anything) */
bool sflag; /* -s: silent mode (ignore errors) */
bool vflag; /* -v: only show non-matching lines */
bool wflag; /* -w: pattern must start and end on word boundaries */
bool xflag; /* -x: pattern must match entire line */
bool lbflag; /* --line-buffered */
bool nullflag; /* --null */
bool exclflag; /* --exclude */
char *label; /* --label */
char *color; /* --color */
int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
int devbehave = DEV_GREP; /* -D: handling of devices */
int dirbehave = DIR_GREP; /* -dRr: handling of directories */
int linkbehave = LINK_GREP; /* -OpS: handling of symlinks */
enum {
BIN_OPT = CHAR_MAX + 1,
COLOR_OPT,
HELP_OPT,
MMAP_OPT,
LINEBUF_OPT,
LABEL_OPT,
NULL_OPT,
R_EXCLUDE_OPT,
R_INCLUDE_OPT,
R_DEXCLUDE_OPT,
R_DINCLUDE_OPT
};
/* Housekeeping */
bool first = true; /* flag whether we are processing the first match */
bool prev; /* flag whether or not the previous line matched */
int tail; /* lines left to print */
bool notfound; /* file not found */
extern char *__progname;
/*
 * Prints the usage message to stderr and terminates the program with
 * exit status 2.  This function does not return.
 */
static void
usage(void)
{
	int n;

	/* Message 4 is a format string that takes the program name. */
	fprintf(stderr, getstr(4), __progname);
	/* Messages 5 to 7 are the continuation lines; print each one once. */
	for (n = 5; n <= 7; n++)
		fprintf(stderr, "%s", getstr(n));
	exit(2);
}
/*
* Short option string for getopt_long(). The digits are accepted as
* options so that a bare number can set the context size (see the
* '0'..'9' case in main()).
*/
static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy";
/*
* Long option table for getopt_long(). Entries whose last field is a
* character are aliases for the short option of the same letter; the
* others return one of the *_OPT codes.
*/
struct option long_options[] =
{
{"binary-files", required_argument, NULL, BIN_OPT},
{"help", no_argument, NULL, HELP_OPT},
{"mmap", no_argument, NULL, MMAP_OPT},
{"line-buffered", no_argument, NULL, LINEBUF_OPT},
{"label", required_argument, NULL, LABEL_OPT},
{"null", no_argument, NULL, NULL_OPT},
{"color", optional_argument, NULL, COLOR_OPT},
{"colour", optional_argument, NULL, COLOR_OPT},
{"exclude", required_argument, NULL, R_EXCLUDE_OPT},
{"include", required_argument, NULL, R_INCLUDE_OPT},
{"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
{"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
{"after-context", required_argument, NULL, 'A'},
{"text", no_argument, NULL, 'a'},
{"before-context", required_argument, NULL, 'B'},
{"byte-offset", no_argument, NULL, 'b'},
{"context", optional_argument, NULL, 'C'},
{"count", no_argument, NULL, 'c'},
{"devices", required_argument, NULL, 'D'},
{"directories", required_argument, NULL, 'd'},
{"extended-regexp", no_argument, NULL, 'E'},
{"regexp", required_argument, NULL, 'e'},
{"fixed-strings", no_argument, NULL, 'F'},
{"file", required_argument, NULL, 'f'},
{"basic-regexp", no_argument, NULL, 'G'},
{"no-filename", no_argument, NULL, 'h'},
{"with-filename", no_argument, NULL, 'H'},
{"ignore-case", no_argument, NULL, 'i'},
{"bz2decompress", no_argument, NULL, 'J'},
{"files-with-matches", no_argument, NULL, 'l'},
{"files-without-match", no_argument, NULL, 'L'},
{"max-count", required_argument, NULL, 'm'},
{"line-number", no_argument, NULL, 'n'},
{"only-matching", no_argument, NULL, 'o'},
{"quiet", no_argument, NULL, 'q'},
{"silent", no_argument, NULL, 'q'},
{"recursive", no_argument, NULL, 'r'},
{"no-messages", no_argument, NULL, 's'},
{"binary", no_argument, NULL, 'U'},
{"unix-byte-offsets", no_argument, NULL, 'u'},
{"invert-match", no_argument, NULL, 'v'},
{"version", no_argument, NULL, 'V'},
{"word-regexp", no_argument, NULL, 'w'},
{"line-regexp", no_argument, NULL, 'x'},
{"decompress", no_argument, NULL, 'Z'},
{NULL, no_argument, NULL, 0}
};
/*
 * Stores a private, NUL-terminated copy of the search pattern pat (len
 * bytes, not necessarily NUL-terminated) in the global pattern array.
 * An empty pattern turns on matchall instead; once matchall is set all
 * further patterns are ignored.
 */
static void
add_pattern(char *pat, size_t len)
{
	char *copy;

	/* An empty pattern matches every line, so nothing has to be kept. */
	if (matchall || len == 0) {
		matchall = true;
		return;
	}

	/* Array full: grow it to twice the old capacity plus one slot. */
	if (pattern_sz == patterns) {
		pattern_sz = pattern_sz * 2 + 1;
		pattern = grep_realloc(pattern, pattern_sz * sizeof(*pattern));
	}

	/* Strip one trailing newline, as left behind by line-wise reading. */
	if (pat[len - 1] == '\n')
		len--;

	copy = grep_malloc(len + 1);
	memcpy(copy, pat, len);
	copy[len] = '\0';
	pattern[patterns++] = copy;
}
/*
* Adds an include/exclude pattern to the internal array.
*/
static void
add_epattern(char *pat, size_t len, int type, int mode)
{
/* Increase size if necessary */
if (epatterns == epattern_sz) {
epattern_sz *= 2;
epattern = grep_realloc(epattern, ++epattern_sz *
sizeof(struct epat));
}
if (len > 0 && pat[len - 1] == '\n')
--len;
epattern[epatterns].pat = grep_malloc(len + 1);
memcpy(epattern[epatterns].pat, pat, len);
epattern[epatterns].pat[len] = '\0';
epattern[epatterns].type = type;
epattern[epatterns].mode = mode;
++epatterns;
}
/*
 * Reads the file fn line by line and registers every line as a search
 * pattern through add_pattern().  A line consisting only of a newline
 * is registered as the empty pattern.  Exits with status 2 if the file
 * cannot be opened or a read error occurs.
 */
static void
read_patterns(const char *fn)
{
	FILE *fp;
	char *buf;
	size_t buflen;

	fp = fopen(fn, "r");
	if (fp == NULL)
		err(2, "%s", fn);

	for (;;) {
		buf = fgetln(fp, &buflen);
		if (buf == NULL)
			break;
		/* A bare newline stands for the empty pattern. */
		if (*buf == '\n')
			buflen = 0;
		add_pattern(buf, buflen);
	}

	if (ferror(fp))
		err(2, "%s", fn);
	fclose(fp);
}
/*
 * Entry point.  Selects the default behaviour from the program name,
 * merges the options found in the GREP_OPTIONS environment variable with
 * the command line, parses all options, compiles the patterns and then
 * searches either standard input, the listed files or (with -R/-r) the
 * listed directory trees.
 *
 * The process exit status is 0 when a line was selected, 1 when none was
 * and 2 on errors; see the expression at the end for the exact rules.
 */
int
main(int argc, char *argv[])
{
	char **aargv, *eopts;
	char *ep;
	unsigned long long l;
	unsigned int aargc, eargc, i;
	int c, lastc, needpattern, newarg, prevoptind;

	setlocale(LC_ALL, "");

#ifndef WITHOUT_NLS
	catalog = catopen("grep", NL_CAT_LOCALE);
#endif

	/* Check what is the program name of the binary.  In this
	   way we can have all the functionalities in one binary
	   without the need of scripting and using ugly hacks. */
	switch (__progname[0]) {
	case 'e':
		grepbehave = GREP_EXTENDED;
		break;
	case 'f':
		grepbehave = GREP_FIXED;
		break;
	case 'g':
		grepbehave = GREP_BASIC;
		break;
	case 'z':
		filebehave = FILE_GZIP;
		switch(__progname[1]) {
		case 'e':
			grepbehave = GREP_EXTENDED;
			break;
		case 'f':
			grepbehave = GREP_FIXED;
			break;
		case 'g':
			grepbehave = GREP_BASIC;
			break;
		}
		break;
	}

	lastc = '\0';
	newarg = 1;
	prevoptind = 1;
	needpattern = 1;

	eopts = getenv("GREP_OPTIONS");

	if (eopts != NULL) {
		char *str;

		/*
		 * One more than the number of spaces is an upper bound on
		 * the number of tokens strtok() can hand back.
		 */
		eargc = 1;
		for (i = 0; eopts[i] != '\0'; i++)
			if (eopts[i] == ' ')
				eargc++;

		/*
		 * Room for argv[0], the extra options, argv[1..argc-1] and
		 * a terminating NULL pointer.
		 */
		aargv = grep_malloc(sizeof(char *) * (eargc + argc + 1));
		aargv[0] = argv[0];
		aargc = 1;

		/* The options from the environment come first ... */
		for (str = strtok(eopts, " "); str != NULL;
		    str = strtok(NULL, " ")) {
			aargv[aargc] = grep_malloc(sizeof(char) *
			    (strlen(str) + 1));
			strlcpy(aargv[aargc], str, strlen(str) + 1);
			aargc++;
		}
		/* ... followed by the real command line arguments. */
		for (int j = 1; j < argc; j++)
			aargv[aargc++] = argv[j];

		/*
		 * The remaining vector is handed to fts_open() by
		 * grep_tree(), which needs it to be NULL-terminated.
		 */
		aargv[aargc] = NULL;
	} else {
		aargv = argv;
		aargc = argc;
	}

	while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
	    -1)) {
		switch (c) {
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/*
			 * A run of digit options forms a context size;
			 * start over when the digit begins a new run.
			 */
			if (newarg || !isdigit(lastc))
				Aflag = 0;
			else if (Aflag > LLONG_MAX / 10) {
				errno = ERANGE;
				err(2, NULL);
			}
			Aflag = Bflag = (Aflag * 10) + (c - '0');
			break;
		case 'C':
			if (optarg == NULL) {
				Aflag = Bflag = 2;
				break;
			}
			/* FALLTHROUGH */
		case 'A':
			/* FALLTHROUGH */
		case 'B':
			errno = 0;
			l = strtoull(optarg, &ep, 10);
			if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
			    ((errno == EINVAL) && (l == 0)))
				err(2, NULL);
			else if (ep[0] != '\0') {
				errno = EINVAL;
				err(2, NULL);
			}
			if (c == 'A')
				Aflag = l;
			else if (c == 'B')
				Bflag = l;
			else
				Aflag = Bflag = l;
			break;
		case 'a':
			binbehave = BINFILE_TEXT;
			break;
		case 'b':
			bflag = true;
			break;
		case 'c':
			cflag = true;
			break;
		case 'D':
			if (strcmp(optarg, "skip") == 0)
				devbehave = DEV_SKIP;
			break;
		case 'd':
			if (strcmp("recurse", optarg) == 0) {
				Hflag = true;
				dirbehave = DIR_RECURSE;
			} else if (strcmp("skip", optarg) == 0)
				dirbehave = DIR_SKIP;
			else if (strcmp("read", optarg) != 0) {
				errno = EINVAL;
				err(2, NULL);
			}
			break;
		case 'E':
			grepbehave = GREP_EXTENDED;
			break;
		case 'e':
			add_pattern(optarg, strlen(optarg));
			needpattern = 0;
			break;
		case 'F':
			grepbehave = GREP_FIXED;
			break;
		case 'f':
			read_patterns(optarg);
			needpattern = 0;
			break;
		case 'G':
			grepbehave = GREP_BASIC;
			break;
		case 'H':
			Hflag = true;
			break;
		case 'h':
			Hflag = false;
			hflag = true;
			break;
		case 'I':
			binbehave = BINFILE_SKIP;
			break;
		case 'i':
		case 'y':
			iflag = true;
			cflags |= REG_ICASE;
			break;
		case 'J':
			filebehave = FILE_BZIP;
			break;
		case 'L':
			lflag = false;
			Lflag = qflag = true;
			break;
		case 'l':
			Lflag = false;
			lflag = qflag = true;
			break;
		case 'm':
			mflag = true;
			errno = 0;
			mcount = strtoull(optarg, &ep, 10);
			if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
			    ((errno == EINVAL) && (mcount == 0)))
				err(2, NULL);
			else if (ep[0] != '\0') {
				errno = EINVAL;
				err(2, NULL);
			}
			break;
		case 'n':
			nflag = true;
			break;
		case 'O':
			linkbehave = LINK_EXPLICIT;
			break;
		case 'o':
			oflag = true;
			break;
		case 'p':
			linkbehave = LINK_SKIP;
			break;
		case 'q':
			qflag = true;
			break;
		case 'S':
			linkbehave = LINK_GREP;
			break;
		case 'R':
		case 'r':
			dirbehave = DIR_RECURSE;
			Hflag = true;
			break;
		case 's':
			sflag = true;
			break;
		case 'U':
			binbehave = BINFILE_BIN;
			break;
		case 'u':
		case MMAP_OPT:
			/* noop, compatibility */
			break;
		case 'V':
			printf(getstr(10), __progname, VERSION);
			exit(0);
		case 'v':
			vflag = true;
			break;
		case 'w':
			wflag = true;
			break;
		case 'x':
			xflag = true;
			break;
		case 'Z':
			filebehave = FILE_GZIP;
			break;
		case BIN_OPT:
			if (strcmp("binary", optarg) == 0)
				binbehave = BINFILE_BIN;
			else if (strcmp("without-match", optarg) == 0)
				binbehave = BINFILE_SKIP;
			else if (strcmp("text", optarg) == 0)
				binbehave = BINFILE_TEXT;
			else
				errx(2, "%s", getstr(8));
			break;
		case COLOR_OPT:
			if (optarg == NULL || strcmp("auto", optarg) == 0 ||
			    strcmp("always", optarg) == 0 ) {
				color = getenv("GREP_COLOR");
				if (color == NULL) {
					color = grep_malloc(sizeof(char) * 6);
					strcpy(color, "01;31");
				}
			} else if (strcmp("never", optarg) == 0)
				color = NULL;
			else
				errx(2, "%s", getstr(3));
			break;
		case LABEL_OPT:
			label = optarg;
			break;
		case LINEBUF_OPT:
			lbflag = true;
			break;
		case NULL_OPT:
			nullflag = true;
			break;
		case R_INCLUDE_OPT:
			exclflag = true;
			add_epattern(basename(optarg), strlen(basename(optarg)),
			    FILE_PAT, INCL_PAT);
			break;
		case R_EXCLUDE_OPT:
			exclflag = true;
			add_epattern(basename(optarg), strlen(basename(optarg)),
			    FILE_PAT, EXCL_PAT);
			break;
		case R_DINCLUDE_OPT:
			exclflag = true;
			add_epattern(basename(optarg), strlen(basename(optarg)),
			    DIR_PAT, INCL_PAT);
			break;
		case R_DEXCLUDE_OPT:
			exclflag = true;
			add_epattern(basename(optarg), strlen(basename(optarg)),
			    DIR_PAT, EXCL_PAT);
			break;
		case HELP_OPT:
		default:
			usage();
		}
		/* Remember enough state to recognise a run of digit options. */
		lastc = c;
		newarg = optind != prevoptind;
		prevoptind = optind;
	}
	aargc -= optind;
	aargv += optind;

	/* Fail if we don't have any pattern */
	if (aargc == 0 && needpattern)
		usage();

	/* Process patterns from command line */
	if (aargc != 0 && needpattern) {
		add_pattern(*aargv, strlen(*aargv));
		--aargc;
		++aargv;
	}

	switch (grepbehave) {
	case GREP_FIXED:
	case GREP_BASIC:
		break;
	case GREP_EXTENDED:
		cflags |= REG_EXTENDED;
		break;
	default:
		/* NOTREACHED */
		usage();
	}

	fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
	r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
	/*
	 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
	 * Optimizations should be done there.
	 */
	/* Check if cheating is allowed (always is for fgrep). */
	if (grepbehave == GREP_FIXED) {
		for (i = 0; i < patterns; ++i)
			fgrepcomp(&fg_pattern[i], pattern[i]);
	} else {
		for (i = 0; i < patterns; ++i) {
			if (fastcomp(&fg_pattern[i], pattern[i])) {
				/* Fall back to full regex library */
				c = regcomp(&r_pattern[i], pattern[i], cflags);
				if (c != 0) {
					regerror(c, &r_pattern[i], re_error,
					    RE_ERROR_BUF);
					errx(2, "%s", re_error);
				}
			}
		}
	}

	if (lbflag)
		setlinebuf(stdout);

	/* With at most one input, file names are shown only on request. */
	if ((aargc == 0 || aargc == 1) && !Hflag)
		hflag = true;

	/* No file operands: search standard input. */
	if (aargc == 0)
		exit(!procfile("-"));

	if (dirbehave == DIR_RECURSE)
		c = grep_tree(aargv);
	else
		for (c = 0; aargc--; ++aargv)
			c+= procfile(*aargv);

#ifndef WITHOUT_NLS
	catclose(catalog);
#endif

	/* Find out the correct return value according to the
	   results and the command line option. */
	exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1));
}

162
usr.bin/grep/grep.h Normal file
View File

@ -0,0 +1,162 @@
/* $OpenBSD: grep.h,v 1.15 2010/04/05 03:03:55 tedu Exp $ */
/* $FreeBSD$ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* Copyright (c) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <bzlib.h>
#include <limits.h>
#include <regex.h>
#include <stdbool.h>
#include <stdio.h>
#include <zlib.h>
#ifdef WITHOUT_NLS
#define getstr(n) errstr[n]
#else
#include <nl_types.h>
extern nl_catd catalog;
#define getstr(n) catgets(catalog, 1, n, errstr[n])
#endif
extern const char *errstr[];
#define VERSION "2.5.1-FreeBSD"
#define GREP_FIXED 0
#define GREP_BASIC 1
#define GREP_EXTENDED 2
#define BINFILE_BIN 0
#define BINFILE_SKIP 1
#define BINFILE_TEXT 2
#define FILE_STDIO 0
#define FILE_GZIP 1
#define FILE_BZIP 2
#define DIR_GREP 0
#define DIR_SKIP 1
#define DIR_RECURSE 2
#define DEV_GREP 0
#define DEV_SKIP 1
#define LINK_GREP 0
#define LINK_EXPLICIT 1
#define LINK_SKIP 2
#define FILE_PAT 0
#define DIR_PAT 1
#define EXCL_PAT 0
#define INCL_PAT 1
#define MAX_LINE_MATCHES 32
/*
* Handle for an input that is being searched; returned by grep_open()
* and grep_stdin_open() and released with grep_close().
* NOTE(review): mmf, bzf, f and gzf look like alternative back ends
* (memory mapped, bzip2, stdio, gzip); the code using them is in file.c,
* confirm there which of them is valid at any time.
*/
struct file {
struct mmfile *mmf;
BZFILE *bzf;
FILE *f;
gzFile *gzf;
bool binary; /* passed to procline() as "nottext"; checked for BINFILE_SKIP */
bool stdin; /* presumably set when the input is standard input; confirm in file.c */
};
/*
* One input line together with the information needed to print it.
*/
struct str {
off_t off; /* running offset of the line as computed by procfile(); printed for -b */
size_t len; /* number of bytes in dat, without a trailing newline */
char *dat; /* line contents; written with fwrite() using len */
char *file; /* file name printed in front of the line */
int line_no; /* line number within the file, starting at 1 */
};
/*
* A file name inclusion/exclusion pattern (--include, --exclude,
* --include-dir, --exclude-dir).
*/
struct epat {
char *pat; /* the pattern text */
int mode; /* EXCL_PAT or INCL_PAT */
int type; /* FILE_PAT or DIR_PAT */
};
/*
* State of the fast matcher that is tried before the regex library
* (see fastcomp(), fgrepcomp() and grep_search()).
* NOTE(review): the meaning of the individual members is defined in
* fastgrep.c, which is not visible here; the remarks below are to be
* confirmed against that file.
*/
typedef struct {
size_t len; /* presumably the length of pattern */
unsigned char *pattern; /* non-NULL when the fast matcher handles this pattern */
int qsBc[UCHAR_MAX + 1]; /* one entry per byte value; presumably a shift table */
/* flags */
bool bol;
bool eol;
bool reversed;
} fastgrep_t;
/* Flags passed to regcomp() and regexec() */
extern int cflags, eflags;
/* Command line flags */
extern bool Eflag, Fflag, Gflag, Hflag, Lflag,
bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag,
qflag, sflag, vflag, wflag, xflag;
extern bool exclflag, nullflag;
extern unsigned long long Aflag, Bflag, mcount;
extern char *color, *label;
extern int binbehave, devbehave, dirbehave, filebehave, grepbehave, linkbehave;
extern bool first, matchall, notfound, prev;
extern int tail;
extern unsigned int epatterns, patterns;
extern char **pattern;
extern struct epat *epattern;
extern regex_t *er_pattern, *r_pattern;
extern fastgrep_t *fg_pattern;
/* For regex errors */
#define RE_ERROR_BUF 512
extern char re_error[RE_ERROR_BUF + 1]; /* Seems big enough */
/* util.c */
int procfile(const char *fn);
int grep_tree(char **argv);
void *grep_malloc(size_t size);
void *grep_calloc(size_t nmemb, size_t size);
void *grep_realloc(void *ptr, size_t size);
void printline(struct str *line, int sep, regmatch_t *matches, int m);
/* queue.c */
void enqueue(struct str *x);
void printqueue(void);
void clearqueue(void);
/* file.c */
void grep_close(struct file *f);
struct file *grep_stdin_open(void);
struct file *grep_open(const char *path);
int grep_feof(struct file *f);
int grep_fgetc(struct file *f);
char *grep_fgetln(struct file *f, size_t *len);
/* fastgrep.c */
int fastcomp(fastgrep_t *, const char *);
void fgrepcomp(fastgrep_t *, const char *);
int grep_search(fastgrep_t *, unsigned char *, size_t, regmatch_t *);

14
usr.bin/grep/nls/C.msg Normal file
View File

@ -0,0 +1,14 @@
$ $FreeBSD$
$
$set 1
$quote "
1 "(standard input)"
2 "cannot read bzip2 compressed file"
3 "unknown --color option"
4 "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n"
5 "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n"
6 "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n"
7 "\t[--null] [pattern] [file ...]\n"
8 "unknown --binary-files option"
9 "Binary file %s matches\n"
10 "%s (BSD grep) %s\n"

View File

@ -0,0 +1,12 @@
# $FreeBSD$
NLSNAME= grep
NLS= es_ES.ISO8859-1
NLS+= gl_ES.ISO8859-1
NLS+= hu_HU.ISO8859-2
NLS+= pt_BR.ISO8859-1
NLSSRCDIR= ${.CURDIR}/nls
.for lang in ${NLS}
NLSSRCFILES_${lang}=${lang}.msg
.endfor

View File

@ -0,0 +1,14 @@
$ $FreeBSD$
$
$set 1
$quote "
1 "(entrada estándar)"
2 "no se puede leer el fichero comprimido bzip2"
3 "opción desconocida de --color"
4 "uso: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A no] [-B no] [-C[no]]\n"
5 "\t[-e pauta] [-f fichero] [--binary-files=valor] [--color=cuando]\n"
6 "\t[--context[=no]] [--directories=acción] [--label] [--line-buffered]\n"
7 "\t[--null] [pauta] [fichero ...]\n"
8 "opción desconocida de --binary-files"
9 "fichero binario %s se ajusta\n"
10 "%s (BSD grep) %s\n"

View File

@ -0,0 +1,14 @@
$ $FreeBSD$
$
$set 1
$quote "
1 "(entrada estándar)"
2 "non se pode ler o ficheiro comprimido bzip2"
3 "opción descoñecida de --color"
4 "uso: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A no] [-B no] [-C[no]]\n"
5 "\t[-e pauta] [-f ficheiro] [--binary-files=valor] [--color=cando]\n"
6 "\t[--context[=no]] [--directories=acción] [--label] [--line-buffered]\n"
7 "\t[--null] [pauta] [ficheiro ...]\n"
8 "opción descoñecida de --binary-files"
9 "ficheiro binario %s conforma\n"
10 "%s (BSD grep) %s\n"

View File

@ -0,0 +1,14 @@
$ $FreeBSD$
$
$set 1
$quote "
1 "(szabványos bemenet)"
2 "bzip2 tömörített fájl nem olvasható"
3 "ismeretlen --color opció"
4 "használat: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A szám] [-B szám] [-C[szám]]\n"
5 "\t[-e minta] [-f fájl] [--binary-files=érték] [--color=mikor]\n"
6 "\t[--context[=szám]] [--directories=művelet] [--label] [--line-buffered]\n"
7 "\t[--null] [minta] [fájl ...]\n"
8 "ismeretlen --binary-files opció"
9 "%s bináris fájl illeszkedik\n"
10 "%s (BSD grep) %s\n"

View File

@ -0,0 +1,14 @@
$ $FreeBSD$
$
$set 1
$quote "
1 "(entrada padrão)"
2 "não é possível ler o arquivo comprimido bzip2"
3 "opção não conhecida de --color"
4 "uso: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n"
5 "\t[-e padrão] [-f arquivo] [--binary-files=valor] [--color=quando]\n"
6 "\t[--context[=num]] [--directories=ação] [--label] [--line-buffered]\n"
7 "\t[--null] [padrão] [arquivo ...]\n"
8 "opção não conhecida de --binary-files"
9 "arquivo binário %s casa com o padrão\n"
10 "%s (BSD grep) %s\n"

104
usr.bin/grep/queue.c Normal file
View File

@ -0,0 +1,104 @@
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* A really poor man's queue. It does only what it has to and gets out of
* Dodge. It is a thin layer on top of the STAILQ macros from <sys/queue.h>
* that keeps at most Bflag lines of leading context.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/queue.h>
#include <stdlib.h>
#include <string.h>
#include "grep.h"
/* One queued line: the list linkage plus a copy of the line descriptor. */
struct qentry {
STAILQ_ENTRY(qentry) list;
struct str data;
};
/* The queue itself; new entries are appended at the tail. */
static STAILQ_HEAD(, qentry) queue = STAILQ_HEAD_INITIALIZER(queue);
/* Number of entries currently in the queue. */
static unsigned long long count;
static struct qentry *dequeue(void);
/*
 * Appends a copy of the line x to the queue.  The line data is duplicated,
 * the file name pointer is shared with the caller.  When the queue then
 * holds more than Bflag lines, the oldest entry is dropped and released
 * together with its copy of the line data.
 */
void
enqueue(struct str *x)
{
	struct qentry *item, *oldest;

	item = grep_malloc(sizeof(struct qentry));
	item->data.dat = grep_malloc(sizeof(char) * x->len);
	item->data.len = x->len;
	item->data.line_no = x->line_no;
	item->data.off = x->off;
	memcpy(item->data.dat, x->dat, x->len);
	item->data.file = x->file;

	STAILQ_INSERT_TAIL(&queue, item, list);

	if (++count > Bflag) {
		oldest = dequeue();
		if (oldest != NULL) {
			/* The entry owns its line buffer; free it as well. */
			free(oldest->data.dat);
			free(oldest);
		}
	}
}
/*
 * Unlinks the oldest entry from the queue and returns it, or returns
 * NULL when the queue is empty.  The caller takes over the entry.
 */
static struct qentry *
dequeue(void)
{
	struct qentry *head = STAILQ_FIRST(&queue);

	if (head != NULL) {
		STAILQ_REMOVE_HEAD(&queue, list);
		count--;
	}
	return (head);
}
/*
 * Prints every queued line, oldest first, as a context line (separator
 * '-') and empties the queue.  Each entry is released together with its
 * copy of the line data.
 */
void
printqueue(void)
{
	struct qentry *item;

	while ((item = dequeue()) != NULL) {
		printline(&item->data, '-', (regmatch_t *)NULL, 0);
		/* The line buffer was allocated by enqueue(). */
		free(item->data.dat);
		free(item);
	}
}
void
clearqueue(void)
{
struct qentry *item;
while ((item = dequeue()) != NULL)
free(item);
}

464
usr.bin/grep/util.c Normal file
View File

@ -0,0 +1,464 @@
/* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/stat.h>
#include <sys/types.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fnmatch.h>
#include <fts.h>
#include <libgen.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
#include "grep.h"
static int linesqueued;
static int procline(struct str *l, int);
/*
 * Processes a directory when a recursive search is performed with
 * the -R option.  Each appropriate file is passed to procfile().
 *
 * argv is the NULL-terminated list of starting paths.  The return value
 * is the sum of the values returned by procfile() for all visited files.
 * Unreadable directories and traversal errors terminate the program
 * with exit status 2.
 */
int
grep_tree(char **argv)
{
	FTS *fts;
	FTSENT *p;
	char *d, *dir;
	size_t dirlen;
	unsigned int i;
	int c, fts_flags;
	bool ok;

	c = fts_flags = 0;

	switch(linkbehave) {
	case LINK_EXPLICIT:
		fts_flags = FTS_COMFOLLOW;
		break;
	case LINK_SKIP:
		fts_flags = FTS_PHYSICAL;
		break;
	default:
		fts_flags = FTS_LOGICAL;
	}

	fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;

	if (!(fts = fts_open(argv, fts_flags, NULL)))
		err(2, NULL);
	while ((p = fts_read(fts)) != NULL) {
		switch (p->fts_info) {
		case FTS_DNR:
			/* FALLTHROUGH */
		case FTS_ERR:
			errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
			break;
		case FTS_D:
			/* FALLTHROUGH */
		case FTS_DP:
			break;
		case FTS_DC:
			/* Print a warning for recursive directory loop */
			warnx("warning: %s: recursive directory loop",
			    p->fts_path);
			break;
		default:
			/* Check for file exclusion/inclusion */
			ok = true;
			if (exclflag) {
				/*
				 * dir is the path up to, but not including,
				 * the last slash.  A path without any slash
				 * (a file named directly on the command
				 * line) has an empty directory part.
				 */
				d = strrchr(p->fts_path, '/');
				dirlen = (d != NULL) ?
				    (size_t)(d - p->fts_path) : 0;
				dir = grep_malloc(dirlen + 1);
				memcpy(dir, p->fts_path, dirlen);
				dir[dirlen] = '\0';

				/* The last pattern that applies decides. */
				for (i = 0; i < epatterns; ++i) {
					switch(epattern[i].type) {
					case FILE_PAT:
						if (fnmatch(epattern[i].pat,
						    basename(p->fts_path), 0) == 0)
							ok = epattern[i].mode != EXCL_PAT;
						break;
					case DIR_PAT:
						if (strstr(dir,
						    epattern[i].pat) != NULL)
							ok = epattern[i].mode != EXCL_PAT;
						break;
					}
				}
				free(dir);
			}

			if (ok)
				c += procfile(p->fts_path);
			break;
		}
	}

	/* Release the traversal state and its file descriptors. */
	fts_close(fts);

	return (c);
}
/*
* Opens a file and processes it. Each file is processed line-by-line
* passing the lines to procline().
*/
int
procfile(const char *fn)
{
struct file *f;
struct stat sb;
struct str ln;
mode_t s;
int c, t;
if (mflag && (mcount <= 0))
return (0);
if (strcmp(fn, "-") == 0) {
fn = label != NULL ? label : getstr(1);
f = grep_stdin_open();
} else {
if (!stat(fn, &sb)) {
/* Check if we need to process the file */
s = sb.st_mode & S_IFMT;
if (s == S_IFDIR && dirbehave == DIR_SKIP)
return (0);
if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
|| s == S_IFSOCK) && devbehave == DEV_SKIP)
return (0);
}
f = grep_open(fn);
}
if (f == NULL) {
if (!sflag)
warn("%s", fn);
if (errno == ENOENT)
notfound = true;
return (0);
}
ln.file = grep_malloc(strlen(fn) + 1);
strcpy(ln.file, fn);
ln.line_no = 0;
ln.len = 0;
linesqueued = 0;
tail = 0;
ln.off = -1;
for (c = 0; c == 0 || !(lflag || qflag); ) {
ln.off += ln.len + 1;
if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL) {
if (ln.line_no == 0 && matchall)
exit(0);
else
break;
}
if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
--ln.len;
ln.line_no++;
/* Return if we need to skip a binary file */
if (f->binary && binbehave == BINFILE_SKIP) {
grep_close(f);
free(f);
return (0);
}
/* Process the file line-by-line */
if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
enqueue(&ln);
linesqueued++;
}
c += t;
/* Count the matches if we have a match limit */
if (mflag) {
mcount -= t;
if (mcount <= 0)
break;
}
}
if (Bflag > 0)
clearqueue();
grep_close(f);
if (cflag) {
if (!hflag)
printf("%s:", ln.file);
printf("%u\n", c);
}
if (lflag && c != 0)
printf("%s\n", fn);
if (Lflag && c == 0)
printf("%s\n", fn);
if (c && !cflag && !lflag && !Lflag &&
binbehave == BINFILE_BIN && f->binary && !qflag)
printf(getstr(9), fn);
free(f);
return (c);
}
#define iswword(x) (iswalnum((x)) || (x) == L'_')
/*
* Processes a line comparing it with the specified patterns. Each pattern
* is looped to be compared along with the full string, saving each and every
* match, which is necessary to colorize the output and to count the
* matches. The matching lines are passed to printline() to display the
* appropriate output.
*
* l is the line to examine and nottext is non-zero when the line comes
* from a file that was detected as binary. The return value is non-zero
* when the line is selected (taking -v into account) and 0 otherwise.
* As a side effect the function prints the line and any pending context
* and updates the context state (tail, prev, first, linesqueued).
*/
static int
procline(struct str *l, int nottext)
{
regmatch_t matches[MAX_LINE_MATCHES];
regmatch_t pmatch;
size_t st = 0;
unsigned int i;
int c = 0, m = 0, r = 0;
if (!matchall) {
/* Loop to process the whole line */
while (st <= l->len) {
/* eflags contains REG_STARTEND, so pmatch carries the range to search */
pmatch.rm_so = st;
pmatch.rm_eo = l->len;
/* Loop to compare with all the patterns */
for (i = 0; i < patterns; i++) {
/*
* XXX: grep_search() is a workaround for speed up and should be
* removed in the future. See fastgrep.c.
*/
if (fg_pattern[i].pattern) {
r = grep_search(&fg_pattern[i],
(unsigned char *)l->dat,
l->len, &pmatch);
r = (r == 0) ? 0 : REG_NOMATCH;
st = pmatch.rm_eo;
} else {
r = regexec(&r_pattern[i], l->dat, 1,
&pmatch, eflags);
r = (r == 0) ? 0 : REG_NOMATCH;
st = pmatch.rm_eo;
}
if (r == REG_NOMATCH)
continue;
/* Check for full match */
if (r == 0 && xflag)
if (pmatch.rm_so != 0 ||
(size_t)pmatch.rm_eo != l->len)
r = REG_NOMATCH;
/* Check for whole word match */
/*
* NOTE(review): the check below only runs when the match touches
* neither the start nor the end of the line. grep_malloc(size)
* reserves size bytes although mbstowcs() reports a count of wide
* characters, and wbegin[wcslen(wbegin)] addresses the terminator
* rather than the last character - this looks suspicious, confirm.
*/
if (r == 0 && wflag && pmatch.rm_so != 0 &&
(size_t)pmatch.rm_eo != l->len) {
wchar_t *wbegin;
wint_t wend;
size_t size;
size = mbstowcs(NULL, l->dat,
pmatch.rm_so);
if (size == ((size_t) - 1))
r = REG_NOMATCH;
else {
wbegin = grep_malloc(size);
if (mbstowcs(wbegin, l->dat,
pmatch.rm_so) == ((size_t) - 1))
r = REG_NOMATCH;
else if (sscanf(&l->dat[pmatch.rm_eo],
"%lc", &wend) != 1)
r = REG_NOMATCH;
else if (iswword(wbegin[wcslen(wbegin)]) ||
iswword(wend))
r = REG_NOMATCH;
free(wbegin);
}
}
if (r == 0) {
/* The line is counted once, however many matches it has */
if (m == 0)
c++;
if (m < MAX_LINE_MATCHES)
matches[m++] = pmatch;
/* matches - skip further patterns */
break;
}
}
if (vflag) {
c = !c;
break;
}
/* One pass if we are not recording matches */
if (!oflag && !color)
break;
if (st == (size_t)pmatch.rm_so)
break; /* No matches */
}
} else
c = !vflag;
/* For binary files only the result is reported; procfile() prints the notice */
if (c && binbehave == BINFILE_BIN && nottext)
return (c); /* Binary file */
/* Dealing with the context */
if ((tail || c) && !cflag && !qflag) {
if (c) {
/* A "--" line separates groups of output that are not adjacent */
if (!first && !prev && !tail && Aflag)
printf("--\n");
tail = Aflag;
if (Bflag > 0) {
if (!first && !prev)
printf("--\n");
printqueue();
}
linesqueued = 0;
printline(l, ':', matches, m);
} else {
/* Not selected, but still inside the trailing context of a match */
printline(l, '-', matches, m);
tail--;
}
}
if (c) {
prev = true;
first = false;
} else
prev = false;
return (c);
}
/*
 * malloc() wrapper that never returns NULL: when the allocation fails
 * the program is terminated with exit status 2.
 */
void *
grep_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(2, "malloc");
	return (mem);
}
/*
 * calloc() wrapper that never returns NULL: when the allocation fails
 * the program is terminated with exit status 2.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (mem == NULL)
		err(2, "calloc");
	return (mem);
}
/*
 * realloc() wrapper that never returns NULL: when the allocation fails
 * the program is terminated with exit status 2.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *grown = realloc(ptr, size);

	if (grown == NULL)
		err(2, "realloc");
	return (grown);
}
/*
 * Writes one line to standard output in the form selected on the command
 * line.  The optional prefix fields (file name, line number, byte offset)
 * are each followed by the separator sep.  matches holds m recorded
 * matches; with -o only the matched parts are written, one per line, and
 * with --color each matched part is wrapped in colour escape sequences.
 */
void
printline(struct str *line, int sep, regmatch_t *matches, int m)
{
	size_t done = 0;	/* offset just behind the last match written */
	int fields = 0;		/* number of prefix fields written so far */
	int k;

	if (!hflag) {
		if (nullflag != 0) {
			printf("%s", line->file);
			putchar(0);
		} else
			fputs(line->file, stdout);
		fields++;
	}
	if (nflag) {
		if (fields > 0)
			putchar(sep);
		printf("%d", line->line_no);
		fields++;
	}
	if (bflag) {
		if (fields > 0)
			putchar(sep);
		printf("%lld", (long long)line->off);
		fields++;
	}
	if (fields != 0)
		putchar(sep);

	/* Without -o/--color, or without matches, the line goes out as is. */
	if ((!oflag && !color) || m <= 0) {
		fwrite(line->dat, line->len, 1, stdout);
		putchar('\n');
		return;
	}

	/* --color and -o */
	for (k = 0; k < m; k++) {
		regmatch_t *cur = &matches[k];

		/* Text between the previous match and this one. */
		if (!oflag)
			fwrite(line->dat + done, cur->rm_so - done, 1,
			    stdout);
		if (color)
			fprintf(stdout, "\33[%sm\33[K", color);
		fwrite(line->dat + cur->rm_so, cur->rm_eo - cur->rm_so, 1,
		    stdout);
		if (color)
			fprintf(stdout, "\33[m\33[K");
		done = cur->rm_eo;
		if (oflag)
			putchar('\n');
	}
	if (!oflag) {
		/* Remainder of the line behind the last match. */
		if (line->len - done > 0)
			fwrite(line->dat + done, line->len - done, 1, stdout);
		putchar('\n');
	}
}