Back out the botched attempt to update to gnu grep 2.3 (lots of history
was lost). Restore original version to try and avoid breaking the build while David O'brien does a proper set of imports and merges. Requested by: obrien
This commit is contained in:
parent
ed03d56d4b
commit
74515e9b09
@ -1,15 +1,15 @@
|
||||
# $FreeBSD$
|
||||
|
||||
GREP_LIBZ=YES
|
||||
MAINTAINER= wosch
|
||||
|
||||
GREPDIR=${.CURDIR}/../../../contrib/grep
|
||||
|
||||
.PATH: ${GREPDIR}/src ${GREPDIR}/doc
|
||||
GREP_LIBZ= YES
|
||||
GREP_FTS= YES
|
||||
|
||||
PROG= grep
|
||||
SRCS= dfa.c getopt.c getopt1.c grep.c kwset.c obstack.c \
|
||||
savedir.c search.c stpcpy.c
|
||||
CFLAGS+=-I${.CURDIR} -DHAVE_CONFIG_H
|
||||
SRCS= dfa.c grep.c getopt.c kwset.c obstack.c search.c
|
||||
CFLAGS+=-DGREP -DHAVE_STRING_H=1 -DHAVE_SYS_PARAM_H=1 -DHAVE_UNISTD_H=1 \
|
||||
-DHAVE_GETPAGESIZE=1 -DHAVE_MEMCHR=1 -DHAVE_STRERROR=1 \
|
||||
-DHAVE_VALLOC=1 -DHAVE_WORKING_MMAP=1
|
||||
|
||||
LINKS+= ${BINDIR}/grep ${BINDIR}/egrep \
|
||||
${BINDIR}/grep ${BINDIR}/fgrep
|
||||
@ -21,34 +21,17 @@ LDADD+= -lgnuregex
|
||||
.if defined(GREP_LIBZ) && !empty(GREP_LIBZ)
|
||||
LDADD+= -lz
|
||||
DPADD+= ${LIBZ}
|
||||
CFLAGS+=-DHAVE_LIBZ=1
|
||||
CFLAGS+= -DHAVE_LIBZ=1
|
||||
LINKS+= ${BINDIR}/grep ${BINDIR}/zgrep \
|
||||
${BINDIR}/grep ${BINDIR}/zegrep \
|
||||
${BINDIR}/grep ${BINDIR}/zfgrep
|
||||
MLINKS+=grep.1 zgrep.1 grep.1 zegrep.1 grep.1 zfgrep.1
|
||||
${BINDIR}/grep ${BINDIR}/zfgrep
|
||||
MLINKS+= grep.1 zgrep.1 grep.1 zegrep.1 grep.1 zfgrep.1
|
||||
.endif
|
||||
.if defined(GREP_FTS) && !empty(GREP_FTS)
|
||||
CFLAGS+= -DHAVE_FTS=1
|
||||
.endif
|
||||
|
||||
SUBDIR+=doc
|
||||
|
||||
check: all
|
||||
@failed=0; total=0; \
|
||||
for tst in ${TESTS}; do \
|
||||
total=$$(($$total+1)); \
|
||||
if GREP=${.OBJDIR}/${PROG} srcdir=${GREPDIR}/tests \
|
||||
${GREPDIR}/tests/$$tst; then \
|
||||
echo "PASS: $$tst"; \
|
||||
else \
|
||||
failed=$$(($$failed+1)); \
|
||||
echo "FAIL: $$tst"; \
|
||||
fi; \
|
||||
done; \
|
||||
if [ "$$failed" -eq 0 ]; then \
|
||||
echo "All $$total tests passed"; \
|
||||
else \
|
||||
echo "$$failed of $$total tests failed"; \
|
||||
fi
|
||||
|
||||
TESTS= warning.sh khadafy.sh spencer1.sh bre.sh ere.sh \
|
||||
status.sh empty.sh options.sh
|
||||
sh ${.CURDIR}/tests/check.sh ${.CURDIR}/tests
|
||||
|
||||
.include <bsd.prog.mk>
|
||||
|
15
gnu/usr.bin/grep/PROJECTS
Normal file
15
gnu/usr.bin/grep/PROJECTS
Normal file
@ -0,0 +1,15 @@
|
||||
Write Texinfo documentation for grep. The manual page would be a good
|
||||
place to start, but Info documents are also supposed to contain a
|
||||
tutorial and examples.
|
||||
|
||||
Fix the DFA matcher to never use exponential space. (Fortunately, these
|
||||
cases are rare.)
|
||||
|
||||
Improve the performance of the regex backtracking matcher. This matcher
|
||||
is agonizingly slow, and is responsible for grep sometimes being slower
|
||||
than Unix grep when backreferences are used.
|
||||
|
||||
Provide support for the Posix [= =] and [. .] constructs. This is
|
||||
difficult because it requires locale-dependent details of the character
|
||||
set and collating sequence, but Posix does not standardize any method
|
||||
for accessing this information!
|
28
gnu/usr.bin/grep/README
Normal file
28
gnu/usr.bin/grep/README
Normal file
@ -0,0 +1,28 @@
|
||||
This is GNU grep 2.0, the "fastest grep in the west" (we hope). All
|
||||
bugs reported in previous releases have been fixed. Many exciting new
|
||||
bugs have probably been introduced in this major revision.
|
||||
|
||||
GNU grep is provided "as is" with no warranty. The exact terms
|
||||
under which you may use and (re)distribute this program are detailed
|
||||
in the GNU General Public License, in the file COPYING.
|
||||
|
||||
GNU grep is based on a fast lazy-state deterministic matcher (about
|
||||
twice as fast as stock Unix egrep) hybridized with a Boyer-Moore-Gosper
|
||||
search for a fixed string that eliminates impossible text from being
|
||||
considered by the full regexp matcher without necessarily having to
|
||||
look at every character. The result is typically many times faster
|
||||
than Unix grep or egrep. (Regular expressions containing backreferencing
|
||||
will run more slowly, however.)
|
||||
|
||||
See the file AUTHORS for a list of authors and other contributors.
|
||||
|
||||
See the file INSTALL for compilation and installation instructions.
|
||||
|
||||
See the file MANIFEST for a list of files in this distribution.
|
||||
|
||||
See the file NEWS for a description of major changes in this release.
|
||||
|
||||
See the file PROJECTS if you want to be mentioned in AUTHORS.
|
||||
|
||||
Send bug reports to bug-gnu-utils@prep.ai.mit.edu. Be sure to
|
||||
include the word "grep" in your Subject: header field.
|
@ -1,188 +0,0 @@
|
||||
/* $FreeBSD$ */
|
||||
|
||||
/* config.h. Generated automatically by configure. */
|
||||
/* config.hin. Generated automatically from configure.in by autoheader. */
|
||||
|
||||
/* Define if using alloca.c. */
|
||||
/* #undef C_ALLOCA */
|
||||
|
||||
/* Define if the closedir function returns void instead of int. */
|
||||
/* #undef CLOSEDIR_VOID */
|
||||
|
||||
/* Define to empty if the keyword does not work. */
|
||||
/* #undef const */
|
||||
|
||||
/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems.
|
||||
This function is required for alloca.c support on those systems. */
|
||||
/* #undef CRAY_STACKSEG_END */
|
||||
|
||||
/* Define if you have alloca, as a function or macro. */
|
||||
#define HAVE_ALLOCA 1
|
||||
|
||||
/* Define if you have <alloca.h> and it should be used (not on Ultrix). */
|
||||
/* #undef HAVE_ALLOCA_H */
|
||||
|
||||
/* Define if you have a working `mmap' system call. */
|
||||
#define HAVE_MMAP 1
|
||||
|
||||
/* Define as __inline if that's what the C compiler calls it. */
|
||||
/* #undef inline */
|
||||
|
||||
/* Define to `long' if <sys/types.h> doesn't define. */
|
||||
/* #undef off_t */
|
||||
|
||||
/* Define to `unsigned' if <sys/types.h> doesn't define. */
|
||||
/* #undef size_t */
|
||||
|
||||
/* If using the C implementation of alloca, define if you know the
|
||||
direction of stack growth for your system; otherwise it will be
|
||||
automatically deduced at run-time.
|
||||
STACK_DIRECTION > 0 => grows toward higher addresses
|
||||
STACK_DIRECTION < 0 => grows toward lower addresses
|
||||
STACK_DIRECTION = 0 => direction of growth unknown
|
||||
*/
|
||||
/* #undef STACK_DIRECTION */
|
||||
|
||||
/* Define if the `S_IS*' macros in <sys/stat.h> do not work properly. */
|
||||
/* #undef STAT_MACROS_BROKEN */
|
||||
|
||||
/* Define if you have the ANSI C header files. */
|
||||
#define STDC_HEADERS 1
|
||||
|
||||
/* Define to use grep's error-checking malloc in the kwset routines. */
|
||||
#define GREP 1
|
||||
|
||||
/* Package name. */
|
||||
#define PACKAGE "grep"
|
||||
|
||||
/* Version number. */
|
||||
#define VERSION "2.3"
|
||||
|
||||
/* Hack for Visual C++ suggested by irox. */
|
||||
/* #undef alloca */
|
||||
|
||||
/* #undef HAVE_STPCPY */
|
||||
|
||||
/* #undef ENABLE_NLS */
|
||||
|
||||
/* #undef HAVE_CATGETS */
|
||||
|
||||
/* #undef HAVE_GETTEXT */
|
||||
|
||||
#define HAVE_LC_MESSAGES 1
|
||||
|
||||
/*
|
||||
* DOS specific
|
||||
*/
|
||||
/* #undef HAVE_DOS_FILE_NAMES */
|
||||
|
||||
/* Define if you have the __argz_count function. */
|
||||
/* #undef HAVE___ARGZ_COUNT */
|
||||
|
||||
/* Define if you have the __argz_next function. */
|
||||
/* #undef HAVE___ARGZ_NEXT */
|
||||
|
||||
/* Define if you have the __argz_stringify function. */
|
||||
/* #undef HAVE___ARGZ_STRINGIFY */
|
||||
|
||||
/* Define if you have the btowc function. */
|
||||
/* #undef HAVE_BTOWC */
|
||||
|
||||
/* Define if you have the dcgettext function. */
|
||||
/* #undef HAVE_DCGETTEXT */
|
||||
|
||||
/* Define if you have the getcwd function. */
|
||||
#define HAVE_GETCWD 1
|
||||
|
||||
/* Define if you have the getpagesize function. */
|
||||
#define HAVE_GETPAGESIZE 1
|
||||
|
||||
/* Define if you have the isascii function. */
|
||||
#define HAVE_ISASCII 1
|
||||
|
||||
/* Define if you have the memchr function. */
|
||||
#define HAVE_MEMCHR 1
|
||||
|
||||
/* Define if you have the munmap function. */
|
||||
#define HAVE_MUNMAP 1
|
||||
|
||||
/* Define if you have the putenv function. */
|
||||
#define HAVE_PUTENV 1
|
||||
|
||||
/* Define if you have the setenv function. */
|
||||
#define HAVE_SETENV 1
|
||||
|
||||
/* Define if you have the setlocale function. */
|
||||
#define HAVE_SETLOCALE 1
|
||||
|
||||
/* Define if you have the setmode function. */
|
||||
#define HAVE_SETMODE 1
|
||||
|
||||
/* Define if you have the stpcpy function. */
|
||||
/* #undef HAVE_STPCPY */
|
||||
|
||||
/* Define if you have the strcasecmp function. */
|
||||
#define HAVE_STRCASECMP 1
|
||||
|
||||
/* Define if you have the strchr function. */
|
||||
#define HAVE_STRCHR 1
|
||||
|
||||
/* Define if you have the strdup function. */
|
||||
#define HAVE_STRDUP 1
|
||||
|
||||
/* Define if you have the strerror function. */
|
||||
#define HAVE_STRERROR 1
|
||||
|
||||
/* Define if you have the <argz.h> header file. */
|
||||
/* #undef HAVE_ARGZ_H */
|
||||
|
||||
/* Define if you have the <dirent.h> header file. */
|
||||
#define HAVE_DIRENT_H 1
|
||||
|
||||
/* Define if you have the <libintl.h> header file. */
|
||||
/* #undef HAVE_LIBINTL_H */
|
||||
|
||||
/* Define if you have the <limits.h> header file. */
|
||||
#define HAVE_LIMITS_H 1
|
||||
|
||||
/* Define if you have the <locale.h> header file. */
|
||||
#define HAVE_LOCALE_H 1
|
||||
|
||||
/* Define if you have the <malloc.h> header file. */
|
||||
/* #undef HAVE_MALLOC_H */
|
||||
|
||||
/* Define if you have the <memory.h> header file. */
|
||||
#define HAVE_MEMORY_H 1
|
||||
|
||||
/* Define if you have the <ndir.h> header file. */
|
||||
/* #undef HAVE_NDIR_H */
|
||||
|
||||
/* Define if you have the <nl_types.h> header file. */
|
||||
#define HAVE_NL_TYPES_H 1
|
||||
|
||||
/* Define if you have the <stdlib.h> header file. */
|
||||
#define HAVE_STDLIB_H 1
|
||||
|
||||
/* Define if you have the <string.h> header file. */
|
||||
#define HAVE_STRING_H 1
|
||||
|
||||
/* Define if you have the <sys/dir.h> header file. */
|
||||
/* #undef HAVE_SYS_DIR_H */
|
||||
|
||||
/* Define if you have the <sys/ndir.h> header file. */
|
||||
/* #undef HAVE_SYS_NDIR_H */
|
||||
|
||||
/* Define if you have the <sys/param.h> header file. */
|
||||
#define HAVE_SYS_PARAM_H 1
|
||||
|
||||
/* Define if you have the <unistd.h> header file. */
|
||||
#define HAVE_UNISTD_H 1
|
||||
|
||||
/* Define if you have the <wchar.h> header file. */
|
||||
/* #undef HAVE_WCHAR_H */
|
||||
|
||||
/* Define if you have the <wctype.h> header file. */
|
||||
/* #undef HAVE_WCTYPE_H */
|
||||
|
||||
/* Define if you have the i library (-li). */
|
||||
/* #undef HAVE_LIBI */
|
2550
gnu/usr.bin/grep/dfa.c
Normal file
2550
gnu/usr.bin/grep/dfa.c
Normal file
File diff suppressed because it is too large
Load Diff
360
gnu/usr.bin/grep/dfa.h
Normal file
360
gnu/usr.bin/grep/dfa.h
Normal file
@ -0,0 +1,360 @@
|
||||
/* dfa.h - declarations for GNU deterministic regexp compiler
|
||||
Copyright (C) 1988 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
/* Written June, 1988 by Mike Haertel */
|
||||
|
||||
/* FIXME:
|
||||
2. We should not export so much of the DFA internals.
|
||||
In addition to clobbering modularity, we eat up valuable
|
||||
name space. */
|
||||
|
||||
/* Number of bits in an unsigned char. */
|
||||
#define CHARBITS 8
|
||||
|
||||
/* First integer value that is greater than any character code. */
|
||||
#define NOTCHAR (1 << CHARBITS)
|
||||
|
||||
/* INTBITS need not be exact, just a lower bound. */
|
||||
#define INTBITS (CHARBITS * sizeof (int))
|
||||
|
||||
/* Number of ints required to hold a bit for every character. */
|
||||
#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)
|
||||
|
||||
/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
|
||||
typedef int charclass[CHARCLASS_INTS];
|
||||
|
||||
/* The regexp is parsed into an array of tokens in postfix form. Some tokens
|
||||
are operators and others are terminal symbols. Most (but not all) of these
|
||||
codes are returned by the lexical analyzer. */
|
||||
|
||||
typedef enum
|
||||
{
|
||||
END = -1, /* END is a terminal symbol that matches the
|
||||
end of input; any value of END or less in
|
||||
the parse tree is such a symbol. Accepting
|
||||
states of the DFA are those that would have
|
||||
a transition on END. */
|
||||
|
||||
/* Ordinary character values are terminal symbols that match themselves. */
|
||||
|
||||
EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
|
||||
the empty string. */
|
||||
|
||||
BACKREF, /* BACKREF is generated by \<digit>; it
|
||||
is not completely handled. If the scanner
|
||||
detects a transition on backref, it returns
|
||||
a kind of "semi-success" indicating that
|
||||
the match will have to be verified with
|
||||
a backtracking matcher. */
|
||||
|
||||
BEGLINE, /* BEGLINE is a terminal symbol that matches
|
||||
the empty string if it is at the beginning
|
||||
of a line. */
|
||||
|
||||
ENDLINE, /* ENDLINE is a terminal symbol that matches
|
||||
the empty string if it is at the end of
|
||||
a line. */
|
||||
|
||||
BEGWORD, /* BEGWORD is a terminal symbol that matches
|
||||
the empty string if it is at the beginning
|
||||
of a word. */
|
||||
|
||||
ENDWORD, /* ENDWORD is a terminal symbol that matches
|
||||
the empty string if it is at the end of
|
||||
a word. */
|
||||
|
||||
LIMWORD, /* LIMWORD is a terminal symbol that matches
|
||||
the empty string if it is at the beginning
|
||||
or the end of a word. */
|
||||
|
||||
NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
|
||||
matches the empty string if it is not at
|
||||
the beginning or end of a word. */
|
||||
|
||||
QMARK, /* QMARK is an operator of one argument that
|
||||
matches zero or one occurrences of its
|
||||
argument. */
|
||||
|
||||
STAR, /* STAR is an operator of one argument that
|
||||
matches the Kleene closure (zero or more
|
||||
occurrences) of its argument. */
|
||||
|
||||
PLUS, /* PLUS is an operator of one argument that
|
||||
matches the positive closure (one or more
|
||||
occurrences) of its argument. */
|
||||
|
||||
REPMN, /* REPMN is a lexical token corresponding
|
||||
to the {m,n} construct. REPMN never
|
||||
appears in the compiled token vector. */
|
||||
|
||||
CAT, /* CAT is an operator of two arguments that
|
||||
matches the concatenation of its
|
||||
arguments. CAT is never returned by the
|
||||
lexical analyzer. */
|
||||
|
||||
OR, /* OR is an operator of two arguments that
|
||||
matches either of its arguments. */
|
||||
|
||||
ORTOP, /* OR at the toplevel in the parse tree.
|
||||
This is used for a boyer-moore heuristic. */
|
||||
|
||||
LPAREN, /* LPAREN never appears in the parse tree,
|
||||
it is only a lexeme. */
|
||||
|
||||
RPAREN, /* RPAREN never appears in the parse tree. */
|
||||
|
||||
CSET /* CSET (and any value greater) is a
|
||||
terminal symbol that matches any of a
|
||||
class of characters. */
|
||||
} token;
|
||||
|
||||
/* Sets are stored in an array in the compiled dfa; the index of the
|
||||
array corresponding to a given set token is given by SET_INDEX(t). */
|
||||
#define SET_INDEX(t) ((t) - CSET)
|
||||
|
||||
/* Sometimes characters can only be matched depending on the surrounding
|
||||
context. Such context decisions depend on what the previous character
|
||||
was, and the value of the current (lookahead) character. Context
|
||||
dependent constraints are encoded as 8 bit integers. Each bit that
|
||||
is set indicates that the constraint succeeds in the corresponding
|
||||
context.
|
||||
|
||||
bit 7 - previous and current are newlines
|
||||
bit 6 - previous was newline, current isn't
|
||||
bit 5 - previous wasn't newline, current is
|
||||
bit 4 - neither previous nor current is a newline
|
||||
bit 3 - previous and current are word-constituents
|
||||
bit 2 - previous was word-constituent, current isn't
|
||||
bit 1 - previous wasn't word-constituent, current is
|
||||
bit 0 - neither previous nor current is word-constituent
|
||||
|
||||
Word-constituent characters are those that satisfy isalnum().
|
||||
|
||||
The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint
|
||||
succeeds in a particular context. Prevn is true if the previous character
|
||||
was a newline, currn is true if the lookahead character is a newline.
|
||||
Prevl and currl similarly depend upon whether the previous and current
|
||||
characters are word-constituent letters. */
|
||||
#define MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
|
||||
((constraint) & 1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4))
|
||||
#define MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
|
||||
((constraint) & 1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0)))
|
||||
#define SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
|
||||
(MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
|
||||
&& MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
|
||||
|
||||
/* The following macros give information about what a constraint depends on. */
|
||||
#define PREV_NEWLINE_DEPENDENT(constraint) \
|
||||
(((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))
|
||||
#define PREV_LETTER_DEPENDENT(constraint) \
|
||||
(((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))
|
||||
|
||||
/* Tokens that match the empty string subject to some constraint actually
|
||||
work by applying that constraint to determine what may follow them,
|
||||
taking into account what has gone before. The following values are
|
||||
the constraints corresponding to the special tokens previously defined. */
|
||||
#define NO_CONSTRAINT 0xff
|
||||
#define BEGLINE_CONSTRAINT 0xcf
|
||||
#define ENDLINE_CONSTRAINT 0xaf
|
||||
#define BEGWORD_CONSTRAINT 0xf2
|
||||
#define ENDWORD_CONSTRAINT 0xf4
|
||||
#define LIMWORD_CONSTRAINT 0xf6
|
||||
#define NOTLIMWORD_CONSTRAINT 0xf9
|
||||
|
||||
/* States of the recognizer correspond to sets of positions in the parse
|
||||
tree, together with the constraints under which they may be matched.
|
||||
So a position is encoded as an index into the parse tree together with
|
||||
a constraint. */
|
||||
typedef struct
|
||||
{
|
||||
unsigned index; /* Index into the parse array. */
|
||||
unsigned constraint; /* Constraint for matching this position. */
|
||||
} position;
|
||||
|
||||
/* Sets of positions are stored as arrays. */
|
||||
typedef struct
|
||||
{
|
||||
position *elems; /* Elements of this position set. */
|
||||
int nelem; /* Number of elements in this set. */
|
||||
} position_set;
|
||||
|
||||
/* A state of the dfa consists of a set of positions, some flags,
|
||||
and the token value of the lowest-numbered position of the state that
|
||||
contains an END token. */
|
||||
typedef struct
|
||||
{
|
||||
int hash; /* Hash of the positions of this state. */
|
||||
position_set elems; /* Positions this state could match. */
|
||||
char newline; /* True if previous state matched newline. */
|
||||
char letter; /* True if previous state matched a letter. */
|
||||
char backref; /* True if this state matches a \<digit>. */
|
||||
unsigned char constraint; /* Constraint for this state to accept. */
|
||||
int first_end; /* Token value of the first END in elems. */
|
||||
} dfa_state;
|
||||
|
||||
/* Element of a list of strings, at least one of which is known to
|
||||
appear in any R.E. matching the DFA. */
|
||||
struct dfamust
|
||||
{
|
||||
int exact;
|
||||
char *must;
|
||||
struct dfamust *next;
|
||||
};
|
||||
|
||||
/* A compiled regular expression. */
|
||||
struct dfa
|
||||
{
|
||||
/* Stuff built by the scanner. */
|
||||
charclass *charclasses; /* Array of character sets for CSET tokens. */
|
||||
int cindex; /* Index for adding new charclasses. */
|
||||
int calloc; /* Number of charclasses currently allocated. */
|
||||
|
||||
/* Stuff built by the parser. */
|
||||
token *tokens; /* Postfix parse array. */
|
||||
int tindex; /* Index for adding new tokens. */
|
||||
int talloc; /* Number of tokens currently allocated. */
|
||||
int depth; /* Depth required of an evaluation stack
|
||||
used for depth-first traversal of the
|
||||
parse tree. */
|
||||
int nleaves; /* Number of leaves on the parse tree. */
|
||||
int nregexps; /* Count of parallel regexps being built
|
||||
with dfaparse(). */
|
||||
|
||||
/* Stuff owned by the state builder. */
|
||||
dfa_state *states; /* States of the dfa. */
|
||||
int sindex; /* Index for adding new states. */
|
||||
int salloc; /* Number of states currently allocated. */
|
||||
|
||||
/* Stuff built by the structure analyzer. */
|
||||
position_set *follows; /* Array of follow sets, indexed by position
|
||||
index. The follow of a position is the set
|
||||
of positions containing characters that
|
||||
could conceivably follow a character
|
||||
matching the given position in a string
|
||||
matching the regexp. Allocated to the
|
||||
maximum possible position index. */
|
||||
int searchflag; /* True if we are supposed to build a searching
|
||||
as opposed to an exact matcher. A searching
|
||||
matcher finds the first and shortest string
|
||||
matching a regexp anywhere in the buffer,
|
||||
whereas an exact matcher finds the longest
|
||||
string matching, but anchored to the
|
||||
beginning of the buffer. */
|
||||
|
||||
/* Stuff owned by the executor. */
|
||||
int tralloc; /* Number of transition tables that have
|
||||
slots so far. */
|
||||
int trcount; /* Number of transition tables that have
|
||||
actually been built. */
|
||||
int **trans; /* Transition tables for states that can
|
||||
never accept. If the transitions for a
|
||||
state have not yet been computed, or the
|
||||
state could possibly accept, its entry in
|
||||
this table is NULL. */
|
||||
int **realtrans; /* Trans always points to realtrans + 1; this
|
||||
is so trans[-1] can contain NULL. */
|
||||
int **fails; /* Transition tables after failing to accept
|
||||
on a state that potentially could do so. */
|
||||
int *success; /* Table of acceptance conditions used in
|
||||
dfaexec and computed in build_state. */
|
||||
int *newlines; /* Transitions on newlines. The entry for a
|
||||
newline in any transition table is always
|
||||
-1 so we can count lines without wasting
|
||||
too many cycles. The transition for a
|
||||
newline is stored separately and handled
|
||||
as a special case. Newline is also used
|
||||
as a sentinel at the end of the buffer. */
|
||||
struct dfamust *musts; /* List of strings, at least one of which
|
||||
is known to appear in any r.e. matching
|
||||
the dfa. */
|
||||
};
|
||||
|
||||
/* Some macros for user access to dfa internals. */
|
||||
|
||||
/* ACCEPTING returns true if s could possibly be an accepting state of r. */
|
||||
#define ACCEPTING(s, r) ((r).states[s].constraint)
|
||||
|
||||
/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
|
||||
specified context. */
|
||||
#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, dfa) \
|
||||
SUCCEEDS_IN_CONTEXT((dfa).states[state].constraint, \
|
||||
prevn, currn, prevl, currl)
|
||||
|
||||
/* FIRST_MATCHING_REGEXP returns the index number of the first of parallel
|
||||
regexps that a given state could accept. Parallel regexps are numbered
|
||||
starting at 1. */
|
||||
#define FIRST_MATCHING_REGEXP(state, dfa) (-(dfa).states[state].first_end)
|
||||
|
||||
/* Entry points. */
|
||||
|
||||
#if __STDC__
|
||||
|
||||
/* dfasyntax() takes two arguments; the first sets the syntax bits described
|
||||
earlier in this file, and the second sets the case-folding flag. */
|
||||
extern void dfasyntax(int, int);
|
||||
|
||||
/* Compile the given string of the given length into the given struct dfa.
|
||||
Final argument is a flag specifying whether to build a searching or an
|
||||
exact matcher. */
|
||||
extern void dfacomp(char *, size_t, struct dfa *, int);
|
||||
|
||||
/* Execute the given struct dfa on the buffer of characters. The
|
||||
first char * points to the beginning, and the second points to the
|
||||
first character after the end of the buffer, which must be a writable
|
||||
place so a sentinel end-of-buffer marker can be stored there. The
|
||||
third-to-last argument is a flag telling whether to allow newlines to
|
||||
be part of a string matching the regexp. The next-to-last argument,
|
||||
if non-NULL, points to a place to increment every time we see a
|
||||
newline. The final argument, if non-NULL, points to a flag that will
|
||||
be set if further examination by a backtracking matcher is needed in
|
||||
order to verify backreferencing; otherwise the flag will be cleared.
|
||||
Returns NULL if no match is found, or a pointer to the first
|
||||
character after the first & shortest matching string in the buffer. */
|
||||
extern char *dfaexec(struct dfa *, char *, char *, int, int *, int *);
|
||||
|
||||
/* Free the storage held by the components of a struct dfa. */
|
||||
extern void dfafree(struct dfa *);
|
||||
|
||||
/* Entry points for people who know what they're doing. */
|
||||
|
||||
/* Initialize the components of a struct dfa. */
|
||||
extern void dfainit(struct dfa *);
|
||||
|
||||
/* Incrementally parse a string of given length into a struct dfa. */
|
||||
extern void dfaparse(char *, size_t, struct dfa *);
|
||||
|
||||
/* Analyze a parsed regexp; second argument tells whether to build a searching
|
||||
or an exact matcher. */
|
||||
extern void dfaanalyze(struct dfa *, int);
|
||||
|
||||
/* Compute, for each possible character, the transitions out of a given
|
||||
state, storing them in an array of integers. */
|
||||
extern void dfastate(int, struct dfa *, int []);
|
||||
|
||||
/* Error handling. */
|
||||
|
||||
/* dfaerror() is called by the regexp routines whenever an error occurs. It
|
||||
takes a single argument, a NUL-terminated string describing the error.
|
||||
The default dfaerror() prints the error message to stderr and exits.
|
||||
The user can provide a different dfaerror() if so desired. */
|
||||
extern void dfaerror(char *);
|
||||
|
||||
#else /* ! __STDC__ */
|
||||
extern void dfasyntax(), dfacomp(), dfafree(), dfainit(), dfaparse();
|
||||
extern void dfaanalyze(), dfastate(), dfaerror();
|
||||
extern char *dfaexec();
|
||||
#endif /* ! __STDC__ */
|
@ -1,11 +0,0 @@
|
||||
# $FreeBSD$
|
||||
|
||||
GREPDIR=${.CURDIR}/../../../../contrib/grep
|
||||
|
||||
.PATH: ${GREPDIR}/doc
|
||||
|
||||
INFO= grep
|
||||
INFOSECTION= "System Utilities"
|
||||
MAKEINFOFLAGS+= -I ${GREPDIR}/doc
|
||||
|
||||
.include <bsd.info.mk>
|
731
gnu/usr.bin/grep/getopt.c
Normal file
731
gnu/usr.bin/grep/getopt.c
Normal file
@ -0,0 +1,731 @@
|
||||
/* Getopt for GNU.
|
||||
NOTE: getopt is now part of the C library, so if you don't know what
|
||||
"Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu
|
||||
before changing it!
|
||||
|
||||
Copyright (C) 1987, 88, 89, 90, 91, 92, 1993
|
||||
Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2, or (at your option) any
|
||||
later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
/* NOTE!!! AIX requires this to be the first thing in the file.
|
||||
Do not put ANYTHING before it! */
|
||||
#if !defined (__GNUC__) && defined (_AIX)
|
||||
#pragma alloca
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define alloca __builtin_alloca
|
||||
#else /* not __GNUC__ */
|
||||
#if defined (HAVE_ALLOCA_H) || (defined(sparc) && (defined(sun) || (!defined(USG) && !defined(SVR4) && !defined(__svr4__))))
|
||||
#include <alloca.h>
|
||||
#else
|
||||
#ifndef _AIX
|
||||
char *alloca ();
|
||||
#endif
|
||||
#endif /* alloca.h */
|
||||
#endif /* not __GNUC__ */
|
||||
|
||||
#if !__STDC__ && !defined(const) && IN_GCC
|
||||
#define const
|
||||
#endif
|
||||
|
||||
/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>. */
|
||||
#ifndef _NO_PROTO
|
||||
#define _NO_PROTO
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/* Comment out all this code if we are using the GNU C Library, and are not
|
||||
actually compiling the library itself. This code is part of the GNU C
|
||||
Library, but also included in many other GNU distributions. Compiling
|
||||
and linking in this code is a waste when using the GNU C library
|
||||
(especially if it is a shared library). Rather than having every GNU
|
||||
program understand `configure --with-gnu-libc' and omit the object files,
|
||||
it is simpler to just do this in the source for each such file. */
|
||||
|
||||
#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
|
||||
|
||||
|
||||
/* This needs to come after some library #include
|
||||
to get __GNU_LIBRARY__ defined. */
|
||||
#ifdef __GNU_LIBRARY__
|
||||
#undef alloca
|
||||
/* Don't include stdlib.h for non-GNU C libraries because some of them
|
||||
contain conflicting prototypes for getopt. */
|
||||
#include <stdlib.h>
|
||||
#else /* Not GNU C library. */
|
||||
#define __alloca alloca
|
||||
#endif /* GNU C library. */
|
||||
|
||||
/* If GETOPT_COMPAT is defined, `+' as well as `--' can introduce a
|
||||
long-named option. Because this is not POSIX.2 compliant, it is
|
||||
being phased out. */
|
||||
/* #define GETOPT_COMPAT */
|
||||
|
||||
/* This version of `getopt' appears to the caller like standard Unix `getopt'
|
||||
but it behaves differently for the user, since it allows the user
|
||||
to intersperse the options with the other arguments.
|
||||
|
||||
As `getopt' works, it permutes the elements of ARGV so that,
|
||||
when it is done, all the options precede everything else. Thus
|
||||
all application programs are extended to handle flexible argument order.
|
||||
|
||||
Setting the environment variable POSIXLY_CORRECT disables permutation.
|
||||
Then the behavior is completely standard.
|
||||
|
||||
GNU application programs can use a third alternative mode in which
|
||||
they can distinguish the relative order of options and other arguments. */
|
||||
|
||||
#include "getopt.h"
|
||||
|
||||
/* For communication from `getopt' to the caller.
|
||||
When `getopt' finds an option that takes an argument,
|
||||
the argument value is returned here.
|
||||
Also, when `ordering' is RETURN_IN_ORDER,
|
||||
each non-option ARGV-element is returned here. */
|
||||
|
||||
char *optarg = 0;
|
||||
|
||||
/* Index in ARGV of the next element to be scanned.
|
||||
This is used for communication to and from the caller
|
||||
and for communication between successive calls to `getopt'.
|
||||
|
||||
On entry to `getopt', zero means this is the first call; initialize.
|
||||
|
||||
When `getopt' returns EOF, this is the index of the first of the
|
||||
non-option elements that the caller should itself scan.
|
||||
|
||||
Otherwise, `optind' communicates from one call to the next
|
||||
how much of ARGV has been scanned so far. */
|
||||
|
||||
/* XXX 1003.2 says this must be 1 before any call. */
|
||||
int optind = 0;
|
||||
|
||||
/* The next char to be scanned in the option-element
|
||||
in which the last option character we returned was found.
|
||||
This allows us to pick up the scan where we left off.
|
||||
|
||||
If this is zero, or a null string, it means resume the scan
|
||||
by advancing to the next ARGV-element. */
|
||||
|
||||
static char *nextchar;
|
||||
|
||||
/* Callers store zero here to inhibit the error message
|
||||
for unrecognized options. */
|
||||
|
||||
int opterr = 1;
|
||||
|
||||
/* Set to an option character which was unrecognized.
|
||||
This must be initialized on some systems to avoid linking in the
|
||||
system's own getopt implementation. */
|
||||
|
||||
int optopt = '?';
|
||||
|
||||
/* Describe how to deal with options that follow non-option ARGV-elements.
|
||||
|
||||
If the caller did not specify anything,
|
||||
the default is REQUIRE_ORDER if the environment variable
|
||||
POSIXLY_CORRECT is defined, PERMUTE otherwise.
|
||||
|
||||
REQUIRE_ORDER means don't recognize them as options;
|
||||
stop option processing when the first non-option is seen.
|
||||
This is what Unix does.
|
||||
This mode of operation is selected by either setting the environment
|
||||
variable POSIXLY_CORRECT, or using `+' as the first character
|
||||
of the list of option characters.
|
||||
|
||||
PERMUTE is the default. We permute the contents of ARGV as we scan,
|
||||
so that eventually all the non-options are at the end. This allows options
|
||||
to be given in any order, even with programs that were not written to
|
||||
expect this.
|
||||
|
||||
RETURN_IN_ORDER is an option available to programs that were written
|
||||
to expect options and other ARGV-elements in any order and that care about
|
||||
the ordering of the two. We describe each non-option ARGV-element
|
||||
as if it were the argument of an option with character code 1.
|
||||
Using `-' as the first character of the list of option characters
|
||||
selects this mode of operation.
|
||||
|
||||
The special argument `--' forces an end of option-scanning regardless
|
||||
of the value of `ordering'. In the case of RETURN_IN_ORDER, only
|
||||
`--' can cause `getopt' to return EOF with `optind' != ARGC. */
|
||||
|
||||
static enum
|
||||
{
|
||||
REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
|
||||
} ordering;
|
||||
|
||||
#ifdef __GNU_LIBRARY__
|
||||
/* We want to avoid inclusion of string.h with non-GNU libraries
|
||||
because there are many ways it can cause trouble.
|
||||
On some systems, it contains special magic macros that don't work
|
||||
in GCC. */
|
||||
#include <string.h>
|
||||
#define my_index strchr
|
||||
/* Copy N bytes from SRC to DST.  `exchange' shifts ARGV entries down
   within the same array, so source and destination may overlap;
   memcpy is undefined for overlapping objects, memmove is not.  */
#define my_bcopy(src, dst, n) memmove ((dst), (src), (n))
|
||||
#else
|
||||
|
||||
/* Avoid depending on library functions or files
|
||||
whose names are inconsistent. */
|
||||
|
||||
char *getenv ();
|
||||
|
||||
/* Return a pointer to the first occurrence of CHR in the string STR,
   or a null pointer if CHR does not occur before the terminating
   null character.  The terminator itself is never matched.  */
static char *
my_index (str, chr)
     const char *str;
     int chr;
{
  const char *p;

  for (p = str; *p != '\0'; p++)
    if (*p == chr)
      return (char *) p;

  return 0;
}
|
||||
|
||||
/* Copy SIZE bytes from FROM to TO, lowest address first.
   Nothing is copied when SIZE is zero or negative.  */
static void
my_bcopy (from, to, size)
     const char *from;
     char *to;
     int size;
{
  const char *src = from;
  char *dst = to;
  int left;

  for (left = size; left > 0; left--)
    *dst++ = *src++;
}
|
||||
#endif /* GNU C library. */
|
||||
|
||||
/* Handle permutation of arguments. */
|
||||
|
||||
/* Describe the part of ARGV that contains non-options that have
|
||||
been skipped. `first_nonopt' is the index in ARGV of the first of them;
|
||||
`last_nonopt' is the index after the last of them. */
|
||||
|
||||
static int first_nonopt;
|
||||
static int last_nonopt;
|
||||
|
||||
/* Exchange two adjacent subsequences of ARGV.
|
||||
One subsequence is elements [first_nonopt,last_nonopt)
|
||||
which contains all the non-options that have been skipped so far.
|
||||
The other is elements [last_nonopt,optind), which contains all
|
||||
the options processed since those non-options were skipped.
|
||||
|
||||
`first_nonopt' and `last_nonopt' are relocated so that they describe
|
||||
the new indices of the non-options in ARGV after they are moved. */
|
||||
|
||||
static void
|
||||
exchange (argv)
|
||||
char **argv;
|
||||
{
|
||||
int nonopts_size = (last_nonopt - first_nonopt) * sizeof (char *);
|
||||
char **temp = (char **) __alloca (nonopts_size);
|
||||
|
||||
/* Interchange the two blocks of data in ARGV. */
|
||||
|
||||
my_bcopy ((char *) &argv[first_nonopt], (char *) temp, nonopts_size);
|
||||
my_bcopy ((char *) &argv[last_nonopt], (char *) &argv[first_nonopt],
|
||||
(optind - last_nonopt) * sizeof (char *));
|
||||
my_bcopy ((char *) temp,
|
||||
(char *) &argv[first_nonopt + optind - last_nonopt],
|
||||
nonopts_size);
|
||||
|
||||
/* Update records for the slots the non-options now occupy. */
|
||||
|
||||
first_nonopt += (optind - last_nonopt);
|
||||
last_nonopt = optind;
|
||||
}
|
||||
|
||||
/* Scan elements of ARGV (whose length is ARGC) for option characters
|
||||
given in OPTSTRING.
|
||||
|
||||
If an element of ARGV starts with '-', and is not exactly "-" or "--",
|
||||
then it is an option element. The characters of this element
|
||||
(aside from the initial '-') are option characters. If `getopt'
|
||||
is called repeatedly, it returns successively each of the option characters
|
||||
from each of the option elements.
|
||||
|
||||
If `getopt' finds another option character, it returns that character,
|
||||
updating `optind' and `nextchar' so that the next call to `getopt' can
|
||||
resume the scan with the following option character or ARGV-element.
|
||||
|
||||
If there are no more option characters, `getopt' returns `EOF'.
|
||||
Then `optind' is the index in ARGV of the first ARGV-element
|
||||
that is not an option. (The ARGV-elements have been permuted
|
||||
so that those that are not options now come last.)
|
||||
|
||||
OPTSTRING is a string containing the legitimate option characters.
|
||||
If an option character is seen that is not listed in OPTSTRING,
|
||||
return '?' after printing an error message. If you set `opterr' to
|
||||
zero, the error message is suppressed but we still return '?'.
|
||||
|
||||
If a char in OPTSTRING is followed by a colon, that means it wants an arg,
|
||||
so the following text in the same ARGV-element, or the text of the following
|
||||
ARGV-element, is returned in `optarg'. Two colons mean an option that
|
||||
wants an optional arg; if there is text in the current ARGV-element,
|
||||
it is returned in `optarg', otherwise `optarg' is set to zero.
|
||||
|
||||
If OPTSTRING starts with `-' or `+', it requests different methods of
|
||||
handling the non-option ARGV-elements.
|
||||
See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
|
||||
|
||||
Long-named options begin with `--' instead of `-'.
|
||||
Their names may be abbreviated as long as the abbreviation is unique
|
||||
or is an exact match for some defined option. If they have an
|
||||
argument, it follows the option name in the same ARGV-element, separated
|
||||
from the option name by a `=', or else in the next ARGV-element.
|
||||
When `getopt' finds a long-named option, it returns 0 if that option's
|
||||
`flag' field is nonzero, the value of the option's `val' field
|
||||
if the `flag' field is zero.
|
||||
|
||||
The elements of ARGV aren't really const, because we permute them.
|
||||
But we pretend they're const in the prototype to be compatible
|
||||
with other systems.
|
||||
|
||||
LONGOPTS is a vector of `struct option' terminated by an
|
||||
element containing a name which is zero.
|
||||
|
||||
LONGIND returns the index in LONGOPTS of the long-named option found.
|
||||
It is only valid when a long-named option has been found by the most
|
||||
recent call.
|
||||
|
||||
If LONG_ONLY is nonzero, '-' as well as '--' can introduce
|
||||
long-named options. */
|
||||
|
||||
int
|
||||
_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
|
||||
int argc;
|
||||
char *const *argv;
|
||||
const char *optstring;
|
||||
const struct option *longopts;
|
||||
int *longind;
|
||||
int long_only;
|
||||
{
|
||||
int option_index;
|
||||
|
||||
optarg = 0;
|
||||
|
||||
/* Initialize the internal data when the first call is made.
|
||||
Start processing options with ARGV-element 1 (since ARGV-element 0
|
||||
is the program name); the sequence of previously skipped
|
||||
non-option ARGV-elements is empty. */
|
||||
|
||||
if (optind == 0)
|
||||
{
|
||||
first_nonopt = last_nonopt = optind = 1;
|
||||
|
||||
nextchar = NULL;
|
||||
|
||||
/* Determine how to handle the ordering of options and nonoptions. */
|
||||
|
||||
if (optstring[0] == '-')
|
||||
{
|
||||
ordering = RETURN_IN_ORDER;
|
||||
++optstring;
|
||||
}
|
||||
else if (optstring[0] == '+')
|
||||
{
|
||||
ordering = REQUIRE_ORDER;
|
||||
++optstring;
|
||||
}
|
||||
else if (getenv ("POSIXLY_CORRECT") != NULL)
|
||||
ordering = REQUIRE_ORDER;
|
||||
else
|
||||
ordering = PERMUTE;
|
||||
}
|
||||
|
||||
if (nextchar == NULL || *nextchar == '\0')
|
||||
{
|
||||
if (ordering == PERMUTE)
|
||||
{
|
||||
/* If we have just processed some options following some non-options,
|
||||
exchange them so that the options come first. */
|
||||
|
||||
if (first_nonopt != last_nonopt && last_nonopt != optind)
|
||||
exchange ((char **) argv);
|
||||
else if (last_nonopt != optind)
|
||||
first_nonopt = optind;
|
||||
|
||||
/* Now skip any additional non-options
|
||||
and extend the range of non-options previously skipped. */
|
||||
|
||||
while (optind < argc
|
||||
&& (argv[optind][0] != '-' || argv[optind][1] == '\0')
|
||||
#ifdef GETOPT_COMPAT
|
||||
&& (longopts == NULL
|
||||
|| argv[optind][0] != '+' || argv[optind][1] == '\0')
|
||||
#endif /* GETOPT_COMPAT */
|
||||
)
|
||||
optind++;
|
||||
last_nonopt = optind;
|
||||
}
|
||||
|
||||
/* Special ARGV-element `--' means premature end of options.
|
||||
Skip it like a null option,
|
||||
then exchange with previous non-options as if it were an option,
|
||||
then skip everything else like a non-option. */
|
||||
|
||||
if (optind != argc && !strcmp (argv[optind], "--"))
|
||||
{
|
||||
optind++;
|
||||
|
||||
if (first_nonopt != last_nonopt && last_nonopt != optind)
|
||||
exchange ((char **) argv);
|
||||
else if (first_nonopt == last_nonopt)
|
||||
first_nonopt = optind;
|
||||
last_nonopt = argc;
|
||||
|
||||
optind = argc;
|
||||
}
|
||||
|
||||
/* If we have done all the ARGV-elements, stop the scan
|
||||
and back over any non-options that we skipped and permuted. */
|
||||
|
||||
if (optind == argc)
|
||||
{
|
||||
/* Set the next-arg-index to point at the non-options
|
||||
that we previously skipped, so the caller will digest them. */
|
||||
if (first_nonopt != last_nonopt)
|
||||
optind = first_nonopt;
|
||||
return EOF;
|
||||
}
|
||||
|
||||
/* If we have come to a non-option and did not permute it,
|
||||
either stop the scan or describe it to the caller and pass it by. */
|
||||
|
||||
if ((argv[optind][0] != '-' || argv[optind][1] == '\0')
|
||||
#ifdef GETOPT_COMPAT
|
||||
&& (longopts == NULL
|
||||
|| argv[optind][0] != '+' || argv[optind][1] == '\0')
|
||||
#endif /* GETOPT_COMPAT */
|
||||
)
|
||||
{
|
||||
if (ordering == REQUIRE_ORDER)
|
||||
return EOF;
|
||||
optarg = argv[optind++];
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* We have found another option-ARGV-element.
|
||||
Start decoding its characters. */
|
||||
|
||||
nextchar = (argv[optind] + 1
|
||||
+ (longopts != NULL && argv[optind][1] == '-'));
|
||||
}
|
||||
|
||||
if (longopts != NULL
|
||||
&& ((argv[optind][0] == '-'
|
||||
&& (argv[optind][1] == '-' || long_only))
|
||||
#ifdef GETOPT_COMPAT
|
||||
|| argv[optind][0] == '+'
|
||||
#endif /* GETOPT_COMPAT */
|
||||
))
|
||||
{
|
||||
const struct option *p;
|
||||
char *s = nextchar;
|
||||
int exact = 0;
|
||||
int ambig = 0;
|
||||
const struct option *pfound = NULL;
|
||||
int indfound;
|
||||
|
||||
while (*s && *s != '=')
|
||||
s++;
|
||||
|
||||
/* Test all options for either exact match or abbreviated matches. */
|
||||
for (p = longopts, option_index = 0; p->name;
|
||||
p++, option_index++)
|
||||
if (!strncmp (p->name, nextchar, s - nextchar))
|
||||
{
|
||||
if (s - nextchar == strlen (p->name))
|
||||
{
|
||||
/* Exact match found. */
|
||||
pfound = p;
|
||||
indfound = option_index;
|
||||
exact = 1;
|
||||
break;
|
||||
}
|
||||
else if (pfound == NULL)
|
||||
{
|
||||
/* First nonexact match found. */
|
||||
pfound = p;
|
||||
indfound = option_index;
|
||||
}
|
||||
else
|
||||
/* Second nonexact match found. */
|
||||
ambig = 1;
|
||||
}
|
||||
|
||||
if (ambig && !exact)
|
||||
{
|
||||
if (opterr)
|
||||
fprintf (stderr, "%s: option `%s' is ambiguous\n",
|
||||
argv[0], argv[optind]);
|
||||
nextchar += strlen (nextchar);
|
||||
optind++;
|
||||
return '?';
|
||||
}
|
||||
|
||||
if (pfound != NULL)
|
||||
{
|
||||
option_index = indfound;
|
||||
optind++;
|
||||
if (*s)
|
||||
{
|
||||
/* Don't test has_arg with >, because some C compilers don't
|
||||
allow it to be used on enums. */
|
||||
if (pfound->has_arg)
|
||||
optarg = s + 1;
|
||||
else
|
||||
{
|
||||
if (opterr)
|
||||
{
|
||||
if (argv[optind - 1][1] == '-')
|
||||
/* --option */
|
||||
fprintf (stderr,
|
||||
"%s: option `--%s' doesn't allow an argument\n",
|
||||
argv[0], pfound->name);
|
||||
else
|
||||
/* +option or -option */
|
||||
fprintf (stderr,
|
||||
"%s: option `%c%s' doesn't allow an argument\n",
|
||||
argv[0], argv[optind - 1][0], pfound->name);
|
||||
}
|
||||
nextchar += strlen (nextchar);
|
||||
return '?';
|
||||
}
|
||||
}
|
||||
else if (pfound->has_arg == 1)
|
||||
{
|
||||
if (optind < argc)
|
||||
optarg = argv[optind++];
|
||||
else
|
||||
{
|
||||
if (opterr)
|
||||
fprintf (stderr, "%s: option `%s' requires an argument\n",
|
||||
argv[0], argv[optind - 1]);
|
||||
nextchar += strlen (nextchar);
|
||||
return optstring[0] == ':' ? ':' : '?';
|
||||
}
|
||||
}
|
||||
nextchar += strlen (nextchar);
|
||||
if (longind != NULL)
|
||||
*longind = option_index;
|
||||
if (pfound->flag)
|
||||
{
|
||||
*(pfound->flag) = pfound->val;
|
||||
return 0;
|
||||
}
|
||||
return pfound->val;
|
||||
}
|
||||
/* Can't find it as a long option. If this is not getopt_long_only,
|
||||
or the option starts with '--' or is not a valid short
|
||||
option, then it's an error.
|
||||
Otherwise interpret it as a short option. */
|
||||
if (!long_only || argv[optind][1] == '-'
|
||||
#ifdef GETOPT_COMPAT
|
||||
|| argv[optind][0] == '+'
|
||||
#endif /* GETOPT_COMPAT */
|
||||
|| my_index (optstring, *nextchar) == NULL)
|
||||
{
|
||||
if (opterr)
|
||||
{
|
||||
if (argv[optind][1] == '-')
|
||||
/* --option */
|
||||
fprintf (stderr, "%s: unrecognized option `--%s'\n",
|
||||
argv[0], nextchar);
|
||||
else
|
||||
/* +option or -option */
|
||||
fprintf (stderr, "%s: unrecognized option `%c%s'\n",
|
||||
argv[0], argv[optind][0], nextchar);
|
||||
}
|
||||
nextchar = (char *) "";
|
||||
optind++;
|
||||
return '?';
|
||||
}
|
||||
}
|
||||
|
||||
/* Look at and handle the next option-character. */
|
||||
|
||||
{
|
||||
char c = *nextchar++;
|
||||
char *temp = my_index (optstring, c);
|
||||
|
||||
/* Increment `optind' when we start to process its last character. */
|
||||
if (*nextchar == '\0')
|
||||
++optind;
|
||||
|
||||
if (temp == NULL || c == ':')
|
||||
{
|
||||
if (opterr)
|
||||
{
|
||||
#if 0
|
||||
if (c < 040 || c >= 0177)
|
||||
fprintf (stderr, "%s: unrecognized option, character code 0%o\n",
|
||||
argv[0], c);
|
||||
else
|
||||
fprintf (stderr, "%s: unrecognized option `-%c'\n", argv[0], c);
|
||||
#else
|
||||
/* 1003.2 specifies the format of this message. */
|
||||
fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c);
|
||||
#endif
|
||||
}
|
||||
optopt = c;
|
||||
return '?';
|
||||
}
|
||||
if (temp[1] == ':')
|
||||
{
|
||||
if (temp[2] == ':')
|
||||
{
|
||||
/* This is an option that accepts an argument optionally. */
|
||||
if (*nextchar != '\0')
|
||||
{
|
||||
optarg = nextchar;
|
||||
optind++;
|
||||
}
|
||||
else
|
||||
optarg = 0;
|
||||
nextchar = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* This is an option that requires an argument. */
|
||||
if (*nextchar != '\0')
|
||||
{
|
||||
optarg = nextchar;
|
||||
/* If we end this ARGV-element by taking the rest as an arg,
|
||||
we must advance to the next element now. */
|
||||
optind++;
|
||||
}
|
||||
else if (optind == argc)
|
||||
{
|
||||
if (opterr)
|
||||
{
|
||||
#if 0
|
||||
fprintf (stderr, "%s: option `-%c' requires an argument\n",
|
||||
argv[0], c);
|
||||
#else
|
||||
/* 1003.2 specifies the format of this message. */
|
||||
fprintf (stderr, "%s: option requires an argument -- %c\n",
|
||||
argv[0], c);
|
||||
#endif
|
||||
}
|
||||
optopt = c;
|
||||
if (optstring[0] == ':')
|
||||
c = ':';
|
||||
else
|
||||
c = '?';
|
||||
}
|
||||
else
|
||||
/* We already incremented `optind' once;
|
||||
increment it again when taking next ARGV-elt as argument. */
|
||||
optarg = argv[optind++];
|
||||
nextchar = NULL;
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
/* Traditional short-option entry point: forwards to
   `_getopt_internal' with no long-option table, no LONGIND result
   slot, and LONG_ONLY turned off.  */
int
getopt (argc, argv, optstring)
     int argc;
     char *const *argv;
     const char *optstring;
{
  const struct option *no_longopts = (const struct option *) 0;
  int *no_longind = (int *) 0;

  return _getopt_internal (argc, argv, optstring,
                           no_longopts, no_longind, 0);
}
|
||||
|
||||
#endif /* _LIBC or not __GNU_LIBRARY__. */
|
||||
|
||||
#ifdef TEST
|
||||
|
||||
/* Compile with -DTEST to make an executable for use in testing
|
||||
the above definition of `getopt'. */
|
||||
|
||||
int
|
||||
main (argc, argv)
|
||||
int argc;
|
||||
char **argv;
|
||||
{
|
||||
int c;
|
||||
int digit_optind = 0;
|
||||
|
||||
while (1)
|
||||
{
|
||||
int this_option_optind = optind ? optind : 1;
|
||||
|
||||
c = getopt (argc, argv, "abc:d:0123456789");
|
||||
if (c == EOF)
|
||||
break;
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
if (digit_optind != 0 && digit_optind != this_option_optind)
|
||||
printf ("digits occur in two different argv-elements.\n");
|
||||
digit_optind = this_option_optind;
|
||||
printf ("option %c\n", c);
|
||||
break;
|
||||
|
||||
case 'a':
|
||||
printf ("option a\n");
|
||||
break;
|
||||
|
||||
case 'b':
|
||||
printf ("option b\n");
|
||||
break;
|
||||
|
||||
case 'c':
|
||||
printf ("option c with value `%s'\n", optarg);
|
||||
break;
|
||||
|
||||
case '?':
|
||||
break;
|
||||
|
||||
default:
|
||||
printf ("?? getopt returned character code 0%o ??\n", c);
|
||||
}
|
||||
}
|
||||
|
||||
if (optind < argc)
|
||||
{
|
||||
printf ("non-option ARGV-elements: ");
|
||||
while (optind < argc)
|
||||
printf ("%s ", argv[optind++]);
|
||||
printf ("\n");
|
||||
}
|
||||
|
||||
exit (0);
|
||||
}
|
||||
|
||||
#endif /* TEST */
|
129
gnu/usr.bin/grep/getopt.h
Normal file
129
gnu/usr.bin/grep/getopt.h
Normal file
@ -0,0 +1,129 @@
|
||||
/* Declarations for getopt.
|
||||
Copyright (C) 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2, or (at your option) any
|
||||
later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
#ifndef _GETOPT_H
|
||||
#define _GETOPT_H 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* For communication from `getopt' to the caller.
|
||||
When `getopt' finds an option that takes an argument,
|
||||
the argument value is returned here.
|
||||
Also, when `ordering' is RETURN_IN_ORDER,
|
||||
each non-option ARGV-element is returned here. */
|
||||
|
||||
extern char *optarg;
|
||||
|
||||
/* Index in ARGV of the next element to be scanned.
|
||||
This is used for communication to and from the caller
|
||||
and for communication between successive calls to `getopt'.
|
||||
|
||||
On entry to `getopt', zero means this is the first call; initialize.
|
||||
|
||||
When `getopt' returns EOF, this is the index of the first of the
|
||||
non-option elements that the caller should itself scan.
|
||||
|
||||
Otherwise, `optind' communicates from one call to the next
|
||||
how much of ARGV has been scanned so far. */
|
||||
|
||||
extern int optind;
|
||||
|
||||
/* Callers store zero here to inhibit the error message `getopt' prints
|
||||
for unrecognized options. */
|
||||
|
||||
extern int opterr;
|
||||
|
||||
/* Set to an option character which was unrecognized. */
|
||||
|
||||
extern int optopt;
|
||||
|
||||
/* Describe the long-named options requested by the application.
|
||||
The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
|
||||
of `struct option' terminated by an element containing a name which is
|
||||
zero.
|
||||
|
||||
The field `has_arg' is:
|
||||
no_argument (or 0) if the option does not take an argument,
|
||||
required_argument (or 1) if the option requires an argument,
|
||||
optional_argument (or 2) if the option takes an optional argument.
|
||||
|
||||
If the field `flag' is not NULL, it points to a variable that is set
|
||||
to the value given in the field `val' when the option is found, but
|
||||
left unchanged if the option is not found.
|
||||
|
||||
To have a long-named option do something other than set an `int' to
|
||||
a compiled-in constant, such as set a value from `optarg', set the
|
||||
option's `flag' field to zero and its `val' field to a nonzero
|
||||
value (the equivalent single-letter option character, if there is
|
||||
one). For long options that have a zero `flag' field, `getopt'
|
||||
returns the contents of the `val' field. */
|
||||
|
||||
/* One entry of the long-option table handed to getopt_long and
   getopt_long_only; the table ends with an entry whose `name' is zero.  */
struct option
|
||||
{
|
||||
/* The option's name, without any leading dashes.  Declared const
   only when the compiler is ISO C.  */
#if __STDC__
|
||||
const char *name;
|
||||
#else
|
||||
char *name;
|
||||
#endif
|
||||
/* has_arg can't be an enum because some compilers complain about
|
||||
type mismatches in all the code that assumes it is an int. */
|
||||
/* One of no_argument, required_argument or optional_argument.  */
int has_arg;
|
||||
/* If not null, `*flag' is set to `val' when the option is found and
   getopt returns 0; if null, getopt returns `val' itself.  */
int *flag;
|
||||
/* Value stored through `flag', or returned when `flag' is null.  */
int val;
|
||||
};
|
||||
|
||||
/* Names for the values of the `has_arg' field of `struct option'. */
|
||||
|
||||
#define no_argument 0
|
||||
#define required_argument 1
|
||||
#define optional_argument 2
|
||||
|
||||
#if __STDC__
|
||||
#if defined(__GNU_LIBRARY__)
|
||||
/* Many other libraries have conflicting prototypes for getopt, with
|
||||
differences in the consts, in stdlib.h. To avoid compilation
|
||||
errors, only prototype getopt for the GNU C library. */
|
||||
extern int getopt (int argc, char *const *argv, const char *shortopts);
|
||||
#else /* not __GNU_LIBRARY__ */
|
||||
extern int getopt ();
|
||||
#endif /* not __GNU_LIBRARY__ */
|
||||
extern int getopt_long (int argc, char *const *argv, const char *shortopts,
|
||||
const struct option *longopts, int *longind);
|
||||
extern int getopt_long_only (int argc, char *const *argv,
|
||||
const char *shortopts,
|
||||
const struct option *longopts, int *longind);
|
||||
|
||||
/* Internal only. Users should not call this directly. */
|
||||
extern int _getopt_internal (int argc, char *const *argv,
|
||||
const char *shortopts,
|
||||
const struct option *longopts, int *longind,
|
||||
int long_only);
|
||||
#else /* not __STDC__ */
|
||||
extern int getopt ();
|
||||
extern int getopt_long ();
|
||||
extern int getopt_long_only ();
|
||||
|
||||
extern int _getopt_internal ();
|
||||
#endif /* not __STDC__ */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _GETOPT_H */
|
42
gnu/usr.bin/grep/getpagesize.h
Normal file
42
gnu/usr.bin/grep/getpagesize.h
Normal file
@ -0,0 +1,42 @@
|
||||
#ifdef BSD
|
||||
#ifndef BSD4_1
|
||||
#define HAVE_GETPAGESIZE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_GETPAGESIZE
|
||||
|
||||
#ifdef VMS
|
||||
#define getpagesize() 512
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef _SC_PAGESIZE
|
||||
#define getpagesize() sysconf(_SC_PAGESIZE)
|
||||
#else
|
||||
|
||||
#ifdef HAVE_SYS_PARAM_H
|
||||
#include <sys/param.h>
|
||||
|
||||
#ifdef EXEC_PAGESIZE
|
||||
#define getpagesize() EXEC_PAGESIZE
|
||||
#else
|
||||
#ifdef NBPG
|
||||
/* Page size is NBPG bytes per cluster page times CLSIZE pages per
   cluster.  Default CLSIZE to 1 before the macro that uses it.  */
#ifndef CLSIZE
#define CLSIZE 1
#endif /* no CLSIZE */
/* Parenthesized so the product stays one operand inside larger
   expressions such as `len / getpagesize ()'.  */
#define getpagesize() (NBPG * CLSIZE)
|
||||
#else /* no NBPG */
|
||||
#define getpagesize() NBPC
|
||||
#endif /* no NBPG */
|
||||
#endif /* no EXEC_PAGESIZE */
|
||||
#else /* !HAVE_SYS_PARAM_H */
|
||||
#define getpagesize() 8192 /* punt totally */
|
||||
#endif /* !HAVE_SYS_PARAM_H */
|
||||
#endif /* no _SC_PAGESIZE */
|
||||
|
||||
#endif /* not HAVE_GETPAGESIZE */
|
||||
|
410
gnu/usr.bin/grep/grep.1
Normal file
410
gnu/usr.bin/grep/grep.1
Normal file
@ -0,0 +1,410 @@
|
||||
.TH GREP 1 "1992 September 10" "GNU Project"
|
||||
.SH NAME
|
||||
grep, egrep, fgrep, zgrep \- print lines matching a pattern
|
||||
.SH SYNOPSIS
|
||||
.B grep
|
||||
[\-[AB] num]
|
||||
[\-HRPS]
|
||||
[\-CEFGLVabchilnqsvwx]
|
||||
[\-e expr]
|
||||
[\-f file]
|
||||
files...
|
||||
|
||||
.SH DESCRIPTION
|
||||
.PP
|
||||
.B Grep
|
||||
searches the named input
|
||||
.I files
|
||||
(or standard input if no files are named, or
|
||||
the file name
|
||||
.B \-
|
||||
is given)
|
||||
for lines containing a match to the given
|
||||
.IR pattern .
|
||||
By default,
|
||||
.B grep
|
||||
prints the matching lines.
|
||||
.PP
|
||||
There are three major variants of
|
||||
.BR grep ,
|
||||
controlled by the following options.
|
||||
.PD 0
|
||||
.TP
|
||||
.B \-G
|
||||
Interpret
|
||||
.I pattern
|
||||
as a basic regular expression (see below). This is the default.
|
||||
.TP
|
||||
.B \-E
|
||||
Interpret
|
||||
.I pattern
|
||||
as an extended regular expression (see below).
|
||||
.TP
|
||||
.B \-F
|
||||
Interpret
|
||||
.I pattern
|
||||
as a list of fixed strings, separated by newlines,
|
||||
any of which is to be matched.
|
||||
.LP
|
||||
In addition, two variant programs
|
||||
.B egrep
|
||||
and
|
||||
.B fgrep
|
||||
are available.
|
||||
.B Egrep
|
||||
is similar (but not identical) to
|
||||
.BR "grep\ \-E" ,
|
||||
and is compatible with the historical Unix
|
||||
.BR egrep .
|
||||
.B Fgrep
|
||||
is the same as
|
||||
.BR "grep\ \-F" .
|
||||
When called as
|
||||
.BR zgrep ,
|
||||
the
|
||||
.BR \-Z
|
||||
option is assumed.
|
||||
.PD
|
||||
.LP
|
||||
All variants of
|
||||
.B grep
|
||||
understand the following options:
|
||||
.PD 0
|
||||
.TP
|
||||
.BI \- num
|
||||
Matches will be printed with
|
||||
.I num
|
||||
lines of leading and trailing context. However,
|
||||
.B grep
|
||||
will never print any given line more than once.
|
||||
.TP
|
||||
.BI \-A " num"
|
||||
Print
|
||||
.I num
|
||||
lines of trailing context after matching lines.
|
||||
.TP
|
||||
.BI \-B " num"
|
||||
Print
|
||||
.I num
|
||||
lines of leading context before matching lines.
|
||||
.TP
|
||||
.B \-C
|
||||
Equivalent to
|
||||
.BR \-2 .
|
||||
.TP
|
||||
.B \-V
|
||||
Print the version number of
|
||||
.B grep
|
||||
to standard error. This version number should
|
||||
be included in all bug reports (see below).
|
||||
.TP
|
||||
.B \-a
|
||||
Don't search in binary files.
|
||||
.TP
|
||||
.B \-b
|
||||
Print the byte offset within the input file before
|
||||
each line of output.
|
||||
.TP
|
||||
.B \-c
|
||||
Suppress normal output; instead print a count of
|
||||
matching lines for each input file.
|
||||
With the
|
||||
.B \-v
|
||||
option (see below), count non-matching lines.
|
||||
.TP
|
||||
.BI \-e " pattern"
|
||||
Use
|
||||
.I pattern
|
||||
as the pattern; useful to protect patterns beginning with
|
||||
.BR \- .
|
||||
.TP
|
||||
.BI \-f " file"
|
||||
Obtain the pattern from
|
||||
.IR file .
|
||||
.TP
|
||||
.B \-h
|
||||
Suppress the prefixing of filenames on output
|
||||
when multiple files are searched.
|
||||
.TP
|
||||
.B \-i
|
||||
Ignore case distinctions in both the
|
||||
.I pattern
|
||||
and the input files.
|
||||
.TP
|
||||
.B \-L
|
||||
Suppress normal output; instead print the name
|
||||
of each input file from which no output would
|
||||
normally have been printed.
|
||||
.TP
|
||||
.B \-l
|
||||
Suppress normal output; instead print
|
||||
the name of each input file from which output
|
||||
would normally have been printed.
|
||||
.TP
|
||||
.B \-n
|
||||
Prefix each line of output with the line number
|
||||
within its input file.
|
||||
.TP
|
||||
.B \-q
|
||||
Quiet; suppress normal output.
|
||||
.TP
|
||||
.B \-s
|
||||
Suppress error messages about nonexistent or unreadable files.
|
||||
.TP
|
||||
.B \-v
|
||||
Invert the sense of matching, to select non-matching lines.
|
||||
.TP
|
||||
.B \-w
|
||||
Select only those lines containing matches that form whole words.
|
||||
The test is that the matching substring must either be at the
|
||||
beginning of the line, or preceded by a non-word constituent
|
||||
character. Similarly, it must be either at the end of the line
|
||||
or followed by a non-word constituent character. Word-constituent
|
||||
characters are letters, digits, and the underscore.
|
||||
.TP
|
||||
.B \-x
|
||||
Select only those matches that exactly match the whole line.
|
||||
|
||||
.PP
|
||||
The following options are only available if compiled with the FTS library:
|
||||
.PD 0
|
||||
.TP
|
||||
.BI \-H
|
||||
If the
|
||||
.I \-R
|
||||
option is specified, symbolic links on the command line
|
||||
are followed. (Symbolic links encountered in the tree traversal
|
||||
are not followed.)
|
||||
.TP
|
||||
.BI \-L
|
||||
If the
|
||||
.I \-R
|
||||
option is specified, all symbolic links are followed.
|
||||
.TP
|
||||
.BI \-P
|
||||
If the
|
||||
.I \-R
|
||||
option is specified, no symbolic links are followed.
|
||||
.TP
|
||||
.BI \-R
|
||||
Search in the file hierarchies
|
||||
rooted in the files instead of just the files themselves.
|
||||
|
||||
.LP
|
||||
The following option is only available if compiled with the zlib library:
|
||||
.PD 0
|
||||
.TP
|
||||
.BI \-Z
|
||||
If the
|
||||
.I \-Z
|
||||
option is specified, the input data will be
|
||||
decompressed before searching.
|
||||
.TP
|
||||
.PD
|
||||
.SH "REGULAR EXPRESSIONS"
|
||||
.PP
|
||||
A regular expression is a pattern that describes a set of strings.
|
||||
Regular expressions are constructed analogously to arithmetic
|
||||
expressions, by using various operators to combine smaller expressions.
|
||||
.PP
|
||||
.B Grep
|
||||
understands two different versions of regular expression syntax:
|
||||
``basic'' and ``extended.'' In
|
||||
.RB "GNU\ " grep ,
|
||||
there is no difference in available functionality using either syntax.
|
||||
In other implementations, basic regular expressions are less powerful.
|
||||
The following description applies to extended regular expressions;
|
||||
differences for basic regular expressions are summarized afterwards.
|
||||
.PP
|
||||
The fundamental building blocks are the regular expressions that match
|
||||
a single character. Most characters, including all letters and digits,
|
||||
are regular expressions that match themselves. Any metacharacter with
|
||||
special meaning may be quoted by preceding it with a backslash.
|
||||
.PP
|
||||
A list of characters enclosed by
|
||||
.B [
|
||||
and
|
||||
.B ]
|
||||
matches any single
|
||||
character in that list; if the first character of the list
|
||||
is the caret
|
||||
.B ^
|
||||
then it matches any character
|
||||
.I not
|
||||
in the list.
|
||||
For example, the regular expression
|
||||
.B [0123456789]
|
||||
matches any single digit. A range of ASCII characters
|
||||
may be specified by giving the first and last characters, separated
|
||||
by a hyphen.
|
||||
Finally, certain named classes of characters are predefined.
|
||||
Their names are self-explanatory, and they are
|
||||
.BR [:alnum:] ,
|
||||
.BR [:alpha:] ,
|
||||
.BR [:cntrl:] ,
|
||||
.BR [:digit:] ,
|
||||
.BR [:graph:] ,
|
||||
.BR [:lower:] ,
|
||||
.BR [:print:] ,
|
||||
.BR [:punct:] ,
|
||||
.BR [:space:] ,
|
||||
.BR [:upper:] ,
|
||||
and
|
||||
.BR [:xdigit:].
|
||||
For example,
|
||||
.B [[:alnum:]]
|
||||
means
|
||||
.BR [0-9A-Za-z] ,
|
||||
except the latter form is dependent upon the ASCII character encoding,
|
||||
whereas the former is portable.
|
||||
(Note that the brackets in these class names are part of the symbolic
|
||||
names, and must be included in addition to the brackets delimiting
|
||||
the bracket list.) Most metacharacters lose their special meaning
|
||||
inside lists. To include a literal
|
||||
.B ]
|
||||
place it first in the list. Similarly, to include a literal
|
||||
.B ^
|
||||
place it anywhere but first. Finally, to include a literal
|
||||
.B \-
|
||||
place it last.
|
||||
.PP
|
||||
The period
|
||||
.B .
|
||||
matches any single character.
|
||||
The symbol
|
||||
.B \ew
|
||||
is a synonym for
|
||||
.B [[:alnum:]]
|
||||
and
|
||||
.B \eW
|
||||
is a synonym for
|
||||
.BR [^[:alnum:]] .
|
||||
.PP
|
||||
The caret
|
||||
.B ^
|
||||
and the dollar sign
|
||||
.B $
|
||||
are metacharacters that respectively match the empty string at the
|
||||
beginning and end of a line.
|
||||
The symbols
|
||||
.B \e<
|
||||
and
|
||||
.B \e>
|
||||
respectively match the empty string at the beginning and end of a word.
|
||||
The symbol
|
||||
.B \eb
|
||||
matches the empty string at the edge of a word,
|
||||
and
|
||||
.B \eB
|
||||
matches the empty string provided it's
|
||||
.I not
|
||||
at the edge of a word.
|
||||
.PP
|
||||
A regular expression matching a single character may be followed
|
||||
by one of several repetition operators:
|
||||
.PD 0
|
||||
.TP
|
||||
.B ?
|
||||
The preceding item is optional and matched at most once.
|
||||
.TP
|
||||
.B *
|
||||
The preceding item will be matched zero or more times.
|
||||
.TP
|
||||
.B +
|
||||
The preceding item will be matched one or more times.
|
||||
.TP
|
||||
.BI { n }
|
||||
The preceding item is matched exactly
|
||||
.I n
|
||||
times.
|
||||
.TP
|
||||
.BI { n ,}
|
||||
The preceding item is matched
|
||||
.I n
|
||||
or more times.
|
||||
.TP
|
||||
.BI {, m }
|
||||
The preceding item is optional and is matched at most
|
||||
.I m
|
||||
times.
|
||||
.TP
|
||||
.BI { n , m }
|
||||
The preceding item is matched at least
|
||||
.I n
|
||||
times, but not more than
|
||||
.I m
|
||||
times.
|
||||
.PD
|
||||
.PP
|
||||
Two regular expressions may be concatenated; the resulting
|
||||
regular expression matches any string formed by concatenating
|
||||
two substrings that respectively match the concatenated
|
||||
subexpressions.
|
||||
.PP
|
||||
Two regular expressions may be joined by the infix operator
|
||||
.BR | ;
|
||||
the resulting regular expression matches any string matching
|
||||
either subexpression.
|
||||
.PP
|
||||
Repetition takes precedence over concatenation, which in turn
|
||||
takes precedence over alternation. A whole subexpression may be
|
||||
enclosed in parentheses to override these precedence rules.
|
||||
.PP
|
||||
The backreference
|
||||
.BI \e n\c
|
||||
\&, where
|
||||
.I n
|
||||
is a single digit, matches the substring
|
||||
previously matched by the
|
||||
.IR n th
|
||||
parenthesized subexpression of the regular expression.
|
||||
.PP
|
||||
In basic regular expressions the metacharacters
|
||||
.BR ? ,
|
||||
.BR + ,
|
||||
.BR { ,
|
||||
.BR | ,
|
||||
.BR ( ,
|
||||
and
|
||||
.BR )
|
||||
lose their special meaning; instead use the backslashed
|
||||
versions
|
||||
.BR \e? ,
|
||||
.BR \e+ ,
|
||||
.BR \e{ ,
|
||||
.BR \e| ,
|
||||
.BR \e( ,
|
||||
and
|
||||
.BR \e) .
|
||||
.PP
|
||||
In
|
||||
.B egrep
|
||||
the metacharacter
|
||||
.B {
|
||||
loses its special meaning; instead use
|
||||
.BR \e{ .
|
||||
.SH DIAGNOSTICS
|
||||
.PP
|
||||
Normally, exit status is 0 if matches were found,
|
||||
and 1 if no matches were found. (The
|
||||
.B \-v
|
||||
option inverts the sense of the exit status.)
|
||||
Exit status is 2 if there were syntax errors
|
||||
in the pattern, inaccessible input files, or
|
||||
other system errors.
|
||||
.SH BUGS
|
||||
.PP
|
||||
Email bug reports to
|
||||
.BR bug-gnu-utils@prep.ai.mit.edu .
|
||||
Be sure to include the word ``grep'' somewhere in the ``Subject:'' field.
|
||||
.PP
|
||||
Large repetition counts in the
|
||||
.BI { m , n }
|
||||
construct may cause grep to use lots of memory.
|
||||
In addition,
|
||||
certain other obscure regular expressions require exponential time
|
||||
and space, and may cause
|
||||
.B grep
|
||||
to run out of memory.
|
||||
.PP
|
||||
Backreferences are very slow, and may require exponential time.
|
1103
gnu/usr.bin/grep/grep.c
Normal file
1103
gnu/usr.bin/grep/grep.c
Normal file
File diff suppressed because it is too large
Load Diff
53
gnu/usr.bin/grep/grep.h
Normal file
53
gnu/usr.bin/grep/grep.h
Normal file
@ -0,0 +1,53 @@
|
||||
/* grep.h - interface to grep driver for searching subroutines.
|
||||
Copyright (C) 1992 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
#if __STDC__

/* Prototyped declarations for ANSI C compilers. */

extern void fatal(const char *, int);

/* One search back end: its name plus the routines that compile a
   pattern and run it over a buffer. */
struct matcher
{
  char *name;
  void (*compile)(char *, size_t);
  char *(*execute)(char *, size_t, char **);
};

/* Table of available matchers.  Grep.c expects this vector to be
   terminated by an entry with a NULL name, and to contain at least
   an entry named "default". */
extern struct matcher matchers[];

#else

/* Unprototyped declarations for pre-ANSI compilers. */

extern void fatal();

/* Same layout as the prototyped variant above, without parameter lists. */
struct matcher
{
  char *name;
  void (*compile)();
  char *(*execute)();
};

extern struct matcher matchers[];

#endif
|
||||
|
||||
/* Exported from grep.c. */
|
||||
extern char *matcher;
|
||||
|
||||
/* The following flags are exported from grep for the matchers
|
||||
to look at. */
|
||||
extern int match_icase; /* -i */
|
||||
extern int match_words; /* -w */
|
||||
extern int match_lines; /* -x */
|
807
gnu/usr.bin/grep/kwset.c
Normal file
807
gnu/usr.bin/grep/kwset.c
Normal file
@ -0,0 +1,807 @@
|
||||
/* kwset.c - search for any of a set of keywords.
|
||||
Copyright 1989 Free Software Foundation
|
||||
Written August 1989 by Mike Haertel.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 1, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
The author may be reached (Email) at the address mike@ai.mit.edu,
|
||||
or (US mail) as Mike Haertel c/o Free Software Foundation. */
|
||||
|
||||
/* The algorithm implemented by these routines bears a startling resemblance
|
||||
to one discovered by Beate Commentz-Walter, although it is not identical.
|
||||
See "A String Matching Algorithm Fast on the Average," Technical Report,
|
||||
IBM-Germany, Scientific Center Heidelberg, Tiergartenstrasse 15, D-6900
|
||||
Heidelberg, Germany. See also Aho, A.V., and M. Corasick, "Efficient
|
||||
String Matching: An Aid to Bibliographic Search," CACM June 1975,
|
||||
Vol. 18, No. 6, which describes the failure function used below. */
|
||||
|
||||
/* $FreeBSD$ */
|
||||
|
||||
|
||||
#ifdef STDC_HEADERS
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#else
|
||||
#define INT_MAX 2147483647
|
||||
#define UCHAR_MAX 255
|
||||
#ifdef __STDC__
|
||||
#include <stddef.h>
|
||||
#else
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
extern char *malloc();
|
||||
extern void free();
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_MEMCHR
|
||||
#include <string.h>
|
||||
#ifdef NEED_MEMORY_H
|
||||
#include <memory.h>
|
||||
#endif
|
||||
#else
|
||||
#ifdef __STDC__
|
||||
extern void *memchr();
|
||||
#else
|
||||
extern char *memchr();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef GREP
|
||||
extern char *xmalloc();
|
||||
#define malloc xmalloc
|
||||
#endif
|
||||
|
||||
#include "kwset.h"
|
||||
#include "obstack.h"
|
||||
|
||||
#define NCHAR (UCHAR_MAX + 1)
|
||||
#define obstack_chunk_alloc malloc
|
||||
#define obstack_chunk_free free
|
||||
|
||||
/* One node of the balanced binary search tree that stores the outgoing
   edges of a single trie node, ordered by edge label. */
struct tree
{
  /* Subtree of smaller labels.  This MUST stay the first field: kwsincr
     casts the address of a trie's `links' pointer to a `struct tree *'
     and stores through `llink' to install a new tree root. */
  struct tree *llink;
  struct tree *rlink;		/* Subtree of larger labels. */
  struct trie *trie;		/* Trie node this edge leads to. */
  unsigned char label;		/* Character labelling this edge. */
  char balance;			/* Depth difference between the subtrees. */
};
|
||||
|
||||
/* A node of the trie built from the keywords, each of which is inserted
   in reverse (last character first). */
struct trie
{
  /* Zero unless a keyword ends at this node; kwsincr stores
     1 + 2 * (index of the keyword) here. */
  unsigned int accepting;
  struct tree *links;		/* Search tree of edges leaving this node. */
  struct trie *parent;		/* Node one level closer to the root. */
  struct trie *next;		/* Next node in the level-order list. */
  struct trie *fail;		/* Aho-Corasick failure link. */
  int depth;			/* Distance of this node from the root. */
  int shift;			/* How far to shift when the search fails here. */
  int maxshift;			/* Upper bound on shift for this node and below. */
};
|
||||
|
||||
/* Structure returned opaquely to the caller, containing everything. */
|
||||
struct kwset
|
||||
{
|
||||
struct obstack obstack; /* Obstack for node allocation. */
|
||||
int words; /* Number of words in the trie. */
|
||||
struct trie *trie; /* The trie itself. */
|
||||
int mind; /* Minimum depth of an accepting node. */
|
||||
int maxd; /* Maximum depth of any node. */
|
||||
unsigned char delta[NCHAR]; /* Delta table for rapid search. */
|
||||
struct trie *next[NCHAR]; /* Table of children of the root. */
|
||||
char *target; /* Target string if there's only one. */
|
||||
int mind2; /* Used in Boyer-Moore search for one string. */
|
||||
char *trans; /* Character translation table. */
|
||||
};
|
||||
|
||||
/* Allocate and initialize a keyword set object, returning an opaque
|
||||
pointer to it. Return NULL if memory is not available. */
|
||||
kwset_t
|
||||
kwsalloc(trans)
|
||||
char *trans;
|
||||
{
|
||||
struct kwset *kwset;
|
||||
|
||||
kwset = (struct kwset *) malloc(sizeof (struct kwset));
|
||||
if (!kwset)
|
||||
return 0;
|
||||
|
||||
obstack_init(&kwset->obstack);
|
||||
kwset->words = 0;
|
||||
kwset->trie
|
||||
= (struct trie *) obstack_alloc(&kwset->obstack, sizeof (struct trie));
|
||||
if (!kwset->trie)
|
||||
{
|
||||
kwsfree((kwset_t) kwset);
|
||||
return 0;
|
||||
}
|
||||
kwset->trie->accepting = 0;
|
||||
kwset->trie->links = 0;
|
||||
kwset->trie->parent = 0;
|
||||
kwset->trie->next = 0;
|
||||
kwset->trie->fail = 0;
|
||||
kwset->trie->depth = 0;
|
||||
kwset->trie->shift = 0;
|
||||
kwset->mind = INT_MAX;
|
||||
kwset->maxd = -1;
|
||||
kwset->target = 0;
|
||||
kwset->trans = trans;
|
||||
|
||||
return (kwset_t) kwset;
|
||||
}
|
||||
|
||||
/* Add the given string to the contents of the keyword set. Return NULL
|
||||
for success, an error message otherwise. */
|
||||
char *
|
||||
kwsincr(kws, text, len)
|
||||
kwset_t kws;
|
||||
char *text;
|
||||
size_t len;
|
||||
{
|
||||
struct kwset *kwset;
|
||||
register struct trie *trie;
|
||||
register unsigned char label;
|
||||
register struct tree *link;
|
||||
register int depth;
|
||||
struct tree *links[12];
|
||||
enum { L, R } dirs[12];
|
||||
struct tree *t, *r, *l, *rl, *lr;
|
||||
|
||||
kwset = (struct kwset *) kws;
|
||||
trie = kwset->trie;
|
||||
text += len;
|
||||
|
||||
/* Descend the trie (built of reversed keywords) character-by-character,
|
||||
installing new nodes when necessary. */
|
||||
while (len--)
|
||||
{
|
||||
label = kwset->trans ? kwset->trans[(unsigned char) *--text] : *--text;
|
||||
|
||||
/* Descend the tree of outgoing links for this trie node,
|
||||
looking for the current character and keeping track
|
||||
of the path followed. */
|
||||
link = trie->links;
|
||||
links[0] = (struct tree *) &trie->links;
|
||||
dirs[0] = L;
|
||||
depth = 1;
|
||||
|
||||
while (link && label != link->label)
|
||||
{
|
||||
links[depth] = link;
|
||||
if (label < link->label)
|
||||
dirs[depth++] = L, link = link->llink;
|
||||
else
|
||||
dirs[depth++] = R, link = link->rlink;
|
||||
}
|
||||
|
||||
/* The current character doesn't have an outgoing link at
|
||||
this trie node, so build a new trie node and install
|
||||
a link in the current trie node's tree. */
|
||||
if (!link)
|
||||
{
|
||||
link = (struct tree *) obstack_alloc(&kwset->obstack,
|
||||
sizeof (struct tree));
|
||||
if (!link)
|
||||
return "memory exhausted";
|
||||
link->llink = 0;
|
||||
link->rlink = 0;
|
||||
link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
|
||||
sizeof (struct trie));
|
||||
if (!link->trie)
|
||||
return "memory exhausted";
|
||||
link->trie->accepting = 0;
|
||||
link->trie->links = 0;
|
||||
link->trie->parent = trie;
|
||||
link->trie->next = 0;
|
||||
link->trie->fail = 0;
|
||||
link->trie->depth = trie->depth + 1;
|
||||
link->trie->shift = 0;
|
||||
link->label = label;
|
||||
link->balance = 0;
|
||||
|
||||
/* Install the new tree node in its parent. */
|
||||
if (dirs[--depth] == L)
|
||||
links[depth]->llink = link;
|
||||
else
|
||||
links[depth]->rlink = link;
|
||||
|
||||
/* Back up the tree fixing the balance flags. */
|
||||
while (depth && !links[depth]->balance)
|
||||
{
|
||||
if (dirs[depth] == L)
|
||||
--links[depth]->balance;
|
||||
else
|
||||
++links[depth]->balance;
|
||||
--depth;
|
||||
}
|
||||
|
||||
/* Rebalance the tree by pointer rotations if necessary. */
|
||||
if (depth && ((dirs[depth] == L && --links[depth]->balance)
|
||||
|| (dirs[depth] == R && ++links[depth]->balance)))
|
||||
{
|
||||
switch (links[depth]->balance)
|
||||
{
|
||||
case (char) -2:
|
||||
switch (dirs[depth + 1])
|
||||
{
|
||||
case L:
|
||||
r = links[depth], t = r->llink, rl = t->rlink;
|
||||
t->rlink = r, r->llink = rl;
|
||||
t->balance = r->balance = 0;
|
||||
break;
|
||||
case R:
|
||||
r = links[depth], l = r->llink, t = l->rlink;
|
||||
rl = t->rlink, lr = t->llink;
|
||||
t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
|
||||
l->balance = t->balance != 1 ? 0 : -1;
|
||||
r->balance = t->balance != (char) -1 ? 0 : 1;
|
||||
t->balance = 0;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (dirs[depth + 1])
|
||||
{
|
||||
case R:
|
||||
l = links[depth], t = l->rlink, lr = t->llink;
|
||||
t->llink = l, l->rlink = lr;
|
||||
t->balance = l->balance = 0;
|
||||
break;
|
||||
case L:
|
||||
l = links[depth], r = l->rlink, t = r->llink;
|
||||
lr = t->llink, rl = t->rlink;
|
||||
t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
|
||||
l->balance = t->balance != 1 ? 0 : -1;
|
||||
r->balance = t->balance != (char) -1 ? 0 : 1;
|
||||
t->balance = 0;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (dirs[depth - 1] == L)
|
||||
links[depth - 1]->llink = t;
|
||||
else
|
||||
links[depth - 1]->rlink = t;
|
||||
}
|
||||
}
|
||||
|
||||
trie = link->trie;
|
||||
}
|
||||
|
||||
/* Mark the node we finally reached as accepting, encoding the
|
||||
index number of this word in the keyword set so far. */
|
||||
if (!trie->accepting)
|
||||
trie->accepting = 1 + 2 * kwset->words;
|
||||
++kwset->words;
|
||||
|
||||
/* Keep track of the longest and shortest string of the keyword set. */
|
||||
if (trie->depth < kwset->mind)
|
||||
kwset->mind = trie->depth;
|
||||
if (trie->depth > kwset->maxd)
|
||||
kwset->maxd = trie->depth;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Enqueue the trie nodes referenced from the given tree in the
|
||||
given queue. */
|
||||
static void
|
||||
enqueue(tree, last)
|
||||
struct tree *tree;
|
||||
struct trie **last;
|
||||
{
|
||||
if (!tree)
|
||||
return;
|
||||
enqueue(tree->llink, last);
|
||||
enqueue(tree->rlink, last);
|
||||
(*last) = (*last)->next = tree->trie;
|
||||
}
|
||||
|
||||
/* Compute the Aho-Corasick failure function for the trie nodes referenced
|
||||
from the given tree, given the failure function for their parent as
|
||||
well as a last resort failure node. */
|
||||
static void
|
||||
treefails(tree, fail, recourse)
|
||||
register struct tree *tree;
|
||||
struct trie *fail;
|
||||
struct trie *recourse;
|
||||
{
|
||||
register struct tree *link;
|
||||
|
||||
if (!tree)
|
||||
return;
|
||||
|
||||
treefails(tree->llink, fail, recourse);
|
||||
treefails(tree->rlink, fail, recourse);
|
||||
|
||||
/* Find, in the chain of fails going back to the root, the first
|
||||
node that has a descendent on the current label. */
|
||||
while (fail)
|
||||
{
|
||||
link = fail->links;
|
||||
while (link && tree->label != link->label)
|
||||
if (tree->label < link->label)
|
||||
link = link->llink;
|
||||
else
|
||||
link = link->rlink;
|
||||
if (link)
|
||||
{
|
||||
tree->trie->fail = link->trie;
|
||||
return;
|
||||
}
|
||||
fail = fail->fail;
|
||||
}
|
||||
|
||||
tree->trie->fail = recourse;
|
||||
}
|
||||
|
||||
/* Set delta entries for the links of the given tree such that
|
||||
the preexisting delta value is larger than the current depth. */
|
||||
static void
|
||||
treedelta(tree, depth, delta)
|
||||
register struct tree *tree;
|
||||
register unsigned int depth;
|
||||
unsigned char delta[];
|
||||
{
|
||||
if (!tree)
|
||||
return;
|
||||
treedelta(tree->llink, depth, delta);
|
||||
treedelta(tree->rlink, depth, delta);
|
||||
if (depth < delta[tree->label])
|
||||
delta[tree->label] = depth;
|
||||
}
|
||||
|
||||
/* Return true if A has every label in B. */
|
||||
static int
|
||||
hasevery(a, b)
|
||||
register struct tree *a;
|
||||
register struct tree *b;
|
||||
{
|
||||
if (!b)
|
||||
return 1;
|
||||
if (!hasevery(a, b->llink))
|
||||
return 0;
|
||||
if (!hasevery(a, b->rlink))
|
||||
return 0;
|
||||
while (a && b->label != a->label)
|
||||
if (b->label < a->label)
|
||||
a = a->llink;
|
||||
else
|
||||
a = a->rlink;
|
||||
return !!a;
|
||||
}
|
||||
|
||||
/* Compute a vector, indexed by character code, of the trie nodes
|
||||
referenced from the given tree. */
|
||||
static void
|
||||
treenext(tree, next)
|
||||
struct tree *tree;
|
||||
struct trie *next[];
|
||||
{
|
||||
if (!tree)
|
||||
return;
|
||||
treenext(tree->llink, next);
|
||||
treenext(tree->rlink, next);
|
||||
next[tree->label] = tree->trie;
|
||||
}
|
||||
|
||||
/* Compute the shift for each trie node, as well as the delta
|
||||
table and next cache for the given keyword set. */
|
||||
char *
|
||||
kwsprep(kws)
|
||||
kwset_t kws;
|
||||
{
|
||||
register struct kwset *kwset;
|
||||
register int i;
|
||||
register struct trie *curr, *fail;
|
||||
register char *trans;
|
||||
unsigned char delta[NCHAR];
|
||||
struct trie *last, *next[NCHAR];
|
||||
|
||||
kwset = (struct kwset *) kws;
|
||||
|
||||
/* Initial values for the delta table; will be changed later. The
|
||||
delta entry for a given character is the smallest depth of any
|
||||
node at which an outgoing edge is labeled by that character. */
|
||||
if (kwset->mind < 256)
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
delta[i] = kwset->mind;
|
||||
else
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
delta[i] = 255;
|
||||
|
||||
/* Check if we can use the simple boyer-moore algorithm, instead
|
||||
of the hairy commentz-walter algorithm. */
|
||||
if (kwset->words == 1 && kwset->trans == 0)
|
||||
{
|
||||
/* Looking for just one string. Extract it from the trie. */
|
||||
kwset->target = obstack_alloc(&kwset->obstack, kwset->mind);
|
||||
for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
|
||||
{
|
||||
kwset->target[i] = curr->links->label;
|
||||
curr = curr->links->trie;
|
||||
}
|
||||
/* Build the Boyer Moore delta. Boy that's easy compared to CW. */
|
||||
for (i = 0; i < kwset->mind; ++i)
|
||||
delta[(unsigned char) kwset->target[i]] = kwset->mind - (i + 1);
|
||||
kwset->mind2 = kwset->mind;
|
||||
/* Find the minimal delta2 shift that we might make after
|
||||
a backwards match has failed. */
|
||||
for (i = 0; i < kwset->mind - 1; ++i)
|
||||
if (kwset->target[i] == kwset->target[kwset->mind - 1])
|
||||
kwset->mind2 = kwset->mind - (i + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Traverse the nodes of the trie in level order, simultaneously
|
||||
computing the delta table, failure function, and shift function. */
|
||||
for (curr = last = kwset->trie; curr; curr = curr->next)
|
||||
{
|
||||
/* Enqueue the immediate descendents in the level order queue. */
|
||||
enqueue(curr->links, &last);
|
||||
|
||||
curr->shift = kwset->mind;
|
||||
curr->maxshift = kwset->mind;
|
||||
|
||||
/* Update the delta table for the descendents of this node. */
|
||||
treedelta(curr->links, curr->depth, delta);
|
||||
|
||||
/* Compute the failure function for the decendents of this node. */
|
||||
treefails(curr->links, curr->fail, kwset->trie);
|
||||
|
||||
/* Update the shifts at each node in the current node's chain
|
||||
of fails back to the root. */
|
||||
for (fail = curr->fail; fail; fail = fail->fail)
|
||||
{
|
||||
/* If the current node has some outgoing edge that the fail
|
||||
doesn't, then the shift at the fail should be no larger
|
||||
than the difference of their depths. */
|
||||
if (!hasevery(fail->links, curr->links))
|
||||
if (curr->depth - fail->depth < fail->shift)
|
||||
fail->shift = curr->depth - fail->depth;
|
||||
|
||||
/* If the current node is accepting then the shift at the
|
||||
fail and its descendents should be no larger than the
|
||||
difference of their depths. */
|
||||
if (curr->accepting && fail->maxshift > curr->depth - fail->depth)
|
||||
fail->maxshift = curr->depth - fail->depth;
|
||||
}
|
||||
}
|
||||
|
||||
/* Traverse the trie in level order again, fixing up all nodes whose
|
||||
shift exceeds their inherited maxshift. */
|
||||
for (curr = kwset->trie->next; curr; curr = curr->next)
|
||||
{
|
||||
if (curr->maxshift > curr->parent->maxshift)
|
||||
curr->maxshift = curr->parent->maxshift;
|
||||
if (curr->shift > curr->maxshift)
|
||||
curr->shift = curr->maxshift;
|
||||
}
|
||||
|
||||
/* Create a vector, indexed by character code, of the outgoing links
|
||||
from the root node. */
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
next[i] = 0;
|
||||
treenext(kwset->trie->links, next);
|
||||
|
||||
if ((trans = kwset->trans) != 0)
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
kwset->next[i] = next[(unsigned char) trans[i]];
|
||||
else
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
kwset->next[i] = next[i];
|
||||
}
|
||||
|
||||
/* Fix things up for any translation table. */
|
||||
if ((trans = kwset->trans) != 0)
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
kwset->delta[i] = delta[(unsigned char) trans[i]];
|
||||
else
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
kwset->delta[i] = delta[i];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define U(C) ((unsigned char) (C))
|
||||
|
||||
/* Fast boyer-moore search. */
|
||||
static char *
|
||||
bmexec(kws, text, size)
|
||||
kwset_t kws;
|
||||
char *text;
|
||||
size_t size;
|
||||
{
|
||||
struct kwset *kwset;
|
||||
register unsigned char *d1;
|
||||
register char *ep, *sp, *tp;
|
||||
register int d, gc, i, len, md2;
|
||||
|
||||
kwset = (struct kwset *) kws;
|
||||
len = kwset->mind;
|
||||
|
||||
if (len == 0)
|
||||
return text;
|
||||
if (len > size)
|
||||
return 0;
|
||||
if (len == 1)
|
||||
return memchr(text, kwset->target[0], size);
|
||||
|
||||
d1 = kwset->delta;
|
||||
sp = kwset->target + len;
|
||||
gc = U(sp[-2]);
|
||||
md2 = kwset->mind2;
|
||||
tp = text + len;
|
||||
|
||||
/* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
|
||||
if (size > 12 * len)
|
||||
/* 11 is not a bug, the initial offset happens only once. */
|
||||
for (ep = text + size - 11 * len;;)
|
||||
{
|
||||
while (tp <= ep)
|
||||
{
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
if (d == 0)
|
||||
goto found;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
if (d == 0)
|
||||
goto found;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
if (d == 0)
|
||||
goto found;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
}
|
||||
break;
|
||||
found:
|
||||
if (U(tp[-2]) == gc)
|
||||
{
|
||||
for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
|
||||
;
|
||||
if (i > len)
|
||||
return tp - len;
|
||||
}
|
||||
tp += md2;
|
||||
}
|
||||
|
||||
/* Now we have only a few characters left to search. We
|
||||
carefully avoid ever producing an out-of-bounds pointer. */
|
||||
ep = text + size;
|
||||
d = d1[U(tp[-1])];
|
||||
while (d <= ep - tp)
|
||||
{
|
||||
d = d1[U((tp += d)[-1])];
|
||||
if (d != 0)
|
||||
continue;
|
||||
if (U(tp[-2]) == gc)
|
||||
{
|
||||
for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
|
||||
;
|
||||
if (i > len)
|
||||
return tp - len;
|
||||
}
|
||||
d = md2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Hairy multiple string search. */
|
||||
static char *
|
||||
cwexec(kws, text, len, kwsmatch)
|
||||
kwset_t kws;
|
||||
char *text;
|
||||
size_t len;
|
||||
struct kwsmatch *kwsmatch;
|
||||
{
|
||||
struct kwset *kwset;
|
||||
struct trie **next, *trie, *accept;
|
||||
char *beg, *lim, *mch, *lmch;
|
||||
register unsigned char c, *delta;
|
||||
register int d;
|
||||
register char *end, *qlim;
|
||||
register struct tree *tree;
|
||||
register char *trans;
|
||||
|
||||
/* Initialize register copies and look for easy ways out. */
|
||||
kwset = (struct kwset *) kws;
|
||||
if (len < kwset->mind)
|
||||
return 0;
|
||||
next = kwset->next;
|
||||
delta = kwset->delta;
|
||||
trans = kwset->trans;
|
||||
lim = text + len;
|
||||
end = text;
|
||||
if ((d = kwset->mind) != 0)
|
||||
mch = 0;
|
||||
else
|
||||
{
|
||||
mch = text, accept = kwset->trie;
|
||||
goto match;
|
||||
}
|
||||
|
||||
if (len >= 4 * kwset->mind)
|
||||
qlim = lim - 4 * kwset->mind;
|
||||
else
|
||||
qlim = 0;
|
||||
|
||||
while (lim - end >= d)
|
||||
{
|
||||
if (qlim && end <= qlim)
|
||||
{
|
||||
end += d - 1;
|
||||
while ((d = delta[c = *end]) && end < qlim)
|
||||
{
|
||||
end += d;
|
||||
end += delta[(unsigned char) *end];
|
||||
end += delta[(unsigned char) *end];
|
||||
}
|
||||
++end;
|
||||
}
|
||||
else
|
||||
d = delta[c = (end += d)[-1]];
|
||||
if (d)
|
||||
continue;
|
||||
beg = end - 1;
|
||||
trie = next[c];
|
||||
if (trie->accepting)
|
||||
{
|
||||
mch = beg;
|
||||
accept = trie;
|
||||
}
|
||||
d = trie->shift;
|
||||
while (beg > text)
|
||||
{
|
||||
c = trans ? trans[(unsigned char) *--beg] : *--beg;
|
||||
tree = trie->links;
|
||||
while (tree && c != tree->label)
|
||||
if (c < tree->label)
|
||||
tree = tree->llink;
|
||||
else
|
||||
tree = tree->rlink;
|
||||
if (tree)
|
||||
{
|
||||
trie = tree->trie;
|
||||
if (trie->accepting)
|
||||
{
|
||||
mch = beg;
|
||||
accept = trie;
|
||||
}
|
||||
}
|
||||
else
|
||||
break;
|
||||
d = trie->shift;
|
||||
}
|
||||
if (mch)
|
||||
goto match;
|
||||
}
|
||||
return 0;
|
||||
|
||||
match:
|
||||
/* Given a known match, find the longest possible match anchored
|
||||
at or before its starting point. This is nearly a verbatim
|
||||
copy of the preceding main search loops. */
|
||||
if (lim - mch > kwset->maxd)
|
||||
lim = mch + kwset->maxd;
|
||||
lmch = 0;
|
||||
d = 1;
|
||||
while (lim - end >= d)
|
||||
{
|
||||
if ((d = delta[c = (end += d)[-1]]) != 0)
|
||||
continue;
|
||||
beg = end - 1;
|
||||
if (!(trie = next[c]))
|
||||
{
|
||||
d = 1;
|
||||
continue;
|
||||
}
|
||||
if (trie->accepting && beg <= mch)
|
||||
{
|
||||
lmch = beg;
|
||||
accept = trie;
|
||||
}
|
||||
d = trie->shift;
|
||||
while (beg > text)
|
||||
{
|
||||
c = trans ? trans[(unsigned char) *--beg] : *--beg;
|
||||
tree = trie->links;
|
||||
while (tree && c != tree->label)
|
||||
if (c < tree->label)
|
||||
tree = tree->llink;
|
||||
else
|
||||
tree = tree->rlink;
|
||||
if (tree)
|
||||
{
|
||||
trie = tree->trie;
|
||||
if (trie->accepting && beg <= mch)
|
||||
{
|
||||
lmch = beg;
|
||||
accept = trie;
|
||||
}
|
||||
}
|
||||
else
|
||||
break;
|
||||
d = trie->shift;
|
||||
}
|
||||
if (lmch)
|
||||
{
|
||||
mch = lmch;
|
||||
goto match;
|
||||
}
|
||||
if (!d)
|
||||
d = 1;
|
||||
}
|
||||
|
||||
if (kwsmatch)
|
||||
{
|
||||
kwsmatch->index = accept->accepting / 2;
|
||||
kwsmatch->beg[0] = mch;
|
||||
kwsmatch->size[0] = accept->depth;
|
||||
}
|
||||
return mch;
|
||||
}
|
||||
|
||||
/* Search through the given text for a match of any member of the
|
||||
given keyword set. Return a pointer to the first character of
|
||||
the matching substring, or NULL if no match is found. If FOUNDLEN
|
||||
is non-NULL store in the referenced location the length of the
|
||||
matching substring. Similarly, if FOUNDIDX is non-NULL, store
|
||||
in the referenced location the index number of the particular
|
||||
keyword matched. */
|
||||
char *
|
||||
kwsexec(kws, text, size, kwsmatch)
|
||||
kwset_t kws;
|
||||
char *text;
|
||||
size_t size;
|
||||
struct kwsmatch *kwsmatch;
|
||||
{
|
||||
struct kwset *kwset;
|
||||
char *ret;
|
||||
|
||||
kwset = (struct kwset *) kws;
|
||||
if (kwset->words == 1 && kwset->trans == 0)
|
||||
{
|
||||
ret = bmexec(kws, text, size);
|
||||
if (kwsmatch != 0 && ret != 0)
|
||||
{
|
||||
kwsmatch->index = 0;
|
||||
kwsmatch->beg[0] = ret;
|
||||
kwsmatch->size[0] = kwset->mind;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
else
|
||||
return cwexec(kws, text, size, kwsmatch);
|
||||
}
|
||||
|
||||
/* Free the components of the given keyword set. */
|
||||
void
|
||||
kwsfree(kws)
|
||||
kwset_t kws;
|
||||
{
|
||||
struct kwset *kwset;
|
||||
|
||||
kwset = (struct kwset *) kws;
|
||||
obstack_free(&kwset->obstack, 0);
|
||||
free(kws);
|
||||
}
|
69
gnu/usr.bin/grep/kwset.h
Normal file
69
gnu/usr.bin/grep/kwset.h
Normal file
@ -0,0 +1,69 @@
|
||||
/* kwset.h - header declaring the keyword set library.
|
||||
Copyright 1989 Free Software Foundation
|
||||
Written August 1989 by Mike Haertel.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 1, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
The author may be reached (Email) at the address mike@ai.mit.edu,
|
||||
or (US mail) as Mike Haertel c/o Free Software Foundation. */
|
||||
|
||||
/* Guard against multiple inclusion: without it a second #include would
   redefine struct kwsmatch and kwset_t.  */
#ifndef KWSET_H
#define KWSET_H

/* Description of a match, filled in by kwsexec.  This header uses
   size_t but does not declare it; the including file must have
   already pulled in a header that does.  */
struct kwsmatch
{
  int index;			/* Index number of matching keyword. */
  char *beg[1];			/* Begin pointer for each submatch. */
  size_t size[1];		/* Length of each submatch. */
};

#if __STDC__

typedef void *kwset_t;

/* Return an opaque pointer to a newly allocated keyword set, or NULL
   if enough memory cannot be obtained.  The argument if non-NULL
   specifies a table of character translations to be applied to all
   pattern and search text. */
extern kwset_t kwsalloc(char *);

/* Incrementally extend the keyword set to include the given string.
   Return NULL for success, or an error message.  Remember an index
   number for each keyword included in the set. */
extern char *kwsincr(kwset_t, char *, size_t);

/* When the keyword set has been completely built, prepare it for
   use.  Return NULL for success, or an error message. */
extern char *kwsprep(kwset_t);

/* Search through the given buffer for a member of the keyword set.
   Return a pointer to the leftmost longest match found, or NULL if
   no match is found.  If the struct kwsmatch pointer is non-NULL,
   store in the structure it points to the index of the particular
   keyword found, the address where the match begins, and the length
   of the matching substring. */
extern char *kwsexec(kwset_t, char *, size_t, struct kwsmatch *);

/* Deallocate the given keyword set and all its associated storage. */
extern void kwsfree(kwset_t);

#else

typedef char *kwset_t;

extern kwset_t kwsalloc();
extern char *kwsincr();
extern char *kwsprep();
extern char *kwsexec();
extern void kwsfree();

#endif

#endif /* KWSET_H */
|
454
gnu/usr.bin/grep/obstack.c
Normal file
454
gnu/usr.bin/grep/obstack.c
Normal file
@ -0,0 +1,454 @@
|
||||
/* obstack.c - subroutines used implicitly by object stack macros
|
||||
Copyright (C) 1988, 1993 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2, or (at your option) any
|
||||
later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
#include "obstack.h"
|
||||
|
||||
/* This is just to get __GNU_LIBRARY__ defined. */
|
||||
#include <stdio.h>
|
||||
|
||||
/* Comment out all this code if we are using the GNU C Library, and are not
|
||||
actually compiling the library itself. This code is part of the GNU C
|
||||
Library, but also included in many other GNU distributions. Compiling
|
||||
and linking in this code is a waste when using the GNU C library
|
||||
(especially if it is a shared library). Rather than having every GNU
|
||||
program understand `configure --with-gnu-libc' and omit the object files,
|
||||
it is simpler to just do this in the source for each such file. */
|
||||
|
||||
#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
|
||||
|
||||
|
||||
#ifdef __STDC__
|
||||
#define POINTER void *
|
||||
#else
|
||||
#define POINTER char *
|
||||
#endif
|
||||
|
||||
/* Determine default alignment. */
|
||||
struct fooalign {char x; double d;};
|
||||
#define DEFAULT_ALIGNMENT \
|
||||
((PTR_INT_TYPE) ((char *)&((struct fooalign *) 0)->d - (char *)0))
|
||||
/* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT.
|
||||
But in fact it might be less smart and round addresses to as much as
|
||||
DEFAULT_ROUNDING. So we prepare for it to do that. */
|
||||
union fooround {long x; double d;};
|
||||
#define DEFAULT_ROUNDING (sizeof (union fooround))
|
||||
|
||||
/* When we copy a long block of data, this is the unit to do it with.
|
||||
On some machines, copying successive ints does not work;
|
||||
in such a case, redefine COPYING_UNIT to `long' (if that works)
|
||||
or `char' as a last resort. */
|
||||
#ifndef COPYING_UNIT
|
||||
#define COPYING_UNIT int
|
||||
#endif
|
||||
|
||||
/* The non-GNU-C macros copy the obstack into this global variable
|
||||
to avoid multiple evaluation. */
|
||||
|
||||
struct obstack *_obstack;
|
||||
|
||||
/* Define a macro that either calls functions with the traditional malloc/free
|
||||
calling interface, or calls functions with the mmalloc/mfree interface
|
||||
(that adds an extra first argument), based on the state of use_extra_arg.
|
||||
For free, do not use ?:, since some compilers, like the MIPS compilers,
|
||||
do not allow (expr) ? void : void. */
|
||||
|
||||
#define CALL_CHUNKFUN(h, size) \
|
||||
(((h) -> use_extra_arg) \
|
||||
? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
|
||||
: (*(h)->chunkfun) ((size)))
|
||||
|
||||
#define CALL_FREEFUN(h, old_chunk) \
|
||||
do { \
|
||||
if ((h) -> use_extra_arg) \
|
||||
(*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
|
||||
else \
|
||||
(*(h)->freefun) ((old_chunk)); \
|
||||
} while (0)
|
||||
|
||||
|
||||
/* Initialize an obstack H for use. Specify chunk size SIZE (0 means default).
|
||||
Objects start on multiples of ALIGNMENT (0 means use default).
|
||||
CHUNKFUN is the function to use to allocate chunks,
|
||||
and FREEFUN the function to free them. */
|
||||
|
||||
void
|
||||
_obstack_begin (h, size, alignment, chunkfun, freefun)
|
||||
struct obstack *h;
|
||||
int size;
|
||||
int alignment;
|
||||
POINTER (*chunkfun) ();
|
||||
void (*freefun) ();
|
||||
{
|
||||
register struct _obstack_chunk* chunk; /* points to new chunk */
|
||||
|
||||
if (alignment == 0)
|
||||
alignment = DEFAULT_ALIGNMENT;
|
||||
if (size == 0)
|
||||
/* Default size is what GNU malloc can fit in a 4096-byte block. */
|
||||
{
|
||||
/* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
|
||||
Use the values for range checking, because if range checking is off,
|
||||
the extra bytes won't be missed terribly, but if range checking is on
|
||||
and we used a larger request, a whole extra 4096 bytes would be
|
||||
allocated.
|
||||
|
||||
These number are irrelevant to the new GNU malloc. I suspect it is
|
||||
less sensitive to the size of the request. */
|
||||
int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
|
||||
+ 4 + DEFAULT_ROUNDING - 1)
|
||||
& ~(DEFAULT_ROUNDING - 1));
|
||||
size = 4096 - extra;
|
||||
}
|
||||
|
||||
h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
|
||||
h->freefun = freefun;
|
||||
h->chunk_size = size;
|
||||
h->alignment_mask = alignment - 1;
|
||||
h->use_extra_arg = 0;
|
||||
|
||||
chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
|
||||
h->next_free = h->object_base = chunk->contents;
|
||||
h->chunk_limit = chunk->limit
|
||||
= (char *) chunk + h->chunk_size;
|
||||
chunk->prev = 0;
|
||||
/* The initial chunk now contains no empty object. */
|
||||
h->maybe_empty_object = 0;
|
||||
}
|
||||
|
||||
void
|
||||
_obstack_begin_1 (h, size, alignment, chunkfun, freefun, arg)
|
||||
struct obstack *h;
|
||||
int size;
|
||||
int alignment;
|
||||
POINTER (*chunkfun) ();
|
||||
void (*freefun) ();
|
||||
POINTER arg;
|
||||
{
|
||||
register struct _obstack_chunk* chunk; /* points to new chunk */
|
||||
|
||||
if (alignment == 0)
|
||||
alignment = DEFAULT_ALIGNMENT;
|
||||
if (size == 0)
|
||||
/* Default size is what GNU malloc can fit in a 4096-byte block. */
|
||||
{
|
||||
/* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
|
||||
Use the values for range checking, because if range checking is off,
|
||||
the extra bytes won't be missed terribly, but if range checking is on
|
||||
and we used a larger request, a whole extra 4096 bytes would be
|
||||
allocated.
|
||||
|
||||
These number are irrelevant to the new GNU malloc. I suspect it is
|
||||
less sensitive to the size of the request. */
|
||||
int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
|
||||
+ 4 + DEFAULT_ROUNDING - 1)
|
||||
& ~(DEFAULT_ROUNDING - 1));
|
||||
size = 4096 - extra;
|
||||
}
|
||||
|
||||
h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
|
||||
h->freefun = freefun;
|
||||
h->chunk_size = size;
|
||||
h->alignment_mask = alignment - 1;
|
||||
h->extra_arg = arg;
|
||||
h->use_extra_arg = 1;
|
||||
|
||||
chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
|
||||
h->next_free = h->object_base = chunk->contents;
|
||||
h->chunk_limit = chunk->limit
|
||||
= (char *) chunk + h->chunk_size;
|
||||
chunk->prev = 0;
|
||||
/* The initial chunk now contains no empty object. */
|
||||
h->maybe_empty_object = 0;
|
||||
}
|
||||
|
||||
/* Allocate a new current chunk for the obstack *H
|
||||
on the assumption that LENGTH bytes need to be added
|
||||
to the current object, or a new object of length LENGTH allocated.
|
||||
Copies any partial object from the end of the old chunk
|
||||
to the beginning of the new one. */
|
||||
|
||||
void
|
||||
_obstack_newchunk (h, length)
|
||||
struct obstack *h;
|
||||
int length;
|
||||
{
|
||||
register struct _obstack_chunk* old_chunk = h->chunk;
|
||||
register struct _obstack_chunk* new_chunk;
|
||||
register long new_size;
|
||||
register int obj_size = h->next_free - h->object_base;
|
||||
register int i;
|
||||
int already;
|
||||
|
||||
/* Compute size for new chunk. */
|
||||
new_size = (obj_size + length) + (obj_size >> 3) + 100;
|
||||
if (new_size < h->chunk_size)
|
||||
new_size = h->chunk_size;
|
||||
|
||||
/* Allocate and initialize the new chunk. */
|
||||
new_chunk = h->chunk = CALL_CHUNKFUN (h, new_size);
|
||||
new_chunk->prev = old_chunk;
|
||||
new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size;
|
||||
|
||||
/* Move the existing object to the new chunk.
|
||||
Word at a time is fast and is safe if the object
|
||||
is sufficiently aligned. */
|
||||
if (h->alignment_mask + 1 >= DEFAULT_ALIGNMENT)
|
||||
{
|
||||
for (i = obj_size / sizeof (COPYING_UNIT) - 1;
|
||||
i >= 0; i--)
|
||||
((COPYING_UNIT *)new_chunk->contents)[i]
|
||||
= ((COPYING_UNIT *)h->object_base)[i];
|
||||
/* We used to copy the odd few remaining bytes as one extra COPYING_UNIT,
|
||||
but that can cross a page boundary on a machine
|
||||
which does not do strict alignment for COPYING_UNITS. */
|
||||
already = obj_size / sizeof (COPYING_UNIT) * sizeof (COPYING_UNIT);
|
||||
}
|
||||
else
|
||||
already = 0;
|
||||
/* Copy remaining bytes one by one. */
|
||||
for (i = already; i < obj_size; i++)
|
||||
new_chunk->contents[i] = h->object_base[i];
|
||||
|
||||
/* If the object just copied was the only data in OLD_CHUNK,
|
||||
free that chunk and remove it from the chain.
|
||||
But not if that chunk might contain an empty object. */
|
||||
if (h->object_base == old_chunk->contents && ! h->maybe_empty_object)
|
||||
{
|
||||
new_chunk->prev = old_chunk->prev;
|
||||
CALL_FREEFUN (h, old_chunk);
|
||||
}
|
||||
|
||||
h->object_base = new_chunk->contents;
|
||||
h->next_free = h->object_base + obj_size;
|
||||
/* The new chunk certainly contains no empty object yet. */
|
||||
h->maybe_empty_object = 0;
|
||||
}
|
||||
|
||||
/* Return nonzero if object OBJ has been allocated from obstack H.
|
||||
This is here for debugging.
|
||||
If you use it in a program, you are probably losing. */
|
||||
|
||||
int
|
||||
_obstack_allocated_p (h, obj)
|
||||
struct obstack *h;
|
||||
POINTER obj;
|
||||
{
|
||||
register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */
|
||||
register struct _obstack_chunk* plp; /* point to previous chunk if any */
|
||||
|
||||
lp = (h)->chunk;
|
||||
/* We use >= rather than > since the object cannot be exactly at
|
||||
the beginning of the chunk but might be an empty object exactly
|
||||
at the end of an adjacent chunk. */
|
||||
while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
|
||||
{
|
||||
plp = lp->prev;
|
||||
lp = plp;
|
||||
}
|
||||
return lp != 0;
|
||||
}
|
||||
|
||||
/* Free objects in obstack H, including OBJ and everything allocated
|
||||
more recently than OBJ. If OBJ is zero, free everything in H. */
|
||||
|
||||
#undef obstack_free
|
||||
|
||||
/* This function has two names with identical definitions.
|
||||
This is the first one, called from non-ANSI code. */
|
||||
|
||||
void
|
||||
_obstack_free (h, obj)
|
||||
struct obstack *h;
|
||||
POINTER obj;
|
||||
{
|
||||
register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */
|
||||
register struct _obstack_chunk* plp; /* point to previous chunk if any */
|
||||
|
||||
lp = h->chunk;
|
||||
/* We use >= because there cannot be an object at the beginning of a chunk.
|
||||
But there can be an empty object at that address
|
||||
at the end of another chunk. */
|
||||
while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
|
||||
{
|
||||
plp = lp->prev;
|
||||
CALL_FREEFUN (h, lp);
|
||||
lp = plp;
|
||||
/* If we switch chunks, we can't tell whether the new current
|
||||
chunk contains an empty object, so assume that it may. */
|
||||
h->maybe_empty_object = 1;
|
||||
}
|
||||
if (lp)
|
||||
{
|
||||
h->object_base = h->next_free = (char *)(obj);
|
||||
h->chunk_limit = lp->limit;
|
||||
h->chunk = lp;
|
||||
}
|
||||
else if (obj != 0)
|
||||
/* obj is not in any of the chunks! */
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* This function is used from ANSI code. */
|
||||
|
||||
void
|
||||
obstack_free (h, obj)
|
||||
struct obstack *h;
|
||||
POINTER obj;
|
||||
{
|
||||
register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */
|
||||
register struct _obstack_chunk* plp; /* point to previous chunk if any */
|
||||
|
||||
lp = h->chunk;
|
||||
/* We use >= because there cannot be an object at the beginning of a chunk.
|
||||
But there can be an empty object at that address
|
||||
at the end of another chunk. */
|
||||
while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
|
||||
{
|
||||
plp = lp->prev;
|
||||
CALL_FREEFUN (h, lp);
|
||||
lp = plp;
|
||||
/* If we switch chunks, we can't tell whether the new current
|
||||
chunk contains an empty object, so assume that it may. */
|
||||
h->maybe_empty_object = 1;
|
||||
}
|
||||
if (lp)
|
||||
{
|
||||
h->object_base = h->next_free = (char *)(obj);
|
||||
h->chunk_limit = lp->limit;
|
||||
h->chunk = lp;
|
||||
}
|
||||
else if (obj != 0)
|
||||
/* obj is not in any of the chunks! */
|
||||
abort ();
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* These are now turned off because the applications do not use it
|
||||
and it uses bcopy via obstack_grow, which causes trouble on sysV. */
|
||||
|
||||
/* Now define the functional versions of the obstack macros.
|
||||
Define them to simply use the corresponding macros to do the job. */
|
||||
|
||||
#ifdef __STDC__
|
||||
/* These function definitions do not work with non-ANSI preprocessors;
|
||||
they won't pass through the macro names in parentheses. */
|
||||
|
||||
/* The function names appear in parentheses in order to prevent
|
||||
the macro-definitions of the names from being expanded there. */
|
||||
|
||||
POINTER (obstack_base) (obstack)
|
||||
struct obstack *obstack;
|
||||
{
|
||||
return obstack_base (obstack);
|
||||
}
|
||||
|
||||
POINTER (obstack_next_free) (obstack)
|
||||
struct obstack *obstack;
|
||||
{
|
||||
return obstack_next_free (obstack);
|
||||
}
|
||||
|
||||
int (obstack_object_size) (obstack)
|
||||
struct obstack *obstack;
|
||||
{
|
||||
return obstack_object_size (obstack);
|
||||
}
|
||||
|
||||
int (obstack_room) (obstack)
|
||||
struct obstack *obstack;
|
||||
{
|
||||
return obstack_room (obstack);
|
||||
}
|
||||
|
||||
void (obstack_grow) (obstack, pointer, length)
|
||||
struct obstack *obstack;
|
||||
POINTER pointer;
|
||||
int length;
|
||||
{
|
||||
obstack_grow (obstack, pointer, length);
|
||||
}
|
||||
|
||||
void (obstack_grow0) (obstack, pointer, length)
|
||||
struct obstack *obstack;
|
||||
POINTER pointer;
|
||||
int length;
|
||||
{
|
||||
obstack_grow0 (obstack, pointer, length);
|
||||
}
|
||||
|
||||
void (obstack_1grow) (obstack, character)
|
||||
struct obstack *obstack;
|
||||
int character;
|
||||
{
|
||||
obstack_1grow (obstack, character);
|
||||
}
|
||||
|
||||
void (obstack_blank) (obstack, length)
|
||||
struct obstack *obstack;
|
||||
int length;
|
||||
{
|
||||
obstack_blank (obstack, length);
|
||||
}
|
||||
|
||||
void (obstack_1grow_fast) (obstack, character)
|
||||
struct obstack *obstack;
|
||||
int character;
|
||||
{
|
||||
obstack_1grow_fast (obstack, character);
|
||||
}
|
||||
|
||||
void (obstack_blank_fast) (obstack, length)
|
||||
struct obstack *obstack;
|
||||
int length;
|
||||
{
|
||||
obstack_blank_fast (obstack, length);
|
||||
}
|
||||
|
||||
POINTER (obstack_finish) (obstack)
|
||||
struct obstack *obstack;
|
||||
{
|
||||
return obstack_finish (obstack);
|
||||
}
|
||||
|
||||
POINTER (obstack_alloc) (obstack, length)
|
||||
struct obstack *obstack;
|
||||
int length;
|
||||
{
|
||||
return obstack_alloc (obstack, length);
|
||||
}
|
||||
|
||||
POINTER (obstack_copy) (obstack, pointer, length)
|
||||
struct obstack *obstack;
|
||||
POINTER pointer;
|
||||
int length;
|
||||
{
|
||||
return obstack_copy (obstack, pointer, length);
|
||||
}
|
||||
|
||||
POINTER (obstack_copy0) (obstack, pointer, length)
|
||||
struct obstack *obstack;
|
||||
POINTER pointer;
|
||||
int length;
|
||||
{
|
||||
return obstack_copy0 (obstack, pointer, length);
|
||||
}
|
||||
|
||||
#endif /* __STDC__ */
|
||||
|
||||
#endif /* 0 */
|
||||
|
||||
#endif /* _LIBC or not __GNU_LIBRARY__. */
|
484
gnu/usr.bin/grep/obstack.h
Normal file
484
gnu/usr.bin/grep/obstack.h
Normal file
@ -0,0 +1,484 @@
|
||||
/* obstack.h - object stack macros
|
||||
Copyright (C) 1988, 1992 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2, or (at your option) any
|
||||
later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
/* Summary:
|
||||
|
||||
All the apparent functions defined here are macros. The idea
|
||||
is that you would use these pre-tested macros to solve a
|
||||
very specific set of problems, and they would run fast.
|
||||
Caution: no side-effects in arguments please!! They may be
|
||||
evaluated MANY times!!
|
||||
|
||||
These macros operate a stack of objects. Each object starts life
|
||||
small, and may grow to maturity. (Consider building a word syllable
|
||||
by syllable.) An object can move while it is growing. Once it has
|
||||
been "finished" it never changes address again. So the "top of the
|
||||
stack" is typically an immature growing object, while the rest of the
|
||||
stack is of mature, fixed size and fixed address objects.
|
||||
|
||||
These routines grab large chunks of memory, using a function you
|
||||
supply, called `obstack_chunk_alloc'. On occasion, they free chunks,
|
||||
by calling `obstack_chunk_free'. You must define them and declare
|
||||
them before using any obstack macros.
|
||||
|
||||
Each independent stack is represented by a `struct obstack'.
|
||||
Each of the obstack macros expects a pointer to such a structure
|
||||
as the first argument.
|
||||
|
||||
One motivation for this package is the problem of growing char strings
|
||||
in symbol tables. Unless you are "fascist pig with a read-only mind"
|
||||
--Gosper's immortal quote from HAKMEM item 154, out of context--you
|
||||
would not like to put any arbitrary upper limit on the length of your
|
||||
symbols.
|
||||
|
||||
In practice this often means you will build many short symbols and a
|
||||
few long symbols. At the time you are reading a symbol you don't know
|
||||
how long it is. One traditional method is to read a symbol into a
|
||||
buffer, realloc()ating the buffer every time you try to read a symbol
|
||||
that is longer than the buffer. This is beaut, but you still will
|
||||
want to copy the symbol from the buffer to a more permanent
|
||||
symbol-table entry say about half the time.
|
||||
|
||||
With obstacks, you can work differently. Use one obstack for all symbol
|
||||
names. As you read a symbol, grow the name in the obstack gradually.
|
||||
When the name is complete, finalize it. Then, if the symbol exists already,
|
||||
free the newly read name.
|
||||
|
||||
The way we do this is to take a large chunk, allocating memory from
|
||||
low addresses. When you want to build a symbol in the chunk you just
|
||||
add chars above the current "high water mark" in the chunk. When you
|
||||
have finished adding chars, because you got to the end of the symbol,
|
||||
you know how long the chars are, and you can create a new object.
|
||||
Mostly the chars will not burst over the highest address of the chunk,
|
||||
because you would typically expect a chunk to be (say) 100 times as
|
||||
long as an average object.
|
||||
|
||||
In case that isn't clear, when we have enough chars to make up
|
||||
the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed)
|
||||
so we just point to it where it lies. No moving of chars is
|
||||
needed and this is the second win: potentially long strings need
|
||||
never be explicitly shuffled. Once an object is formed, it does not
|
||||
change its address during its lifetime.
|
||||
|
||||
When the chars burst over a chunk boundary, we allocate a larger
|
||||
chunk, and then copy the partly formed object from the end of the old
|
||||
chunk to the beginning of the new larger chunk. We then carry on
|
||||
accreting characters to the end of the object as we normally would.
|
||||
|
||||
A special macro is provided to add a single char at a time to a
|
||||
growing object. This allows the use of register variables, which
|
||||
break the ordinary 'growth' macro.
|
||||
|
||||
Summary:
|
||||
We allocate large chunks.
|
||||
We carve out one object at a time from the current chunk.
|
||||
Once carved, an object never moves.
|
||||
We are free to append data of any size to the currently
|
||||
growing object.
|
||||
Exactly one object is growing in an obstack at any one time.
|
||||
You can run one obstack per control block.
|
||||
You may have as many control blocks as you dare.
|
||||
Because of the way we do it, you can `unwind' an obstack
|
||||
back to a previous state. (You may remove objects much
|
||||
as you would with a stack.)
|
||||
*/
|
||||
|
||||
|
||||
/* Don't do the contents of this file more than once. */
|
||||
|
||||
#ifndef __OBSTACKS__
|
||||
#define __OBSTACKS__
|
||||
|
||||
/* We use subtraction of (char *)0 instead of casting to int
|
||||
because on word-addressable machines a simple cast to int
|
||||
may ignore the byte-within-word field of the pointer. */
|
||||
|
||||
#ifndef __PTR_TO_INT
|
||||
#define __PTR_TO_INT(P) ((P) - (char *)0)
|
||||
#endif
|
||||
|
||||
#ifndef __INT_TO_PTR
|
||||
#define __INT_TO_PTR(P) ((P) + (char *)0)
|
||||
#endif
|
||||
|
||||
/* We need the type of the resulting object. In ANSI C it is ptrdiff_t
|
||||
but in traditional C it is usually long. If we are in ANSI C and
|
||||
don't already have ptrdiff_t get it. */
|
||||
|
||||
#if defined (__STDC__) && ! defined (offsetof)
|
||||
#if defined (__GNUC__) && defined (IN_GCC)
|
||||
/* On Next machine, the system's stddef.h screws up if included
|
||||
after we have defined just ptrdiff_t, so include all of gstddef.h.
|
||||
Otherwise, define just ptrdiff_t, which is all we need. */
|
||||
#ifndef __NeXT__
|
||||
#define __need_ptrdiff_t
|
||||
#endif
|
||||
|
||||
/* While building GCC, the stddef.h that goes with GCC has this name. */
|
||||
#include "gstddef.h"
|
||||
#else
|
||||
#include <stddef.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __STDC__
|
||||
#define PTR_INT_TYPE ptrdiff_t
|
||||
#else
|
||||
#define PTR_INT_TYPE long
|
||||
#endif
|
||||
|
||||
struct _obstack_chunk /* Header that lives at the front of each chunk. */
|
||||
{
|
||||
char *limit; /* 1 past end of this chunk */
|
||||
struct _obstack_chunk *prev; /* address of prior chunk or NULL */
|
||||
char contents[4]; /* objects begin here; storage runs on up to `limit' */
|
||||
};
|
||||
|
||||
struct obstack /* control current object in current chunk */
|
||||
{
|
||||
long chunk_size; /* preferred size to allocate chunks in */
|
||||
struct _obstack_chunk* chunk; /* address of current struct _obstack_chunk */
|
||||
char *object_base; /* address of object we are building */
|
||||
char *next_free; /* where to add next char to current object */
|
||||
char *chunk_limit; /* address of char after current chunk */
|
||||
PTR_INT_TYPE temp; /* Temporary for some macros. */
|
||||
int alignment_mask; /* Mask of alignment for each object (alignment - 1). */
|
||||
struct _obstack_chunk *(*chunkfun) (); /* User's fcn to allocate a chunk. */
|
||||
void (*freefun) (); /* User's function to free a chunk. */
|
||||
char *extra_arg; /* first arg for chunk alloc/dealloc funcs, passed only when use_extra_arg is set */
|
||||
unsigned use_extra_arg:1; /* chunk alloc/dealloc funcs take extra arg */
|
||||
unsigned maybe_empty_object:1;/* There is a possibility that the current
|
||||
chunk contains a zero-length object. This
|
||||
prevents freeing the chunk if we allocate
|
||||
a bigger chunk to replace it. */
|
||||
};
|
||||
|
||||
/* Declare the external functions we use; they are in obstack.c. */
|
||||
|
||||
#ifdef __STDC__
|
||||
extern void _obstack_newchunk (struct obstack *, int);
|
||||
extern void _obstack_free (struct obstack *, void *);
|
||||
extern void _obstack_begin (struct obstack *, int, int,
|
||||
void *(*) (), void (*) ());
|
||||
extern void _obstack_begin_1 (struct obstack *, int, int,
|
||||
void *(*) (), void (*) (), void *);
|
||||
#else
|
||||
extern void _obstack_newchunk ();
|
||||
extern void _obstack_free ();
|
||||
extern void _obstack_begin ();
|
||||
extern void _obstack_begin_1 ();
|
||||
#endif
|
||||
|
||||
#ifdef __STDC__
|
||||
|
||||
/* Do the function-declarations after the structs
|
||||
but before defining the macros. */
|
||||
|
||||
void obstack_init (struct obstack *obstack);
|
||||
|
||||
void * obstack_alloc (struct obstack *obstack, int size);
|
||||
|
||||
void * obstack_copy (struct obstack *obstack, void *address, int size);
|
||||
void * obstack_copy0 (struct obstack *obstack, void *address, int size);
|
||||
|
||||
void obstack_free (struct obstack *obstack, void *block);
|
||||
|
||||
void obstack_blank (struct obstack *obstack, int size);
|
||||
|
||||
void obstack_grow (struct obstack *obstack, void *data, int size);
|
||||
void obstack_grow0 (struct obstack *obstack, void *data, int size);
|
||||
|
||||
void obstack_1grow (struct obstack *obstack, int data_char);
|
||||
void obstack_ptr_grow (struct obstack *obstack, void *data);
|
||||
void obstack_int_grow (struct obstack *obstack, int data);
|
||||
|
||||
void * obstack_finish (struct obstack *obstack);
|
||||
|
||||
int obstack_object_size (struct obstack *obstack);
|
||||
|
||||
int obstack_room (struct obstack *obstack);
|
||||
void obstack_1grow_fast (struct obstack *obstack, int data_char);
|
||||
void obstack_ptr_grow_fast (struct obstack *obstack, void *data);
|
||||
void obstack_int_grow_fast (struct obstack *obstack, int data);
|
||||
void obstack_blank_fast (struct obstack *obstack, int size);
|
||||
|
||||
void * obstack_base (struct obstack *obstack);
|
||||
void * obstack_next_free (struct obstack *obstack);
|
||||
int obstack_alignment_mask (struct obstack *obstack);
|
||||
int obstack_chunk_size (struct obstack *obstack);
|
||||
|
||||
#endif /* __STDC__ */
|
||||
|
||||
/* Non-ANSI C cannot really support alternative functions for these macros,
|
||||
so we do not declare them. */
|
||||
|
||||
/* Pointer to beginning of object being allocated or to be allocated next.
|
||||
Note that this might not be the final address of the object
|
||||
because a new chunk might be needed to hold the final size. */
|
||||
|
||||
#define obstack_base(h) ((h)->object_base)
|
||||
|
||||
/* Size for allocating ordinary chunks. */
|
||||
|
||||
#define obstack_chunk_size(h) ((h)->chunk_size)
|
||||
|
||||
/* Pointer to next byte not yet allocated in current chunk. */
|
||||
|
||||
#define obstack_next_free(h) ((h)->next_free)
|
||||
|
||||
/* Mask specifying low bits that should be clear in address of an object. */
|
||||
|
||||
#define obstack_alignment_mask(h) ((h)->alignment_mask)
|
||||
|
||||
#define obstack_init(h) \
|
||||
_obstack_begin ((h), 0, 0, \
|
||||
(void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free)
|
||||
|
||||
#define obstack_begin(h, size) \
|
||||
_obstack_begin ((h), (size), 0, \
|
||||
(void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free)
|
||||
|
||||
#define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \
|
||||
_obstack_begin ((h), (size), (alignment), \
|
||||
(void *(*) ()) (chunkfun), (void (*) ()) (freefun))
|
||||
|
||||
#define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \
|
||||
_obstack_begin_1 ((h), (size), (alignment), \
|
||||
(void *(*) ()) (chunkfun), (void (*) ()) (freefun), (arg))
|
||||
|
||||
#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = achar)
|
||||
|
||||
#define obstack_blank_fast(h,n) ((h)->next_free += (n))
|
||||
|
||||
#if defined (__GNUC__) && defined (__STDC__)
|
||||
#if __GNUC__ < 2 || defined(NeXT)
|
||||
#define __extension__
|
||||
#endif
|
||||
|
||||
/* For GNU C, if not -traditional,
|
||||
we can define these macros to compute all args only once
|
||||
without using a global variable.
|
||||
Also, we can avoid using the `temp' slot, to make faster code. */
|
||||
|
||||
/* Number of bytes accumulated so far in the object under construction.
   OBSTACK is evaluated exactly once.  */
#define obstack_object_size(OBSTACK) \
  __extension__ \
  ({ struct obstack *__o = (OBSTACK); \
     (unsigned) (__o->next_free - __o->object_base); })

/* Number of bytes left between next_free and the end of the current
   chunk.  OBSTACK is evaluated exactly once.  */
#define obstack_room(OBSTACK) \
  __extension__ \
  ({ struct obstack *__o = (OBSTACK); \
     (unsigned) (__o->chunk_limit - __o->next_free); })
|
||||
|
||||
/* Note that the call to _obstack_newchunk is enclosed in (..., 0)
|
||||
so that we can avoid having void expressions
|
||||
in the arms of the conditional expression.
|
||||
Casting the third operand to void was tried before,
|
||||
but some compilers won't accept it. */
|
||||
/* Append LENGTH bytes starting at WHERE to the growing object, calling
   _obstack_newchunk first when they do not fit in the current chunk.  */
#define obstack_grow(OBSTACK,where,length) \
  __extension__ \
  ({ struct obstack *__o = (OBSTACK); \
     int __len = (length); \
     ((__o->next_free + __len > __o->chunk_limit) \
      ? (_obstack_newchunk (__o, __len), 0) : 0); \
     bcopy (where, __o->next_free, __len); \
     __o->next_free += __len; \
     (void) 0; })

/* As obstack_grow, then append one terminating zero byte; room for
   LENGTH + 1 bytes is requested.  */
#define obstack_grow0(OBSTACK,where,length) \
  __extension__ \
  ({ struct obstack *__o = (OBSTACK); \
     int __len = (length); \
     ((__o->next_free + __len + 1 > __o->chunk_limit) \
      ? (_obstack_newchunk (__o, __len + 1), 0) : 0); \
     bcopy (where, __o->next_free, __len); \
     __o->next_free += __len; \
     *(__o->next_free)++ = 0; \
     (void) 0; })

/* Append the single character DATUM to the growing object.  */
#define obstack_1grow(OBSTACK,datum) \
  __extension__ \
  ({ struct obstack *__o = (OBSTACK); \
     ((__o->next_free + 1 > __o->chunk_limit) \
      ? (_obstack_newchunk (__o, 1), 0) : 0); \
     *(__o->next_free)++ = (datum); \
     (void) 0; })
|
||||
|
||||
/* These assume that the obstack alignment is good enough for pointers or ints,
   and that the data added so far to the current object
   shares that much alignment.

   The store and the advance of next_free are written as two separate
   steps: post-incrementing a cast expression ("cast as lvalue") is not
   valid C and current GCC releases reject it.  The value argument is
   parenthesized before it is cast, so a compound argument such as
   `p + 1' is converted as a whole.  */

/* Append the pointer DATUM, getting a new chunk first if needed.  */
#define obstack_ptr_grow(OBSTACK,datum) \
  __extension__ \
  ({ struct obstack *__o = (OBSTACK); \
     ((__o->next_free + sizeof (void *) > __o->chunk_limit) \
      ? (_obstack_newchunk (__o, sizeof (void *)), 0) : 0); \
     *(void **) __o->next_free = (void *) (datum); \
     __o->next_free += sizeof (void *); \
     (void) 0; })

/* Append the int DATUM, getting a new chunk first if needed.  */
#define obstack_int_grow(OBSTACK,datum) \
  __extension__ \
  ({ struct obstack *__o = (OBSTACK); \
     ((__o->next_free + sizeof (int) > __o->chunk_limit) \
      ? (_obstack_newchunk (__o, sizeof (int)), 0) : 0); \
     *(int *) __o->next_free = (int) (datum); \
     __o->next_free += sizeof (int); \
     (void) 0; })

/* Unchecked variants: no room test is made.  H is expanded once; the
   slot written is the one next_free pointed at before the advance.  */
#define obstack_ptr_grow_fast(h,aptr) \
  (*(void **) (((h)->next_free += sizeof (void *)) - sizeof (void *)) \
   = (void *) (aptr))
#define obstack_int_grow_fast(h,aint) \
  (*(int *) (((h)->next_free += sizeof (int)) - sizeof (int)) \
   = (int) (aint))
|
||||
|
||||
/* Reserve LENGTH uninitialized bytes at the end of the growing object,
   getting a new chunk first if the current one is too short.  */
#define obstack_blank(OBSTACK,length) \
  __extension__ \
  ({ struct obstack *__o = (OBSTACK); \
     int __len = (length); \
     ((__o->chunk_limit - __o->next_free < __len) \
      ? (_obstack_newchunk (__o, __len), 0) : 0); \
     __o->next_free += __len; \
     (void) 0; })

/* The three macros below keep their obstack in __h, a name distinct
   from the __o / __o1 locals of the macros they expand.  */

/* obstack_blank followed by obstack_finish; yields the finished object.  */
#define obstack_alloc(OBSTACK,length) \
  __extension__ \
  ({ struct obstack *__h = (OBSTACK); \
     obstack_blank (__h, (length)); \
     obstack_finish (__h); })

/* obstack_grow followed by obstack_finish.  */
#define obstack_copy(OBSTACK,where,length) \
  __extension__ \
  ({ struct obstack *__h = (OBSTACK); \
     obstack_grow (__h, (where), (length)); \
     obstack_finish (__h); })

/* obstack_grow0 followed by obstack_finish.  */
#define obstack_copy0(OBSTACK,where,length) \
  __extension__ \
  ({ struct obstack *__h = (OBSTACK); \
     obstack_grow0 (__h, (where), (length)); \
     obstack_finish (__h); })
|
||||
|
||||
/* Close the object being built and yield its start address.

   The obstack local is called __o1, not __o, so that it cannot collide
   with the local of obstack_blank when both are expanded together.

   Steps: remember object_base as the result; flag maybe_empty_object
   when nothing was added; round next_free up using alignment_mask;
   clamp it to chunk_limit; start the next object there.  */
#define obstack_finish(OBSTACK) \
  __extension__ \
  ({ struct obstack *__o1 = (OBSTACK); \
     void *__value = (void *) __o1->object_base; \
     if (__o1->next_free == __value) \
       __o1->maybe_empty_object = 1; \
     __o1->next_free \
       = __INT_TO_PTR ((__PTR_TO_INT (__o1->next_free)+__o1->alignment_mask) \
                       & ~ (__o1->alignment_mask)); \
     if (__o1->next_free - (char *)__o1->chunk \
         > __o1->chunk_limit - (char *)__o1->chunk) \
       __o1->next_free = __o1->chunk_limit; \
     __o1->object_base = __o1->next_free; \
     __value; })
|
||||
|
||||
/* Release OBJ.  When OBJ lies strictly inside the current chunk both
   object_base and next_free are simply moved back to it.  Otherwise the
   work is handed to the function obstack_free; its name is wrapped in
   parentheses so that this macro is not expanded again.  */
#define obstack_free(OBSTACK, OBJ) \
  __extension__ \
  ({ struct obstack *__o = (OBSTACK); \
     void *__obj = (OBJ); \
     if (__obj <= (void *)__o->chunk || __obj >= (void *)__o->chunk_limit) \
       (obstack_free) (__o, __obj); \
     else \
       __o->next_free = __o->object_base = __obj; })
|
||||
|
||||
#else /* not __GNUC__ or not __STDC__ */
|
||||
|
||||
/* Plain-expression versions of the macros.  H is expanded several
   times, so it must have no side effects; the `temp' member of the
   obstack holds the length between steps.  */

/* Bytes accumulated so far in the object under construction.  */
#define obstack_object_size(h) \
  (unsigned) ((h)->next_free - (h)->object_base)

/* Bytes left in the current chunk.  */
#define obstack_room(h) \
  (unsigned) ((h)->chunk_limit - (h)->next_free)

/* Append LENGTH bytes starting at WHERE.  */
#define obstack_grow(h,where,length) \
  ( (h)->temp = (length), \
    (((h)->next_free + (h)->temp > (h)->chunk_limit) \
     ? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \
    bcopy (where, (h)->next_free, (h)->temp), \
    (h)->next_free += (h)->temp)

/* Append LENGTH bytes starting at WHERE plus one terminating zero.  */
#define obstack_grow0(h,where,length) \
  ( (h)->temp = (length), \
    (((h)->next_free + (h)->temp + 1 > (h)->chunk_limit) \
     ? (_obstack_newchunk ((h), (h)->temp + 1), 0) : 0), \
    bcopy (where, (h)->next_free, (h)->temp), \
    (h)->next_free += (h)->temp, \
    *((h)->next_free)++ = 0)

/* Append the single character DATUM.  */
#define obstack_1grow(h,datum) \
  ( (((h)->next_free + 1 > (h)->chunk_limit) \
     ? (_obstack_newchunk ((h), 1), 0) : 0), \
    *((h)->next_free)++ = (datum))
|
||||
|
||||
/* Append a pointer / an int to the growing object.  H is expanded more
   than once and must be free of side effects.  The value argument is
   parenthesized before being cast so that a compound argument such as
   `p + 1' is converted as a whole rather than having only its first
   operand cast.  */
#define obstack_ptr_grow(h,datum) \
  ( (((h)->next_free + sizeof (char *) > (h)->chunk_limit) \
     ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \
    *((char **)(((h)->next_free+=sizeof(char *))-sizeof(char *))) \
      = ((char *) (datum)))

#define obstack_int_grow(h,datum) \
  ( (((h)->next_free + sizeof (int) > (h)->chunk_limit) \
     ? (_obstack_newchunk ((h), sizeof (int)), 0) : 0), \
    *((int *)(((h)->next_free+=sizeof(int))-sizeof(int))) \
      = ((int) (datum)))

/* Unchecked variants: no room test is made.  They use the same
   advance-then-step-back addressing as the checked macros above,
   because post-incrementing a cast expression is not valid C.  */
#define obstack_ptr_grow_fast(h,aptr) \
  (*((char **)(((h)->next_free+=sizeof(char *))-sizeof(char *))) \
   = (char *) (aptr))
#define obstack_int_grow_fast(h,aint) \
  (*((int *)(((h)->next_free+=sizeof(int))-sizeof(int))) \
   = (int) (aint))
|
||||
|
||||
/* Reserve LENGTH uninitialized bytes at the end of the growing object.  */
#define obstack_blank(h,length) \
  ( (h)->temp = (length), \
    (((h)->chunk_limit - (h)->next_free < (h)->temp) \
     ? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \
    (h)->next_free += (h)->temp)

/* Reserve LENGTH bytes and finish the object.  */
#define obstack_alloc(h,length) \
  (obstack_blank ((h), (length)), obstack_finish ((h)))

/* Append LENGTH bytes from WHERE and finish the object.  */
#define obstack_copy(h,where,length) \
  (obstack_grow ((h), (where), (length)), obstack_finish ((h)))

/* Same, with a terminating zero byte appended.  */
#define obstack_copy0(h,where,length) \
  (obstack_grow0 ((h), (where), (length)), obstack_finish ((h)))

/* Close the object being built and yield its start address.  The start
   address is parked in `temp' (as an integer) while next_free is
   rounded up with alignment_mask, clamped to chunk_limit and copied to
   object_base; maybe_empty_object is set when nothing was added.  */
#define obstack_finish(h) \
  ( ((h)->next_free == (h)->object_base \
     ? (((h)->maybe_empty_object = 1), 0) \
     : 0), \
    (h)->temp = __PTR_TO_INT ((h)->object_base), \
    (h)->next_free \
      = __INT_TO_PTR ((__PTR_TO_INT ((h)->next_free)+(h)->alignment_mask) \
                      & ~ ((h)->alignment_mask)), \
    (((h)->next_free - (char *)(h)->chunk \
      > (h)->chunk_limit - (char *)(h)->chunk) \
     ? ((h)->next_free = (h)->chunk_limit) : 0), \
    (h)->object_base = (h)->next_free, \
    __INT_TO_PTR ((h)->temp))
|
||||
|
||||
/* Release OBJ.  Its offset from the start of the current chunk is kept
   in `temp'.  An offset strictly inside the chunk just moves
   object_base and next_free back to OBJ; anything else is passed on to
   a function.  With __STDC__ that function is obstack_free itself (the
   parenthesized name is not expanded as this macro); otherwise it is
   _obstack_free.  */
#ifdef __STDC__
#define obstack_free(h,obj) \
  ( (h)->temp = (char *)(obj) - (char *) (h)->chunk, \
    (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk) \
     ? (int) ((h)->next_free = (h)->object_base \
              = (h)->temp + (char *) (h)->chunk) \
     : (((obstack_free) ((h), (h)->temp + (char *) (h)->chunk), 0), 0)))
#else
#define obstack_free(h,obj) \
  ( (h)->temp = (char *)(obj) - (char *) (h)->chunk, \
    (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk) \
     ? (int) ((h)->next_free = (h)->object_base \
              = (h)->temp + (char *) (h)->chunk) \
     : (_obstack_free ((h), (h)->temp + (char *) (h)->chunk), 0)))
#endif
|
||||
|
||||
#endif /* not __GNUC__ or not __STDC__ */
|
||||
|
||||
#endif /* not __OBSTACKS__ */
|
481
gnu/usr.bin/grep/search.c
Normal file
481
gnu/usr.bin/grep/search.c
Normal file
@ -0,0 +1,481 @@
|
||||
/* search.c - searching subroutines using dfa, kwset and regex for grep.
|
||||
Copyright (C) 1992 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
Written August 1992 by Mike Haertel. */
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#ifdef STDC_HEADERS
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#else
|
||||
#define UCHAR_MAX 255
|
||||
#include <sys/types.h>
|
||||
extern char *malloc();
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_MEMCHR
|
||||
#include <string.h>
|
||||
#ifdef NEED_MEMORY_H
|
||||
#include <memory.h>
|
||||
#endif
|
||||
#else
|
||||
#ifdef __STDC__
|
||||
extern void *memchr();
|
||||
#else
|
||||
extern char *memchr();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
|
||||
#undef bcopy
|
||||
#define bcopy(s, d, n) memcpy((d), (s), (n))
|
||||
#endif
|
||||
|
||||
/* Character-class helpers.  When <ctype.h> supplies isascii as a macro
   (and this is not FreeBSD) the argument is range-checked with it before
   being classified.  Otherwise the argument is narrowed to unsigned char
   first, so that a negative plain char is never handed to isalnum or
   isupper.  The argument is parenthesized before the cast so that a
   compound expression is converted as a whole.  */
#if defined(isascii) && !defined(__FreeBSD__)
#define ISALNUM(C) (isascii(C) && isalnum(C))
#define ISUPPER(C) (isascii(C) && isupper(C))
#else
#define ISALNUM(C) isalnum((unsigned char) (C))
#define ISUPPER(C) isupper((unsigned char) (C))
#endif

/* Lower-case C when it is an upper-case letter, else yield C unchanged.
   C is expanded more than once and must have no side effects.  */
#define TOLOWER(C) (ISUPPER(C) ? tolower((unsigned char) (C)) : (C))
|
||||
|
||||
#include "grep.h"
|
||||
#include "dfa.h"
|
||||
#include "kwset.h"
|
||||
#include "gnuregex.h"
|
||||
|
||||
#define NCHAR (UCHAR_MAX + 1)
|
||||
|
||||
#if __STDC__
|
||||
static void Gcompile(char *, size_t);
|
||||
static void Ecompile(char *, size_t);
|
||||
static char *EGexecute(char *, size_t, char **);
|
||||
static void Fcompile(char *, size_t);
|
||||
static char *Fexecute(char *, size_t, char **);
|
||||
#else
|
||||
static void Gcompile();
|
||||
static void Ecompile();
|
||||
static char *EGexecute();
|
||||
static void Fcompile();
|
||||
static char *Fexecute();
|
||||
#endif
|
||||
|
||||
/* Here is the matchers vector for the main program. */
|
||||
struct matcher matchers[] = {
|
||||
{ "default", Gcompile, EGexecute },
|
||||
{ "grep", Gcompile, EGexecute },
|
||||
{ "ggrep", Gcompile, EGexecute },
|
||||
{ "egrep", Ecompile, EGexecute },
|
||||
{ "posix-egrep", Ecompile, EGexecute },
|
||||
{ "gegrep", Ecompile, EGexecute },
|
||||
{ "fgrep", Fcompile, Fexecute },
|
||||
{ "gfgrep", Fcompile, Fexecute },
|
||||
{ 0, 0, 0 },
|
||||
};
|
||||
|
||||
/* For -w, we also consider _ to be word constituent. */
|
||||
#define WCHAR(C) (ISALNUM(C) || (C) == '_')
|
||||
|
||||
/* DFA compiled regexp. */
|
||||
static struct dfa dfa;
|
||||
|
||||
/* Regex compiled regexp. */
|
||||
static struct re_pattern_buffer regex;
|
||||
|
||||
/* KWset compiled pattern. For Ecompile and Gcompile, we compile
|
||||
a list of strings, at least one of which is known to occur in
|
||||
any string matching the regexp. */
|
||||
static kwset_t kwset;
|
||||
|
||||
/* Last compiled fixed string known to exactly match the regexp.
|
||||
If kwsexec() returns < lastexact, then we don't need to
|
||||
call the regexp matcher at all. */
|
||||
static int lastexact;
|
||||
|
||||
/* Error hook with external linkage (presumably called from the dfa
   code -- confirm against dfa.c).  MESG is handed straight to fatal()
   with an error number of 0.  */
void
dfaerror(mesg)
     char *mesg;
{
  fatal(mesg, 0);
}
|
||||
|
||||
static void
|
||||
kwsinit()
|
||||
{
|
||||
static char trans[NCHAR];
|
||||
int i;
|
||||
|
||||
if (match_icase)
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
trans[i] = TOLOWER(i);
|
||||
|
||||
if (!(kwset = kwsalloc(match_icase ? trans : (char *) 0)))
|
||||
fatal("memory exhausted", 0);
|
||||
}
|
||||
|
||||
/* If the DFA turns out to have some set of fixed strings one of
|
||||
which must occur in the match, then we build a kwset matcher
|
||||
to find those strings, and thus quickly filter out impossible
|
||||
matches. */
|
||||
static void
|
||||
kwsmusts()
|
||||
{
|
||||
struct dfamust *dm;
|
||||
char *err;
|
||||
|
||||
if (dfa.musts)
|
||||
{
|
||||
kwsinit();
|
||||
/* First, we compile in the substrings known to be exact
|
||||
matches. The kwset matcher will return the index
|
||||
of the matching string that it chooses. */
|
||||
for (dm = dfa.musts; dm; dm = dm->next)
|
||||
{
|
||||
if (!dm->exact)
|
||||
continue;
|
||||
++lastexact;
|
||||
if ((err = kwsincr(kwset, dm->must, strlen(dm->must))) != 0)
|
||||
fatal(err, 0);
|
||||
}
|
||||
/* Now, we compile the substrings that will require
|
||||
the use of the regexp matcher. */
|
||||
for (dm = dfa.musts; dm; dm = dm->next)
|
||||
{
|
||||
if (dm->exact)
|
||||
continue;
|
||||
if ((err = kwsincr(kwset, dm->must, strlen(dm->must))) != 0)
|
||||
fatal(err, 0);
|
||||
}
|
||||
if ((err = kwsprep(kwset)) != 0)
|
||||
fatal(err, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
Gcompile(pattern, size)
|
||||
char *pattern;
|
||||
size_t size;
|
||||
{
|
||||
#ifdef __STDC__
|
||||
const
|
||||
#endif
|
||||
char *err;
|
||||
|
||||
re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
|
||||
dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase);
|
||||
|
||||
if ((err = re_compile_pattern(pattern, size, ®ex)) != 0)
|
||||
fatal(err, 0);
|
||||
|
||||
dfainit(&dfa);
|
||||
|
||||
/* In the match_words and match_lines cases, we use a different pattern
|
||||
for the DFA matcher that will quickly throw out cases that won't work.
|
||||
Then if DFA succeeds we do some hairy stuff using the regex matcher
|
||||
to decide whether the match should really count. */
|
||||
if (match_words || match_lines)
|
||||
{
|
||||
/* In the whole-word case, we use the pattern:
|
||||
(^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
|
||||
In the whole-line case, we use the pattern:
|
||||
^(userpattern)$.
|
||||
BUG: Using [A-Za-z_] is locale-dependent! */
|
||||
|
||||
char *n = malloc(size + 50);
|
||||
int i = 0;
|
||||
|
||||
strcpy(n, "");
|
||||
|
||||
if (match_lines)
|
||||
strcpy(n, "^\\(");
|
||||
if (match_words)
|
||||
strcpy(n, "\\(^\\|[^0-9A-Za-z_]\\)\\(");
|
||||
|
||||
i = strlen(n);
|
||||
bcopy(pattern, n + i, size);
|
||||
i += size;
|
||||
|
||||
if (match_words)
|
||||
strcpy(n + i, "\\)\\([^0-9A-Za-z_]\\|$\\)");
|
||||
if (match_lines)
|
||||
strcpy(n + i, "\\)$");
|
||||
|
||||
i += strlen(n + i);
|
||||
dfacomp(n, i, &dfa, 1);
|
||||
}
|
||||
else
|
||||
dfacomp(pattern, size, &dfa, 1);
|
||||
|
||||
kwsmusts();
|
||||
}
|
||||
|
||||
static void
|
||||
Ecompile(pattern, size)
|
||||
char *pattern;
|
||||
size_t size;
|
||||
{
|
||||
#ifdef __STDC__
|
||||
const
|
||||
#endif
|
||||
char *err;
|
||||
|
||||
if (strcmp(matcher, "posix-egrep") == 0)
|
||||
{
|
||||
re_set_syntax(RE_SYNTAX_POSIX_EGREP);
|
||||
dfasyntax(RE_SYNTAX_POSIX_EGREP, match_icase);
|
||||
}
|
||||
else
|
||||
{
|
||||
re_set_syntax(RE_SYNTAX_EGREP);
|
||||
dfasyntax(RE_SYNTAX_EGREP, match_icase);
|
||||
}
|
||||
|
||||
if ((err = re_compile_pattern(pattern, size, ®ex)) != 0)
|
||||
fatal(err, 0);
|
||||
|
||||
dfainit(&dfa);
|
||||
|
||||
/* In the match_words and match_lines cases, we use a different pattern
|
||||
for the DFA matcher that will quickly throw out cases that won't work.
|
||||
Then if DFA succeeds we do some hairy stuff using the regex matcher
|
||||
to decide whether the match should really count. */
|
||||
if (match_words || match_lines)
|
||||
{
|
||||
/* In the whole-word case, we use the pattern:
|
||||
(^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
|
||||
In the whole-line case, we use the pattern:
|
||||
^(userpattern)$.
|
||||
BUG: Using [A-Za-z_] is locale-dependent! */
|
||||
|
||||
char *n = malloc(size + 50);
|
||||
int i = 0;
|
||||
|
||||
strcpy(n, "");
|
||||
|
||||
if (match_lines)
|
||||
strcpy(n, "^(");
|
||||
if (match_words)
|
||||
strcpy(n, "(^|[^0-9A-Za-z_])(");
|
||||
|
||||
i = strlen(n);
|
||||
bcopy(pattern, n + i, size);
|
||||
i += size;
|
||||
|
||||
if (match_words)
|
||||
strcpy(n + i, ")([^0-9A-Za-z_]|$)");
|
||||
if (match_lines)
|
||||
strcpy(n + i, ")$");
|
||||
|
||||
i += strlen(n + i);
|
||||
dfacomp(n, i, &dfa, 1);
|
||||
}
|
||||
else
|
||||
dfacomp(pattern, size, &dfa, 1);
|
||||
|
||||
kwsmusts();
|
||||
}
|
||||
|
||||
static char *
|
||||
EGexecute(buf, size, endp)
|
||||
char *buf;
|
||||
size_t size;
|
||||
char **endp;
|
||||
{
|
||||
register char *buflim, *beg, *end, save;
|
||||
int backref, start, len;
|
||||
struct kwsmatch kwsm;
|
||||
static struct re_registers regs; /* This is static on account of a BRAIN-DEAD
|
||||
Q@#%!# library interface in regex.c. */
|
||||
|
||||
buflim = buf + size;
|
||||
|
||||
for (beg = end = buf; end < buflim; beg = end + 1)
|
||||
{
|
||||
if (kwset)
|
||||
{
|
||||
/* Find a possible match using the KWset matcher. */
|
||||
beg = kwsexec(kwset, beg, buflim - beg, &kwsm);
|
||||
if (!beg)
|
||||
goto failure;
|
||||
/* Narrow down to the line containing the candidate, and
|
||||
run it through DFA. */
|
||||
end = memchr(beg, '\n', buflim - beg);
|
||||
if (!end)
|
||||
end = buflim;
|
||||
while (beg > buf && beg[-1] != '\n')
|
||||
--beg;
|
||||
save = *end;
|
||||
if (kwsm.index < lastexact)
|
||||
goto success;
|
||||
if (!dfaexec(&dfa, beg, end, 0, (int *) 0, &backref))
|
||||
{
|
||||
*end = save;
|
||||
continue;
|
||||
}
|
||||
*end = save;
|
||||
/* Successful, no backreferences encountered. */
|
||||
if (!backref)
|
||||
goto success;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* No good fixed strings; start with DFA. */
|
||||
save = *buflim;
|
||||
beg = dfaexec(&dfa, beg, buflim, 0, (int *) 0, &backref);
|
||||
*buflim = save;
|
||||
if (!beg)
|
||||
goto failure;
|
||||
/* Narrow down to the line we've found. */
|
||||
end = memchr(beg, '\n', buflim - beg);
|
||||
if (!end)
|
||||
end = buflim;
|
||||
while (beg > buf && beg[-1] != '\n')
|
||||
--beg;
|
||||
/* Successful, no backreferences encountered! */
|
||||
if (!backref)
|
||||
goto success;
|
||||
}
|
||||
/* If we've made it to this point, this means DFA has seen
|
||||
a probable match, and we need to run it through Regex. */
|
||||
regex.not_eol = 0;
|
||||
if ((start = re_search(®ex, beg, end - beg, 0, end - beg, ®s)) >= 0)
|
||||
{
|
||||
len = regs.end[0] - start;
|
||||
if (!match_lines && !match_words || match_lines && len == end - beg)
|
||||
goto success;
|
||||
/* If -w, check if the match aligns with word boundaries.
|
||||
We do this iteratively because:
|
||||
(a) the line may contain more than one occurence of the pattern, and
|
||||
(b) Several alternatives in the pattern might be valid at a given
|
||||
point, and we may need to consider a shorter one to find a word
|
||||
boundary. */
|
||||
if (match_words)
|
||||
while (start >= 0)
|
||||
{
|
||||
if ((start == 0 || !WCHAR(beg[start - 1]))
|
||||
&& (len == end - beg || !WCHAR(beg[start + len])))
|
||||
goto success;
|
||||
if (len > 0)
|
||||
{
|
||||
/* Try a shorter length anchored at the same place. */
|
||||
--len;
|
||||
regex.not_eol = 1;
|
||||
len = re_match(®ex, beg, start + len, start, ®s);
|
||||
}
|
||||
if (len <= 0)
|
||||
{
|
||||
/* Try looking further on. */
|
||||
if (start == end - beg)
|
||||
break;
|
||||
++start;
|
||||
regex.not_eol = 0;
|
||||
start = re_search(®ex, beg, end - beg,
|
||||
start, end - beg - start, ®s);
|
||||
len = regs.end[0] - start;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
failure:
|
||||
return 0;
|
||||
|
||||
success:
|
||||
*endp = end < buflim ? end + 1 : end;
|
||||
return beg;
|
||||
}
|
||||
|
||||
static void
|
||||
Fcompile(pattern, size)
|
||||
char *pattern;
|
||||
size_t size;
|
||||
{
|
||||
char *beg, *lim, *err;
|
||||
|
||||
kwsinit();
|
||||
beg = pattern;
|
||||
do
|
||||
{
|
||||
for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim)
|
||||
;
|
||||
if ((err = kwsincr(kwset, beg, lim - beg)) != 0)
|
||||
fatal(err, 0);
|
||||
if (lim < pattern + size)
|
||||
++lim;
|
||||
beg = lim;
|
||||
}
|
||||
while (beg < pattern + size);
|
||||
|
||||
if ((err = kwsprep(kwset)) != 0)
|
||||
fatal(err, 0);
|
||||
}
|
||||
|
||||
static char *
|
||||
Fexecute(buf, size, endp)
|
||||
char *buf;
|
||||
size_t size;
|
||||
char **endp;
|
||||
{
|
||||
register char *beg, *try, *end;
|
||||
register size_t len;
|
||||
struct kwsmatch kwsmatch;
|
||||
|
||||
for (beg = buf; beg <= buf + size; ++beg)
|
||||
{
|
||||
if (!(beg = kwsexec(kwset, beg, buf + size - beg, &kwsmatch)))
|
||||
return 0;
|
||||
len = kwsmatch.size[0];
|
||||
if (match_lines)
|
||||
{
|
||||
if (beg > buf && beg[-1] != '\n')
|
||||
continue;
|
||||
if (beg + len < buf + size && beg[len] != '\n')
|
||||
continue;
|
||||
goto success;
|
||||
}
|
||||
else if (match_words)
|
||||
for (try = beg; len && try;)
|
||||
{
|
||||
if (try > buf && WCHAR((unsigned char) try[-1]))
|
||||
break;
|
||||
if (try + len < buf + size && WCHAR((unsigned char) try[len]))
|
||||
{
|
||||
try = kwsexec(kwset, beg, --len, &kwsmatch);
|
||||
len = kwsmatch.size[0];
|
||||
}
|
||||
else
|
||||
goto success;
|
||||
}
|
||||
else
|
||||
goto success;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
success:
|
||||
if ((end = memchr(beg + len, '\n', (buf + size) - (beg + len))) != 0)
|
||||
++end;
|
||||
else
|
||||
end = buf + size;
|
||||
*endp = end;
|
||||
while (beg > buf && beg[-1] != '\n')
|
||||
--beg;
|
||||
return beg;
|
||||
}
|
24
gnu/usr.bin/grep/tests/check.sh
Normal file
24
gnu/usr.bin/grep/tests/check.sh
Normal file
@ -0,0 +1,24 @@
|
||||
#! /bin/sh
# Regression test for GNU grep.
# Usage: check.sh [testdir]
#
# Runs ./grep from the current directory against the data files found in
# testdir (default: tests).  Exits 0 when every test passes, 1 otherwise.

testdir=${1-tests}

failures=0

# The Khadafy test is brought to you by Scott Anderson . . .
# Every line of khadafy.lines is expected to match, so the output must be
# identical to the input.
./grep -E -f "$testdir/khadafy.regexp" "$testdir/khadafy.lines" > khadafy.out
if cmp "$testdir/khadafy.lines" khadafy.out
then
	:
else
	echo Khadafy test failed -- output left on khadafy.out
	failures=1
fi

# . . . and the following by Henry Spencer.
# scriptgen.awk turns spencer.tests into a shell script, which is then run.
# $AWK is deliberately left unquoted so that it may carry options.

${AWK-awk} -F: -f "$testdir/scriptgen.awk" "$testdir/spencer.tests" > tmp.script

# The generated script exits non-zero when any Spencer test fails; on
# success the Khadafy result decides the final status.
sh tmp.script && exit $failures
exit 1
|
10
gnu/usr.bin/grep/tests/scriptgen.awk
Normal file
10
gnu/usr.bin/grep/tests/scriptgen.awk
Normal file
@ -0,0 +1,10 @@
|
||||
# Turn each three-field record of the test list into a fragment of shell
# script.  The first field is the expected exit status, the second the
# regexp handed to ./grep -E and the third the text fed to it on stdin.
# How records are split into fields is up to the caller (check.sh runs
# this script with -F:).

BEGIN { print "failures=0" }

# Lines starting with '#' and records without exactly three fields are
# skipped.
NF == 3 && $0 !~ /^#/ {
	status = $1
	regexp = $2
	text = $3

	print "echo '" text "' | ./grep -E -e '" regexp "' > /dev/null 2>&1"
	print "if [ $? != " status " ]"
	print "then"
	printf "\techo Spencer test \\#%d failed\n", ++n
	print "\tfailures=1"
	print "fi"
}

END { print "exit $failures" }
|
122
gnu/usr.bin/grep/tests/spencer.tests
Normal file
122
gnu/usr.bin/grep/tests/spencer.tests
Normal file
@ -0,0 +1,122 @@
|
||||
0:abc:abc
|
||||
1:abc:xbc
|
||||
1:abc:axc
|
||||
1:abc:abx
|
||||
0:abc:xabcy
|
||||
0:abc:ababc
|
||||
0:ab*c:abc
|
||||
0:ab*bc:abc
|
||||
0:ab*bc:abbc
|
||||
0:ab*bc:abbbbc
|
||||
0:ab+bc:abbc
|
||||
1:ab+bc:abc
|
||||
1:ab+bc:abq
|
||||
0:ab+bc:abbbbc
|
||||
0:ab?bc:abbc
|
||||
0:ab?bc:abc
|
||||
1:ab?bc:abbbbc
|
||||
0:ab?c:abc
|
||||
0:^abc$:abc
|
||||
1:^abc$:abcc
|
||||
0:^abc:abcc
|
||||
1:^abc$:aabc
|
||||
0:abc$:aabc
|
||||
0:^:abc
|
||||
0:$:abc
|
||||
0:a.c:abc
|
||||
0:a.c:axc
|
||||
0:a.*c:axyzc
|
||||
1:a.*c:axyzd
|
||||
1:a[bc]d:abc
|
||||
0:a[bc]d:abd
|
||||
1:a[b-d]e:abd
|
||||
0:a[b-d]e:ace
|
||||
0:a[b-d]:aac
|
||||
0:a[-b]:a-
|
||||
0:a[b-]:a-
|
||||
2:a[b-a]:-
|
||||
2:a[]b:-
|
||||
2:a[:-
|
||||
0:a]:a]
|
||||
0:a[]]b:a]b
|
||||
0:a[^bc]d:aed
|
||||
1:a[^bc]d:abd
|
||||
0:a[^-b]c:adc
|
||||
1:a[^-b]c:a-c
|
||||
1:a[^]b]c:a]c
|
||||
0:a[^]b]c:adc
|
||||
0:ab|cd:abc
|
||||
0:ab|cd:abcd
|
||||
0:()ef:def
|
||||
0:()*:-
|
||||
1:*a:-
|
||||
0:^*:-
|
||||
0:$*:-
|
||||
1:(*)b:-
|
||||
1:$b:b
|
||||
2:a\:-
|
||||
0:a\(b:a(b
|
||||
0:a\(*b:ab
|
||||
0:a\(*b:a((b
|
||||
1:a\x:a\x
|
||||
2:abc):-
|
||||
2:(abc:-
|
||||
0:((a)):abc
|
||||
0:(a)b(c):abc
|
||||
0:a+b+c:aabbabc
|
||||
0:a**:-
|
||||
0:a*?:-
|
||||
0:(a*)*:-
|
||||
0:(a*)+:-
|
||||
0:(a|)*:-
|
||||
0:(a*|b)*:-
|
||||
0:(a+|b)*:ab
|
||||
0:(a+|b)+:ab
|
||||
0:(a+|b)?:ab
|
||||
0:[^ab]*:cde
|
||||
0:(^)*:-
|
||||
0:(ab|)*:-
|
||||
2:)(:-
|
||||
1:abc:
|
||||
1:abc:
|
||||
0:a*:
|
||||
0:([abc])*d:abbbcd
|
||||
0:([abc])*bcd:abcd
|
||||
0:a|b|c|d|e:e
|
||||
0:(a|b|c|d|e)f:ef
|
||||
0:((a*|b))*:-
|
||||
0:abcd*efg:abcdefg
|
||||
0:ab*:xabyabbbz
|
||||
0:ab*:xayabbbz
|
||||
0:(ab|cd)e:abcde
|
||||
0:[abhgefdc]ij:hij
|
||||
1:^(ab|cd)e:abcde
|
||||
0:(abc|)ef:abcdef
|
||||
0:(a|b)c*d:abcd
|
||||
0:(ab|ab*)bc:abc
|
||||
0:a([bc]*)c*:abc
|
||||
0:a([bc]*)(c*d):abcd
|
||||
0:a([bc]+)(c*d):abcd
|
||||
0:a([bc]*)(c+d):abcd
|
||||
0:a[bcd]*dcdcde:adcdcde
|
||||
1:a[bcd]+dcdcde:adcdcde
|
||||
0:(ab|a)b*c:abc
|
||||
0:((a)(b)c)(d):abcd
|
||||
0:[A-Za-z_][A-Za-z0-9_]*:alpha
|
||||
0:^a(bc+|b[eh])g|.h$:abh
|
||||
0:(bc+d$|ef*g.|h?i(j|k)):effgz
|
||||
0:(bc+d$|ef*g.|h?i(j|k)):ij
|
||||
1:(bc+d$|ef*g.|h?i(j|k)):effg
|
||||
1:(bc+d$|ef*g.|h?i(j|k)):bcdd
|
||||
0:(bc+d$|ef*g.|h?i(j|k)):reffgz
|
||||
1:((((((((((a)))))))))):-
|
||||
0:(((((((((a))))))))):a
|
||||
1:multiple words of text:uh-uh
|
||||
0:multiple words:multiple words, yeah
|
||||
0:(.*)c(.*):abcde
|
||||
1:\((.*),:(.*)\)
|
||||
1:[k]:ab
|
||||
0:abcd:abcd
|
||||
0:a(bc)d:abcd
|
||||
0:a[-]?c:ac
|
||||
0:(....).*\1:beriberi
|
Loading…
Reference in New Issue
Block a user