This commit was manufactured by cvs2svn to create branch 'FSF'.

This commit is contained in:
cvs2svn 1993-07-06 18:45:28 +00:00
commit de798f3fde
9 changed files with 2452 additions and 0 deletions

29
gnu/usr.bin/grep/AUTHORS Normal file
View File

@ -0,0 +1,29 @@
Mike Haertel wrote the main program and the dfa and kwset matchers.
Arthur David Olson contributed the heuristics for finding fixed substrings
at the end of dfa.c.
Richard Stallman and Karl Berry wrote the regex backtracking matcher.
Henry Spencer wrote the original test suite from which grep's was derived.
Scott Anderson invented the Khadafy test.
David MacKenzie wrote the automatic configuration software use to
produce the configure script.
Authors of the replacements for standard library routines are identified
in the corresponding source files.
The idea of using Boyer-Moore type algorithms to quickly filter out
non-matching text before calling the regexp matcher was originally due
to James Woods. He also contributed some code to early versions of
GNU grep.
Finally, I would like to thank Andrew Hume for many fascinating discussions
of string searching issues over the years. Hume & Sunday's excellent
paper on fast string searching (AT&T Bell Laboratories CSTR #156)
describes some of the history of the subject, as well as providing
exhaustive performance analysis of various implementation alternatives.
The inner loop of GNU grep is similar to Hume & Sunday's recommended
"Tuned Boyer Moore" inner loop.

35
gnu/usr.bin/grep/NEWS Normal file
View File

@ -0,0 +1,35 @@
Version 2.0:
The most important user visible change is that egrep and fgrep have
disappeared as separate programs into the single grep program mandated
by POSIX 1003.2. New options -G, -E, and -F have been added,
selecting grep, egrep, and fgrep behavior respectively. For
compatibility with historical practice, hard links named egrep and
fgrep are also provided. See the manual page for details.
In addition, the regular expression facilities described in Posix
draft 11.2 are now supported, except for internationalization features
related to locale-dependent collating sequence information.
There is a new option, -L, which is like -l except it lists
files which don't contain matches. The reason this option was
added is because '-l -v' doesn't do what you expect.
Performance has been improved; the amount of improvement is platform
dependent, but (for example) grep 2.0 typically runs at least 30% faster
than grep 1.6 on a DECstation using the MIPS compiler. Where possible,
grep now uses mmap() for file input; on a Sun 4 running SunOS 4.1 this
may cut system time by as much as half, for a total reduction in running
time by nearly 50%. On machines that don't use mmap(), the buffering
code has been rewritten to choose more favorable alignments and buffer
sizes for read().
Portability has been substantially cleaned up, and an automatic
configure script is now provided.
The internals have changed in ways too numerous to mention.
People brave enough to reuse the DFA matcher in other programs
will now have their bravery amply "rewarded", for the interface
to that file has been completely changed. Some changes were
necessary to track the evolution of the regex package, and since
I was changing it anyway I decided to do a general cleanup.

View File

@ -0,0 +1,42 @@
#ifdef BSD
#ifndef BSD4_1
#define HAVE_GETPAGESIZE
#endif
#endif
#ifndef HAVE_GETPAGESIZE
#ifdef VMS
#define getpagesize() 512
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef _SC_PAGESIZE
#define getpagesize() sysconf(_SC_PAGESIZE)
#else
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#ifdef EXEC_PAGESIZE
#define getpagesize() EXEC_PAGESIZE
#else
#ifdef NBPG
#define getpagesize() NBPG * CLSIZE
#ifndef CLSIZE
#define CLSIZE 1
#endif /* no CLSIZE */
#else /* no NBPG */
#define getpagesize() NBPC
#endif /* no NBPG */
#endif /* no EXEC_PAGESIZE */
#else /* !HAVE_SYS_PARAM_H */
#define getpagesize() 8192 /* punt totally */
#endif /* !HAVE_SYS_PARAM_H */
#endif /* no _SC_PAGESIZE */
#endif /* not HAVE_GETPAGESIZE */

53
gnu/usr.bin/grep/grep.h Normal file
View File

@ -0,0 +1,53 @@
/* grep.h - interface to grep driver for searching subroutines.
Copyright (C) 1992 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#if __STDC__
extern void fatal(const char *, int);
/* Grep.c expects the matchers vector to be terminated
by an entry with a NULL name, and to contain at least
an entry named "default". */
extern struct matcher
{
char *name;
void (*compile)(char *, size_t);
char *(*execute)(char *, size_t, char **);
} matchers[];
#else
extern void fatal();
extern struct matcher
{
char *name;
void (*compile)();
char *(*execute)();
} matchers[];
#endif
/* Exported from grep.c. */
extern char *matcher;
/* The following flags are exported from grep for the matchers
to look at. */
extern int match_icase; /* -i */
extern int match_words; /* -w */
extern int match_lines; /* -x */

805
gnu/usr.bin/grep/kwset.c Normal file
View File

@ -0,0 +1,805 @@
/* kwset.c - search for any of a set of keywords.
Copyright 1989 Free Software Foundation
Written August 1989 by Mike Haertel.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 1, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
The author may be reached (Email) at the address mike@ai.mit.edu,
or (US mail) as Mike Haertel c/o Free Software Foundation. */
/* The algorithm implemented by these routines bears a startling resemblence
to one discovered by Beate Commentz-Walter, although it is not identical.
See "A String Matching Algorithm Fast on the Average," Technical Report,
IBM-Germany, Scientific Center Heidelberg, Tiergartenstrasse 15, D-6900
Heidelberg, Germany. See also Aho, A.V., and M. Corasick, "Efficient
String Matching: An Aid to Bibliographic Search," CACM June 1975,
Vol. 18, No. 6, which describes the failure function used below. */
#ifdef STDC_HEADERS
#include <limits.h>
#include <stdlib.h>
#else
#define INT_MAX 2147483647
#define UCHAR_MAX 255
#ifdef __STDC__
#include <stddef.h>
#else
#include <sys/types.h>
#endif
extern char *malloc();
extern void free();
#endif
#ifdef HAVE_MEMCHR
#include <string.h>
#ifdef NEED_MEMORY_H
#include <memory.h>
#endif
#else
#ifdef __STDC__
extern void *memchr();
#else
extern char *memchr();
#endif
#endif
#ifdef GREP
extern char *xmalloc();
#define malloc xmalloc
#endif
#include "kwset.h"
#include "obstack.h"
#define NCHAR (UCHAR_MAX + 1)
#define obstack_chunk_alloc malloc
#define obstack_chunk_free free
/* Balanced tree of edges and labels leaving a given trie node. */
struct tree
{
struct tree *llink; /* Left link; MUST be first field. */
struct tree *rlink; /* Right link (to larger labels). */
struct trie *trie; /* Trie node pointed to by this edge. */
unsigned char label; /* Label on this edge. */
char balance; /* Difference in depths of subtrees. */
};
/* Node of a trie representing a set of reversed keywords. */
struct trie
{
unsigned int accepting; /* Word index of accepted word, or zero. */
struct tree *links; /* Tree of edges leaving this node. */
struct trie *parent; /* Parent of this node. */
struct trie *next; /* List of all trie nodes in level order. */
struct trie *fail; /* Aho-Corasick failure function. */
int depth; /* Depth of this node from the root. */
int shift; /* Shift function for search failures. */
int maxshift; /* Max shift of self and descendents. */
};
/* Structure returned opaquely to the caller, containing everything. */
struct kwset
{
struct obstack obstack; /* Obstack for node allocation. */
int words; /* Number of words in the trie. */
struct trie *trie; /* The trie itself. */
int mind; /* Minimum depth of an accepting node. */
int maxd; /* Maximum depth of any node. */
unsigned char delta[NCHAR]; /* Delta table for rapid search. */
struct trie *next[NCHAR]; /* Table of children of the root. */
char *target; /* Target string if there's only one. */
int mind2; /* Used in Boyer-Moore search for one string. */
char *trans; /* Character translation table. */
};
/* Allocate and initialize a keyword set object, returning an opaque
pointer to it. Return NULL if memory is not available. */
kwset_t
kwsalloc(trans)
char *trans;
{
struct kwset *kwset;
kwset = (struct kwset *) malloc(sizeof (struct kwset));
if (!kwset)
return 0;
obstack_init(&kwset->obstack);
kwset->words = 0;
kwset->trie
= (struct trie *) obstack_alloc(&kwset->obstack, sizeof (struct trie));
if (!kwset->trie)
{
kwsfree((kwset_t) kwset);
return 0;
}
kwset->trie->accepting = 0;
kwset->trie->links = 0;
kwset->trie->parent = 0;
kwset->trie->next = 0;
kwset->trie->fail = 0;
kwset->trie->depth = 0;
kwset->trie->shift = 0;
kwset->mind = INT_MAX;
kwset->maxd = -1;
kwset->target = 0;
kwset->trans = trans;
return (kwset_t) kwset;
}
/* Add the given string to the contents of the keyword set. Return NULL
for success, an error message otherwise. */
char *
kwsincr(kws, text, len)
kwset_t kws;
char *text;
size_t len;
{
struct kwset *kwset;
register struct trie *trie;
register unsigned char label;
register struct tree *link;
register int depth;
struct tree *links[12];
enum { L, R } dirs[12];
struct tree *t, *r, *l, *rl, *lr;
kwset = (struct kwset *) kws;
trie = kwset->trie;
text += len;
/* Descend the trie (built of reversed keywords) character-by-character,
installing new nodes when necessary. */
while (len--)
{
label = kwset->trans ? kwset->trans[(unsigned char) *--text] : *--text;
/* Descend the tree of outgoing links for this trie node,
looking for the current character and keeping track
of the path followed. */
link = trie->links;
links[0] = (struct tree *) &trie->links;
dirs[0] = L;
depth = 1;
while (link && label != link->label)
{
links[depth] = link;
if (label < link->label)
dirs[depth++] = L, link = link->llink;
else
dirs[depth++] = R, link = link->rlink;
}
/* The current character doesn't have an outgoing link at
this trie node, so build a new trie node and install
a link in the current trie node's tree. */
if (!link)
{
link = (struct tree *) obstack_alloc(&kwset->obstack,
sizeof (struct tree));
if (!link)
return "memory exhausted";
link->llink = 0;
link->rlink = 0;
link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
sizeof (struct trie));
if (!link->trie)
return "memory exhausted";
link->trie->accepting = 0;
link->trie->links = 0;
link->trie->parent = trie;
link->trie->next = 0;
link->trie->fail = 0;
link->trie->depth = trie->depth + 1;
link->trie->shift = 0;
link->label = label;
link->balance = 0;
/* Install the new tree node in its parent. */
if (dirs[--depth] == L)
links[depth]->llink = link;
else
links[depth]->rlink = link;
/* Back up the tree fixing the balance flags. */
while (depth && !links[depth]->balance)
{
if (dirs[depth] == L)
--links[depth]->balance;
else
++links[depth]->balance;
--depth;
}
/* Rebalance the tree by pointer rotations if necessary. */
if (depth && ((dirs[depth] == L && --links[depth]->balance)
|| (dirs[depth] == R && ++links[depth]->balance)))
{
switch (links[depth]->balance)
{
case (char) -2:
switch (dirs[depth + 1])
{
case L:
r = links[depth], t = r->llink, rl = t->rlink;
t->rlink = r, r->llink = rl;
t->balance = r->balance = 0;
break;
case R:
r = links[depth], l = r->llink, t = l->rlink;
rl = t->rlink, lr = t->llink;
t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
l->balance = t->balance != 1 ? 0 : -1;
r->balance = t->balance != (char) -1 ? 0 : 1;
t->balance = 0;
break;
}
break;
case 2:
switch (dirs[depth + 1])
{
case R:
l = links[depth], t = l->rlink, lr = t->llink;
t->llink = l, l->rlink = lr;
t->balance = l->balance = 0;
break;
case L:
l = links[depth], r = l->rlink, t = r->llink;
lr = t->llink, rl = t->rlink;
t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
l->balance = t->balance != 1 ? 0 : -1;
r->balance = t->balance != (char) -1 ? 0 : 1;
t->balance = 0;
break;
}
break;
}
if (dirs[depth - 1] == L)
links[depth - 1]->llink = t;
else
links[depth - 1]->rlink = t;
}
}
trie = link->trie;
}
/* Mark the node we finally reached as accepting, encoding the
index number of this word in the keyword set so far. */
if (!trie->accepting)
trie->accepting = 1 + 2 * kwset->words;
++kwset->words;
/* Keep track of the longest and shortest string of the keyword set. */
if (trie->depth < kwset->mind)
kwset->mind = trie->depth;
if (trie->depth > kwset->maxd)
kwset->maxd = trie->depth;
return 0;
}
/* Enqueue the trie nodes referenced from the given tree in the
given queue. */
static void
enqueue(tree, last)
struct tree *tree;
struct trie **last;
{
if (!tree)
return;
enqueue(tree->llink, last);
enqueue(tree->rlink, last);
(*last) = (*last)->next = tree->trie;
}
/* Compute the Aho-Corasick failure function for the trie nodes referenced
from the given tree, given the failure function for their parent as
well as a last resort failure node. */
static void
treefails(tree, fail, recourse)
register struct tree *tree;
struct trie *fail;
struct trie *recourse;
{
register struct tree *link;
if (!tree)
return;
treefails(tree->llink, fail, recourse);
treefails(tree->rlink, fail, recourse);
/* Find, in the chain of fails going back to the root, the first
node that has a descendent on the current label. */
while (fail)
{
link = fail->links;
while (link && tree->label != link->label)
if (tree->label < link->label)
link = link->llink;
else
link = link->rlink;
if (link)
{
tree->trie->fail = link->trie;
return;
}
fail = fail->fail;
}
tree->trie->fail = recourse;
}
/* Set delta entries for the links of the given tree such that
the preexisting delta value is larger than the current depth. */
static void
treedelta(tree, depth, delta)
register struct tree *tree;
register unsigned int depth;
unsigned char delta[];
{
if (!tree)
return;
treedelta(tree->llink, depth, delta);
treedelta(tree->rlink, depth, delta);
if (depth < delta[tree->label])
delta[tree->label] = depth;
}
/* Return true if A has every label in B. */
static int
hasevery(a, b)
register struct tree *a;
register struct tree *b;
{
if (!b)
return 1;
if (!hasevery(a, b->llink))
return 0;
if (!hasevery(a, b->rlink))
return 0;
while (a && b->label != a->label)
if (b->label < a->label)
a = a->llink;
else
a = a->rlink;
return !!a;
}
/* Compute a vector, indexed by character code, of the trie nodes
referenced from the given tree. */
static void
treenext(tree, next)
struct tree *tree;
struct trie *next[];
{
if (!tree)
return;
treenext(tree->llink, next);
treenext(tree->rlink, next);
next[tree->label] = tree->trie;
}
/* Compute the shift for each trie node, as well as the delta
table and next cache for the given keyword set. */
char *
kwsprep(kws)
kwset_t kws;
{
register struct kwset *kwset;
register int i;
register struct trie *curr, *fail;
register char *trans;
unsigned char delta[NCHAR];
struct trie *last, *next[NCHAR];
kwset = (struct kwset *) kws;
/* Initial values for the delta table; will be changed later. The
delta entry for a given character is the smallest depth of any
node at which an outgoing edge is labeled by that character. */
if (kwset->mind < 256)
for (i = 0; i < NCHAR; ++i)
delta[i] = kwset->mind;
else
for (i = 0; i < NCHAR; ++i)
delta[i] = 255;
/* Check if we can use the simple boyer-moore algorithm, instead
of the hairy commentz-walter algorithm. */
if (kwset->words == 1 && kwset->trans == 0)
{
/* Looking for just one string. Extract it from the trie. */
kwset->target = obstack_alloc(&kwset->obstack, kwset->mind);
for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
{
kwset->target[i] = curr->links->label;
curr = curr->links->trie;
}
/* Build the Boyer Moore delta. Boy that's easy compared to CW. */
for (i = 0; i < kwset->mind; ++i)
delta[(unsigned char) kwset->target[i]] = kwset->mind - (i + 1);
kwset->mind2 = kwset->mind;
/* Find the minimal delta2 shift that we might make after
a backwards match has failed. */
for (i = 0; i < kwset->mind - 1; ++i)
if (kwset->target[i] == kwset->target[kwset->mind - 1])
kwset->mind2 = kwset->mind - (i + 1);
}
else
{
/* Traverse the nodes of the trie in level order, simultaneously
computing the delta table, failure function, and shift function. */
for (curr = last = kwset->trie; curr; curr = curr->next)
{
/* Enqueue the immediate descendents in the level order queue. */
enqueue(curr->links, &last);
curr->shift = kwset->mind;
curr->maxshift = kwset->mind;
/* Update the delta table for the descendents of this node. */
treedelta(curr->links, curr->depth, delta);
/* Compute the failure function for the decendents of this node. */
treefails(curr->links, curr->fail, kwset->trie);
/* Update the shifts at each node in the current node's chain
of fails back to the root. */
for (fail = curr->fail; fail; fail = fail->fail)
{
/* If the current node has some outgoing edge that the fail
doesn't, then the shift at the fail should be no larger
than the difference of their depths. */
if (!hasevery(fail->links, curr->links))
if (curr->depth - fail->depth < fail->shift)
fail->shift = curr->depth - fail->depth;
/* If the current node is accepting then the shift at the
fail and its descendents should be no larger than the
difference of their depths. */
if (curr->accepting && fail->maxshift > curr->depth - fail->depth)
fail->maxshift = curr->depth - fail->depth;
}
}
/* Traverse the trie in level order again, fixing up all nodes whose
shift exceeds their inherited maxshift. */
for (curr = kwset->trie->next; curr; curr = curr->next)
{
if (curr->maxshift > curr->parent->maxshift)
curr->maxshift = curr->parent->maxshift;
if (curr->shift > curr->maxshift)
curr->shift = curr->maxshift;
}
/* Create a vector, indexed by character code, of the outgoing links
from the root node. */
for (i = 0; i < NCHAR; ++i)
next[i] = 0;
treenext(kwset->trie->links, next);
if ((trans = kwset->trans) != 0)
for (i = 0; i < NCHAR; ++i)
kwset->next[i] = next[(unsigned char) trans[i]];
else
for (i = 0; i < NCHAR; ++i)
kwset->next[i] = next[i];
}
/* Fix things up for any translation table. */
if ((trans = kwset->trans) != 0)
for (i = 0; i < NCHAR; ++i)
kwset->delta[i] = delta[(unsigned char) trans[i]];
else
for (i = 0; i < NCHAR; ++i)
kwset->delta[i] = delta[i];
return 0;
}
#define U(C) ((unsigned char) (C))
/* Fast boyer-moore search. */
static char *
bmexec(kws, text, size)
kwset_t kws;
char *text;
size_t size;
{
struct kwset *kwset;
register unsigned char *d1;
register char *ep, *sp, *tp;
register int d, gc, i, len, md2;
kwset = (struct kwset *) kws;
len = kwset->mind;
if (len == 0)
return text;
if (len > size)
return 0;
if (len == 1)
return memchr(text, kwset->target[0], size);
d1 = kwset->delta;
sp = kwset->target + len;
gc = U(sp[-2]);
md2 = kwset->mind2;
tp = text + len;
/* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
if (size > 12 * len)
/* 11 is not a bug, the initial offset happens only once. */
for (ep = text + size - 11 * len;;)
{
while (tp <= ep)
{
d = d1[U(tp[-1])], tp += d;
d = d1[U(tp[-1])], tp += d;
if (d == 0)
goto found;
d = d1[U(tp[-1])], tp += d;
d = d1[U(tp[-1])], tp += d;
d = d1[U(tp[-1])], tp += d;
if (d == 0)
goto found;
d = d1[U(tp[-1])], tp += d;
d = d1[U(tp[-1])], tp += d;
d = d1[U(tp[-1])], tp += d;
if (d == 0)
goto found;
d = d1[U(tp[-1])], tp += d;
d = d1[U(tp[-1])], tp += d;
}
break;
found:
if (U(tp[-2]) == gc)
{
for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
;
if (i > len)
return tp - len;
}
tp += md2;
}
/* Now we have only a few characters left to search. We
carefully avoid ever producing an out-of-bounds pointer. */
ep = text + size;
d = d1[U(tp[-1])];
while (d <= ep - tp)
{
d = d1[U((tp += d)[-1])];
if (d != 0)
continue;
if (tp[-2] == gc)
{
for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
;
if (i > len)
return tp - len;
}
d = md2;
}
return 0;
}
/* Hairy multiple string search. */
static char *
cwexec(kws, text, len, kwsmatch)
kwset_t kws;
char *text;
size_t len;
struct kwsmatch *kwsmatch;
{
struct kwset *kwset;
struct trie **next, *trie, *accept;
char *beg, *lim, *mch, *lmch;
register unsigned char c, *delta;
register int d;
register char *end, *qlim;
register struct tree *tree;
register char *trans;
/* Initialize register copies and look for easy ways out. */
kwset = (struct kwset *) kws;
if (len < kwset->mind)
return 0;
next = kwset->next;
delta = kwset->delta;
trans = kwset->trans;
lim = text + len;
end = text;
if ((d = kwset->mind) != 0)
mch = 0;
else
{
mch = text, accept = kwset->trie;
goto match;
}
if (len >= 4 * kwset->mind)
qlim = lim - 4 * kwset->mind;
else
qlim = 0;
while (lim - end >= d)
{
if (qlim && end <= qlim)
{
end += d - 1;
while ((d = delta[c = *end]) && end < qlim)
{
end += d;
end += delta[(unsigned char) *end];
end += delta[(unsigned char) *end];
}
++end;
}
else
d = delta[c = (end += d)[-1]];
if (d)
continue;
beg = end - 1;
trie = next[c];
if (trie->accepting)
{
mch = beg;
accept = trie;
}
d = trie->shift;
while (beg > text)
{
c = trans ? trans[(unsigned char) *--beg] : *--beg;
tree = trie->links;
while (tree && c != tree->label)
if (c < tree->label)
tree = tree->llink;
else
tree = tree->rlink;
if (tree)
{
trie = tree->trie;
if (trie->accepting)
{
mch = beg;
accept = trie;
}
}
else
break;
d = trie->shift;
}
if (mch)
goto match;
}
return 0;
match:
/* Given a known match, find the longest possible match anchored
at or before its starting point. This is nearly a verbatim
copy of the preceding main search loops. */
if (lim - mch > kwset->maxd)
lim = mch + kwset->maxd;
lmch = 0;
d = 1;
while (lim - end >= d)
{
if ((d = delta[c = (end += d)[-1]]) != 0)
continue;
beg = end - 1;
if (!(trie = next[c]))
{
d = 1;
continue;
}
if (trie->accepting && beg <= mch)
{
lmch = beg;
accept = trie;
}
d = trie->shift;
while (beg > text)
{
c = trans ? trans[(unsigned char) *--beg] : *--beg;
tree = trie->links;
while (tree && c != tree->label)
if (c < tree->label)
tree = tree->llink;
else
tree = tree->rlink;
if (tree)
{
trie = tree->trie;
if (trie->accepting && beg <= mch)
{
lmch = beg;
accept = trie;
}
}
else
break;
d = trie->shift;
}
if (lmch)
{
mch = lmch;
goto match;
}
if (!d)
d = 1;
}
if (kwsmatch)
{
kwsmatch->index = accept->accepting / 2;
kwsmatch->beg[0] = mch;
kwsmatch->size[0] = accept->depth;
}
return mch;
}
/* Search through the given text for a match of any member of the
given keyword set. Return a pointer to the first character of
the matching substring, or NULL if no match is found. If FOUNDLEN
is non-NULL store in the referenced location the length of the
matching substring. Similarly, if FOUNDIDX is non-NULL, store
in the referenced location the index number of the particular
keyword matched. */
char *
kwsexec(kws, text, size, kwsmatch)
kwset_t kws;
char *text;
size_t size;
struct kwsmatch *kwsmatch;
{
struct kwset *kwset;
char *ret;
kwset = (struct kwset *) kws;
if (kwset->words == 1 && kwset->trans == 0)
{
ret = bmexec(kws, text, size);
if (kwsmatch != 0 && ret != 0)
{
kwsmatch->index = 0;
kwsmatch->beg[0] = ret;
kwsmatch->size[0] = kwset->mind;
}
return ret;
}
else
return cwexec(kws, text, size, kwsmatch);
}
/* Free the components of the given keyword set. */
void
kwsfree(kws)
kwset_t kws;
{
struct kwset *kwset;
kwset = (struct kwset *) kws;
obstack_free(&kwset->obstack, 0);
free(kws);
}

69
gnu/usr.bin/grep/kwset.h Normal file
View File

@ -0,0 +1,69 @@
/* kwset.h - header declaring the keyword set library.
Copyright 1989 Free Software Foundation
Written August 1989 by Mike Haertel.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 1, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
The author may be reached (Email) at the address mike@ai.mit.edu,
or (US mail) as Mike Haertel c/o Free Software Foundation. */
struct kwsmatch
{
int index; /* Index number of matching keyword. */
char *beg[1]; /* Begin pointer for each submatch. */
size_t size[1]; /* Length of each submatch. */
};
#if __STDC__
typedef void *kwset_t;
/* Return an opaque pointer to a newly allocated keyword set, or NULL
if enough memory cannot be obtained. The argument if non-NULL
specifies a table of character translations to be applied to all
pattern and search text. */
extern kwset_t kwsalloc(char *);
/* Incrementally extend the keyword set to include the given string.
Return NULL for success, or an error message. Remember an index
number for each keyword included in the set. */
extern char *kwsincr(kwset_t, char *, size_t);
/* When the keyword set has been completely built, prepare it for
use. Return NULL for success, or an error message. */
extern char *kwsprep(kwset_t);
/* Search through the given buffer for a member of the keyword set.
Return a pointer to the leftmost longest match found, or NULL if
no match is found. If foundlen is non-NULL, store the length of
the matching substring in the integer it points to. Similarly,
if foundindex is non-NULL, store the index of the particular
keyword found therein. */
extern char *kwsexec(kwset_t, char *, size_t, struct kwsmatch *);
/* Deallocate the given keyword set and all its associated storage. */
extern void kwsfree(kwset_t);
#else
typedef char *kwset_t;
extern kwset_t kwsalloc();
extern char *kwsincr();
extern char *kwsprep();
extern char *kwsexec();
extern void kwsfree();
#endif

454
gnu/usr.bin/grep/obstack.c Normal file
View File

@ -0,0 +1,454 @@
/* obstack.c - subroutines used implicitly by object stack macros
Copyright (C) 1988, 1993 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
#include "obstack.h"
/* This is just to get __GNU_LIBRARY__ defined. */
#include <stdio.h>
/* Comment out all this code if we are using the GNU C Library, and are not
actually compiling the library itself. This code is part of the GNU C
Library, but also included in many other GNU distributions. Compiling
and linking in this code is a waste when using the GNU C library
(especially if it is a shared library). Rather than having every GNU
program understand `configure --with-gnu-libc' and omit the object files,
it is simpler to just do this in the source for each such file. */
#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
#ifdef __STDC__
#define POINTER void *
#else
#define POINTER char *
#endif
/* Determine default alignment. */
struct fooalign {char x; double d;};
#define DEFAULT_ALIGNMENT \
((PTR_INT_TYPE) ((char *)&((struct fooalign *) 0)->d - (char *)0))
/* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT.
But in fact it might be less smart and round addresses to as much as
DEFAULT_ROUNDING. So we prepare for it to do that. */
union fooround {long x; double d;};
#define DEFAULT_ROUNDING (sizeof (union fooround))
/* When we copy a long block of data, this is the unit to do it with.
On some machines, copying successive ints does not work;
in such a case, redefine COPYING_UNIT to `long' (if that works)
or `char' as a last resort. */
#ifndef COPYING_UNIT
#define COPYING_UNIT int
#endif
/* The non-GNU-C macros copy the obstack into this global variable
to avoid multiple evaluation. */
struct obstack *_obstack;
/* Define a macro that either calls functions with the traditional malloc/free
calling interface, or calls functions with the mmalloc/mfree interface
(that adds an extra first argument), based on the state of use_extra_arg.
For free, do not use ?:, since some compilers, like the MIPS compilers,
do not allow (expr) ? void : void. */
#define CALL_CHUNKFUN(h, size) \
(((h) -> use_extra_arg) \
? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
: (*(h)->chunkfun) ((size)))
#define CALL_FREEFUN(h, old_chunk) \
do { \
if ((h) -> use_extra_arg) \
(*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
else \
(*(h)->freefun) ((old_chunk)); \
} while (0)
/* Initialize an obstack H for use. Specify chunk size SIZE (0 means default).
Objects start on multiples of ALIGNMENT (0 means use default).
CHUNKFUN is the function to use to allocate chunks,
and FREEFUN the function to free them. */
void
_obstack_begin (h, size, alignment, chunkfun, freefun)
struct obstack *h;
int size;
int alignment;
POINTER (*chunkfun) ();
void (*freefun) ();
{
register struct _obstack_chunk* chunk; /* points to new chunk */
if (alignment == 0)
alignment = DEFAULT_ALIGNMENT;
if (size == 0)
/* Default size is what GNU malloc can fit in a 4096-byte block. */
{
/* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
Use the values for range checking, because if range checking is off,
the extra bytes won't be missed terribly, but if range checking is on
and we used a larger request, a whole extra 4096 bytes would be
allocated.
These number are irrelevant to the new GNU malloc. I suspect it is
less sensitive to the size of the request. */
int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
+ 4 + DEFAULT_ROUNDING - 1)
& ~(DEFAULT_ROUNDING - 1));
size = 4096 - extra;
}
h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
h->freefun = freefun;
h->chunk_size = size;
h->alignment_mask = alignment - 1;
h->use_extra_arg = 0;
chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
h->next_free = h->object_base = chunk->contents;
h->chunk_limit = chunk->limit
= (char *) chunk + h->chunk_size;
chunk->prev = 0;
/* The initial chunk now contains no empty object. */
h->maybe_empty_object = 0;
}
void
_obstack_begin_1 (h, size, alignment, chunkfun, freefun, arg)
struct obstack *h;
int size;
int alignment;
POINTER (*chunkfun) ();
void (*freefun) ();
POINTER arg;
{
register struct _obstack_chunk* chunk; /* points to new chunk */
if (alignment == 0)
alignment = DEFAULT_ALIGNMENT;
if (size == 0)
/* Default size is what GNU malloc can fit in a 4096-byte block. */
{
/* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
Use the values for range checking, because if range checking is off,
the extra bytes won't be missed terribly, but if range checking is on
and we used a larger request, a whole extra 4096 bytes would be
allocated.
These number are irrelevant to the new GNU malloc. I suspect it is
less sensitive to the size of the request. */
int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
+ 4 + DEFAULT_ROUNDING - 1)
& ~(DEFAULT_ROUNDING - 1));
size = 4096 - extra;
}
h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
h->freefun = freefun;
h->chunk_size = size;
h->alignment_mask = alignment - 1;
h->extra_arg = arg;
h->use_extra_arg = 1;
chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
h->next_free = h->object_base = chunk->contents;
h->chunk_limit = chunk->limit
= (char *) chunk + h->chunk_size;
chunk->prev = 0;
/* The initial chunk now contains no empty object. */
h->maybe_empty_object = 0;
}
/* Allocate a new current chunk for the obstack *H
on the assumption that LENGTH bytes need to be added
to the current object, or a new object of length LENGTH allocated.
Copies any partial object from the end of the old chunk
to the beginning of the new one. */
void
_obstack_newchunk (h, length)
struct obstack *h;
int length;
{
register struct _obstack_chunk* old_chunk = h->chunk;
register struct _obstack_chunk* new_chunk;
register long new_size;
register int obj_size = h->next_free - h->object_base;
register int i;
int already;
/* Compute size for new chunk. */
new_size = (obj_size + length) + (obj_size >> 3) + 100;
if (new_size < h->chunk_size)
new_size = h->chunk_size;
/* Allocate and initialize the new chunk. */
new_chunk = h->chunk = CALL_CHUNKFUN (h, new_size);
new_chunk->prev = old_chunk;
new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size;
/* Move the existing object to the new chunk.
Word at a time is fast and is safe if the object
is sufficiently aligned. */
if (h->alignment_mask + 1 >= DEFAULT_ALIGNMENT)
{
for (i = obj_size / sizeof (COPYING_UNIT) - 1;
i >= 0; i--)
((COPYING_UNIT *)new_chunk->contents)[i]
= ((COPYING_UNIT *)h->object_base)[i];
/* We used to copy the odd few remaining bytes as one extra COPYING_UNIT,
but that can cross a page boundary on a machine
which does not do strict alignment for COPYING_UNITS. */
already = obj_size / sizeof (COPYING_UNIT) * sizeof (COPYING_UNIT);
}
else
already = 0;
/* Copy remaining bytes one by one. */
for (i = already; i < obj_size; i++)
new_chunk->contents[i] = h->object_base[i];
/* If the object just copied was the only data in OLD_CHUNK,
free that chunk and remove it from the chain.
But not if that chunk might contain an empty object. */
if (h->object_base == old_chunk->contents && ! h->maybe_empty_object)
{
new_chunk->prev = old_chunk->prev;
CALL_FREEFUN (h, old_chunk);
}
h->object_base = new_chunk->contents;
h->next_free = h->object_base + obj_size;
/* The new chunk certainly contains no empty object yet. */
h->maybe_empty_object = 0;
}
/* Return nonzero if object OBJ has been allocated from obstack H.
This is here for debugging.
If you use it in a program, you are probably losing. */
int
_obstack_allocated_p (h, obj)
struct obstack *h;
POINTER obj;
{
register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */
register struct _obstack_chunk* plp; /* point to previous chunk if any */
lp = (h)->chunk;
/* We use >= rather than > since the object cannot be exactly at
the beginning of the chunk but might be an empty object exactly
at the end of an adjacent chunk. */
while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
{
plp = lp->prev;
lp = plp;
}
return lp != 0;
}
/* Free objects in obstack H, including OBJ and everything allocate
more recently than OBJ. If OBJ is zero, free everything in H. */
#undef obstack_free
/* This function has two names with identical definitions.
This is the first one, called from non-ANSI code. */
void
_obstack_free (h, obj)
struct obstack *h;
POINTER obj;
{
register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */
register struct _obstack_chunk* plp; /* point to previous chunk if any */
lp = h->chunk;
/* We use >= because there cannot be an object at the beginning of a chunk.
But there can be an empty object at that address
at the end of another chunk. */
while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
{
plp = lp->prev;
CALL_FREEFUN (h, lp);
lp = plp;
/* If we switch chunks, we can't tell whether the new current
chunk contains an empty object, so assume that it may. */
h->maybe_empty_object = 1;
}
if (lp)
{
h->object_base = h->next_free = (char *)(obj);
h->chunk_limit = lp->limit;
h->chunk = lp;
}
else if (obj != 0)
/* obj is not in any of the chunks! */
abort ();
}
/* This function is used from ANSI code. */
void
obstack_free (h, obj)
struct obstack *h;
POINTER obj;
{
register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */
register struct _obstack_chunk* plp; /* point to previous chunk if any */
lp = h->chunk;
/* We use >= because there cannot be an object at the beginning of a chunk.
But there can be an empty object at that address
at the end of another chunk. */
while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
{
plp = lp->prev;
CALL_FREEFUN (h, lp);
lp = plp;
/* If we switch chunks, we can't tell whether the new current
chunk contains an empty object, so assume that it may. */
h->maybe_empty_object = 1;
}
if (lp)
{
h->object_base = h->next_free = (char *)(obj);
h->chunk_limit = lp->limit;
h->chunk = lp;
}
else if (obj != 0)
/* obj is not in any of the chunks! */
abort ();
}
#if 0
/* These are now turned off because the applications do not use it
and it uses bcopy via obstack_grow, which causes trouble on sysV. */
/* Now define the functional versions of the obstack macros.
Define them to simply use the corresponding macros to do the job. */
#ifdef __STDC__
/* These function definitions do not work with non-ANSI preprocessors;
they won't pass through the macro names in parentheses. */
/* The function names appear in parentheses in order to prevent
the macro-definitions of the names from being expanded there. */
POINTER (obstack_base) (obstack)
struct obstack *obstack;
{
return obstack_base (obstack);
}
POINTER (obstack_next_free) (obstack)
struct obstack *obstack;
{
return obstack_next_free (obstack);
}
int (obstack_object_size) (obstack)
struct obstack *obstack;
{
return obstack_object_size (obstack);
}
int (obstack_room) (obstack)
struct obstack *obstack;
{
return obstack_room (obstack);
}
void (obstack_grow) (obstack, pointer, length)
struct obstack *obstack;
POINTER pointer;
int length;
{
obstack_grow (obstack, pointer, length);
}
void (obstack_grow0) (obstack, pointer, length)
struct obstack *obstack;
POINTER pointer;
int length;
{
obstack_grow0 (obstack, pointer, length);
}
void (obstack_1grow) (obstack, character)
struct obstack *obstack;
int character;
{
obstack_1grow (obstack, character);
}
void (obstack_blank) (obstack, length)
struct obstack *obstack;
int length;
{
obstack_blank (obstack, length);
}
void (obstack_1grow_fast) (obstack, character)
struct obstack *obstack;
int character;
{
obstack_1grow_fast (obstack, character);
}
void (obstack_blank_fast) (obstack, length)
struct obstack *obstack;
int length;
{
obstack_blank_fast (obstack, length);
}
POINTER (obstack_finish) (obstack)
struct obstack *obstack;
{
return obstack_finish (obstack);
}
POINTER (obstack_alloc) (obstack, length)
struct obstack *obstack;
int length;
{
return obstack_alloc (obstack, length);
}
POINTER (obstack_copy) (obstack, pointer, length)
struct obstack *obstack;
POINTER pointer;
int length;
{
return obstack_copy (obstack, pointer, length);
}
POINTER (obstack_copy0) (obstack, pointer, length)
struct obstack *obstack;
POINTER pointer;
int length;
{
return obstack_copy0 (obstack, pointer, length);
}
#endif /* __STDC__ */
#endif /* 0 */
#endif /* _LIBC or not __GNU_LIBRARY__. */

484
gnu/usr.bin/grep/obstack.h Normal file
View File

@ -0,0 +1,484 @@
/* obstack.h - object stack macros
Copyright (C) 1988, 1992 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Summary:
All the apparent functions defined here are macros. The idea
is that you would use these pre-tested macros to solve a
very specific set of problems, and they would run fast.
Caution: no side-effects in arguments please!! They may be
evaluated MANY times!!
These macros operate a stack of objects. Each object starts life
small, and may grow to maturity. (Consider building a word syllable
by syllable.) An object can move while it is growing. Once it has
been "finished" it never changes address again. So the "top of the
stack" is typically an immature growing object, while the rest of the
stack is of mature, fixed size and fixed address objects.
These routines grab large chunks of memory, using a function you
supply, called `obstack_chunk_alloc'. On occasion, they free chunks,
by calling `obstack_chunk_free'. You must define them and declare
them before using any obstack macros.
Each independent stack is represented by a `struct obstack'.
Each of the obstack macros expects a pointer to such a structure
as the first argument.
One motivation for this package is the problem of growing char strings
in symbol tables. Unless you are "fascist pig with a read-only mind"
--Gosper's immortal quote from HAKMEM item 154, out of context--you
would not like to put any arbitrary upper limit on the length of your
symbols.
In practice this often means you will build many short symbols and a
few long symbols. At the time you are reading a symbol you don't know
how long it is. One traditional method is to read a symbol into a
buffer, realloc()ating the buffer every time you try to read a symbol
that is longer than the buffer. This is beaut, but you still will
want to copy the symbol from the buffer to a more permanent
symbol-table entry say about half the time.
With obstacks, you can work differently. Use one obstack for all symbol
names. As you read a symbol, grow the name in the obstack gradually.
When the name is complete, finalize it. Then, if the symbol exists already,
free the newly read name.
The way we do this is to take a large chunk, allocating memory from
low addresses. When you want to build a symbol in the chunk you just
add chars above the current "high water mark" in the chunk. When you
have finished adding chars, because you got to the end of the symbol,
you know how long the chars are, and you can create a new object.
Mostly the chars will not burst over the highest address of the chunk,
because you would typically expect a chunk to be (say) 100 times as
long as an average object.
In case that isn't clear, when we have enough chars to make up
the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed)
so we just point to it where it lies. No moving of chars is
needed and this is the second win: potentially long strings need
never be explicitly shuffled. Once an object is formed, it does not
change its address during its lifetime.
When the chars burst over a chunk boundary, we allocate a larger
chunk, and then copy the partly formed object from the end of the old
chunk to the beginning of the new larger chunk. We then carry on
accreting characters to the end of the object as we normally would.
A special macro is provided to add a single char at a time to a
growing object. This allows the use of register variables, which
break the ordinary 'growth' macro.
Summary:
We allocate large chunks.
We carve out one object at a time from the current chunk.
Once carved, an object never moves.
We are free to append data of any size to the currently
growing object.
Exactly one object is growing in an obstack at any one time.
You can run one obstack per control block.
You may have as many control blocks as you dare.
Because of the way we do it, you can `unwind' an obstack
back to a previous state. (You may remove objects much
as you would with a stack.)
*/
/* Don't do the contents of this file more than once. */
#ifndef __OBSTACKS__
#define __OBSTACKS__
/* We use subtraction of (char *)0 instead of casting to int
because on word-addressable machines a simple cast to int
may ignore the byte-within-word field of the pointer. */
#ifndef __PTR_TO_INT
#define __PTR_TO_INT(P) ((P) - (char *)0)
#endif
#ifndef __INT_TO_PTR
#define __INT_TO_PTR(P) ((P) + (char *)0)
#endif
/* We need the type of the resulting object. In ANSI C it is ptrdiff_t
but in traditional C it is usually long. If we are in ANSI C and
don't already have ptrdiff_t get it. */
#if defined (__STDC__) && ! defined (offsetof)
#if defined (__GNUC__) && defined (IN_GCC)
/* On Next machine, the system's stddef.h screws up if included
after we have defined just ptrdiff_t, so include all of gstddef.h.
Otherwise, define just ptrdiff_t, which is all we need. */
#ifndef __NeXT__
#define __need_ptrdiff_t
#endif
/* While building GCC, the stddef.h that goes with GCC has this name. */
#include "gstddef.h"
#else
#include <stddef.h>
#endif
#endif
#ifdef __STDC__
#define PTR_INT_TYPE ptrdiff_t
#else
#define PTR_INT_TYPE long
#endif
struct _obstack_chunk /* Lives at front of each chunk. */
{
char *limit; /* 1 past end of this chunk */
struct _obstack_chunk *prev; /* address of prior chunk or NULL */
char contents[4]; /* objects begin here */
};
struct obstack /* control current object in current chunk */
{
long chunk_size; /* preferred size to allocate chunks in */
struct _obstack_chunk* chunk; /* address of current struct obstack_chunk */
char *object_base; /* address of object we are building */
char *next_free; /* where to add next char to current object */
char *chunk_limit; /* address of char after current chunk */
PTR_INT_TYPE temp; /* Temporary for some macros. */
int alignment_mask; /* Mask of alignment for each object. */
struct _obstack_chunk *(*chunkfun) (); /* User's fcn to allocate a chunk. */
void (*freefun) (); /* User's function to free a chunk. */
char *extra_arg; /* first arg for chunk alloc/dealloc funcs */
unsigned use_extra_arg:1; /* chunk alloc/dealloc funcs take extra arg */
unsigned maybe_empty_object:1;/* There is a possibility that the current
chunk contains a zero-length object. This
prevents freeing the chunk if we allocate
a bigger chunk to replace it. */
};
/* Declare the external functions we use; they are in obstack.c. */
#ifdef __STDC__
extern void _obstack_newchunk (struct obstack *, int);
extern void _obstack_free (struct obstack *, void *);
extern void _obstack_begin (struct obstack *, int, int,
void *(*) (), void (*) ());
extern void _obstack_begin_1 (struct obstack *, int, int,
void *(*) (), void (*) (), void *);
#else
extern void _obstack_newchunk ();
extern void _obstack_free ();
extern void _obstack_begin ();
extern void _obstack_begin_1 ();
#endif
#ifdef __STDC__
/* Do the function-declarations after the structs
but before defining the macros. */
void obstack_init (struct obstack *obstack);
void * obstack_alloc (struct obstack *obstack, int size);
void * obstack_copy (struct obstack *obstack, void *address, int size);
void * obstack_copy0 (struct obstack *obstack, void *address, int size);
void obstack_free (struct obstack *obstack, void *block);
void obstack_blank (struct obstack *obstack, int size);
void obstack_grow (struct obstack *obstack, void *data, int size);
void obstack_grow0 (struct obstack *obstack, void *data, int size);
void obstack_1grow (struct obstack *obstack, int data_char);
void obstack_ptr_grow (struct obstack *obstack, void *data);
void obstack_int_grow (struct obstack *obstack, int data);
void * obstack_finish (struct obstack *obstack);
int obstack_object_size (struct obstack *obstack);
int obstack_room (struct obstack *obstack);
void obstack_1grow_fast (struct obstack *obstack, int data_char);
void obstack_ptr_grow_fast (struct obstack *obstack, void *data);
void obstack_int_grow_fast (struct obstack *obstack, int data);
void obstack_blank_fast (struct obstack *obstack, int size);
void * obstack_base (struct obstack *obstack);
void * obstack_next_free (struct obstack *obstack);
int obstack_alignment_mask (struct obstack *obstack);
int obstack_chunk_size (struct obstack *obstack);
#endif /* __STDC__ */
/* Non-ANSI C cannot really support alternative functions for these macros,
so we do not declare them. */
/* Pointer to beginning of object being allocated or to be allocated next.
Note that this might not be the final address of the object
because a new chunk might be needed to hold the final size. */
#define obstack_base(h) ((h)->object_base)
/* Size for allocating ordinary chunks. */
#define obstack_chunk_size(h) ((h)->chunk_size)
/* Pointer to next byte not yet allocated in current chunk. */
#define obstack_next_free(h) ((h)->next_free)
/* Mask specifying low bits that should be clear in address of an object. */
#define obstack_alignment_mask(h) ((h)->alignment_mask)
#define obstack_init(h) \
_obstack_begin ((h), 0, 0, \
(void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free)
#define obstack_begin(h, size) \
_obstack_begin ((h), (size), 0, \
(void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free)
#define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \
_obstack_begin ((h), (size), (alignment), \
(void *(*) ()) (chunkfun), (void (*) ()) (freefun))
#define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \
_obstack_begin_1 ((h), (size), (alignment), \
(void *(*) ()) (chunkfun), (void (*) ()) (freefun), (arg))
#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = achar)
#define obstack_blank_fast(h,n) ((h)->next_free += (n))
#if defined (__GNUC__) && defined (__STDC__)
#if __GNUC__ < 2 || defined(NeXT)
#define __extension__
#endif
/* For GNU C, if not -traditional,
we can define these macros to compute all args only once
without using a global variable.
Also, we can avoid using the `temp' slot, to make faster code. */
#define obstack_object_size(OBSTACK) \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
(unsigned) (__o->next_free - __o->object_base); })
#define obstack_room(OBSTACK) \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
(unsigned) (__o->chunk_limit - __o->next_free); })
/* Note that the call to _obstack_newchunk is enclosed in (..., 0)
so that we can avoid having void expressions
in the arms of the conditional expression.
Casting the third operand to void was tried before,
but some compilers won't accept it. */
#define obstack_grow(OBSTACK,where,length) \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
int __len = (length); \
((__o->next_free + __len > __o->chunk_limit) \
? (_obstack_newchunk (__o, __len), 0) : 0); \
bcopy (where, __o->next_free, __len); \
__o->next_free += __len; \
(void) 0; })
#define obstack_grow0(OBSTACK,where,length) \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
int __len = (length); \
((__o->next_free + __len + 1 > __o->chunk_limit) \
? (_obstack_newchunk (__o, __len + 1), 0) : 0), \
bcopy (where, __o->next_free, __len), \
__o->next_free += __len, \
*(__o->next_free)++ = 0; \
(void) 0; })
#define obstack_1grow(OBSTACK,datum) \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
((__o->next_free + 1 > __o->chunk_limit) \
? (_obstack_newchunk (__o, 1), 0) : 0), \
*(__o->next_free)++ = (datum); \
(void) 0; })
/* These assume that the obstack alignment is good enough for pointers or ints,
and that the data added so far to the current object
shares that much alignment. */
#define obstack_ptr_grow(OBSTACK,datum) \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
((__o->next_free + sizeof (void *) > __o->chunk_limit) \
? (_obstack_newchunk (__o, sizeof (void *)), 0) : 0), \
*((void **)__o->next_free)++ = ((void *)datum); \
(void) 0; })
#define obstack_int_grow(OBSTACK,datum) \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
((__o->next_free + sizeof (int) > __o->chunk_limit) \
? (_obstack_newchunk (__o, sizeof (int)), 0) : 0), \
*((int *)__o->next_free)++ = ((int)datum); \
(void) 0; })
#define obstack_ptr_grow_fast(h,aptr) (*((void **)(h)->next_free)++ = (void *)aptr)
#define obstack_int_grow_fast(h,aint) (*((int *)(h)->next_free)++ = (int)aint)
#define obstack_blank(OBSTACK,length) \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
int __len = (length); \
((__o->chunk_limit - __o->next_free < __len) \
? (_obstack_newchunk (__o, __len), 0) : 0); \
__o->next_free += __len; \
(void) 0; })
#define obstack_alloc(OBSTACK,length) \
__extension__ \
({ struct obstack *__h = (OBSTACK); \
obstack_blank (__h, (length)); \
obstack_finish (__h); })
#define obstack_copy(OBSTACK,where,length) \
__extension__ \
({ struct obstack *__h = (OBSTACK); \
obstack_grow (__h, (where), (length)); \
obstack_finish (__h); })
#define obstack_copy0(OBSTACK,where,length) \
__extension__ \
({ struct obstack *__h = (OBSTACK); \
obstack_grow0 (__h, (where), (length)); \
obstack_finish (__h); })
/* The local variable is named __o1 to avoid a name conflict
when obstack_blank is called. */
#define obstack_finish(OBSTACK) \
__extension__ \
({ struct obstack *__o1 = (OBSTACK); \
void *value = (void *) __o1->object_base; \
if (__o1->next_free == value) \
__o1->maybe_empty_object = 1; \
__o1->next_free \
= __INT_TO_PTR ((__PTR_TO_INT (__o1->next_free)+__o1->alignment_mask)\
& ~ (__o1->alignment_mask)); \
((__o1->next_free - (char *)__o1->chunk \
> __o1->chunk_limit - (char *)__o1->chunk) \
? (__o1->next_free = __o1->chunk_limit) : 0); \
__o1->object_base = __o1->next_free; \
value; })
#define obstack_free(OBSTACK, OBJ) \
__extension__ \
({ struct obstack *__o = (OBSTACK); \
void *__obj = (OBJ); \
if (__obj > (void *)__o->chunk && __obj < (void *)__o->chunk_limit) \
__o->next_free = __o->object_base = __obj; \
else (obstack_free) (__o, __obj); })
#else /* not __GNUC__ or not __STDC__ */
#define obstack_object_size(h) \
(unsigned) ((h)->next_free - (h)->object_base)
#define obstack_room(h) \
(unsigned) ((h)->chunk_limit - (h)->next_free)
#define obstack_grow(h,where,length) \
( (h)->temp = (length), \
(((h)->next_free + (h)->temp > (h)->chunk_limit) \
? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \
bcopy (where, (h)->next_free, (h)->temp), \
(h)->next_free += (h)->temp)
#define obstack_grow0(h,where,length) \
( (h)->temp = (length), \
(((h)->next_free + (h)->temp + 1 > (h)->chunk_limit) \
? (_obstack_newchunk ((h), (h)->temp + 1), 0) : 0), \
bcopy (where, (h)->next_free, (h)->temp), \
(h)->next_free += (h)->temp, \
*((h)->next_free)++ = 0)
#define obstack_1grow(h,datum) \
( (((h)->next_free + 1 > (h)->chunk_limit) \
? (_obstack_newchunk ((h), 1), 0) : 0), \
*((h)->next_free)++ = (datum))
#define obstack_ptr_grow(h,datum) \
( (((h)->next_free + sizeof (char *) > (h)->chunk_limit) \
? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \
*((char **)(((h)->next_free+=sizeof(char *))-sizeof(char *))) = ((char *)datum))
#define obstack_int_grow(h,datum) \
( (((h)->next_free + sizeof (int) > (h)->chunk_limit) \
? (_obstack_newchunk ((h), sizeof (int)), 0) : 0), \
*((int *)(((h)->next_free+=sizeof(int))-sizeof(int))) = ((int)datum))
#define obstack_ptr_grow_fast(h,aptr) (*((char **)(h)->next_free)++ = (char *)aptr)
#define obstack_int_grow_fast(h,aint) (*((int *)(h)->next_free)++ = (int)aint)
#define obstack_blank(h,length) \
( (h)->temp = (length), \
(((h)->chunk_limit - (h)->next_free < (h)->temp) \
? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \
(h)->next_free += (h)->temp)
#define obstack_alloc(h,length) \
(obstack_blank ((h), (length)), obstack_finish ((h)))
#define obstack_copy(h,where,length) \
(obstack_grow ((h), (where), (length)), obstack_finish ((h)))
#define obstack_copy0(h,where,length) \
(obstack_grow0 ((h), (where), (length)), obstack_finish ((h)))
#define obstack_finish(h) \
( ((h)->next_free == (h)->object_base \
? (((h)->maybe_empty_object = 1), 0) \
: 0), \
(h)->temp = __PTR_TO_INT ((h)->object_base), \
(h)->next_free \
= __INT_TO_PTR ((__PTR_TO_INT ((h)->next_free)+(h)->alignment_mask) \
& ~ ((h)->alignment_mask)), \
(((h)->next_free - (char *)(h)->chunk \
> (h)->chunk_limit - (char *)(h)->chunk) \
? ((h)->next_free = (h)->chunk_limit) : 0), \
(h)->object_base = (h)->next_free, \
__INT_TO_PTR ((h)->temp))
#ifdef __STDC__
#define obstack_free(h,obj) \
( (h)->temp = (char *)(obj) - (char *) (h)->chunk, \
(((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\
? (int) ((h)->next_free = (h)->object_base \
= (h)->temp + (char *) (h)->chunk) \
: (((obstack_free) ((h), (h)->temp + (char *) (h)->chunk), 0), 0)))
#else
#define obstack_free(h,obj) \
( (h)->temp = (char *)(obj) - (char *) (h)->chunk, \
(((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\
? (int) ((h)->next_free = (h)->object_base \
= (h)->temp + (char *) (h)->chunk) \
: (_obstack_free ((h), (h)->temp + (char *) (h)->chunk), 0)))
#endif
#endif /* not __GNUC__ or not __STDC__ */
#endif /* not __OBSTACKS__ */

481
gnu/usr.bin/grep/search.c Normal file
View File

@ -0,0 +1,481 @@
/* search.c - searching subroutines using dfa, kwset and regex for grep.
Copyright (C) 1992 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
Written August 1992 by Mike Haertel. */
#include <ctype.h>
#ifdef STDC_HEADERS
#include <limits.h>
#include <stdlib.h>
#else
#define UCHAR_MAX 255
#include <sys/types.h>
extern char *malloc();
#endif
#ifdef HAVE_MEMCHR
#include <string.h>
#ifdef NEED_MEMORY_H
#include <memory.h>
#endif
#else
#ifdef __STDC__
extern void *memchr();
#else
extern char *memchr();
#endif
#endif
#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
#undef bcopy
#define bcopy(s, d, n) memcpy((d), (s), (n))
#endif
#ifdef isascii
#define ISALNUM(C) (isascii(C) && isalnum(C))
#define ISUPPER(C) (isascii(C) && isupper(C))
#else
#define ISALNUM(C) isalnum(C)
#define ISUPPER(C) isupper(C)
#endif
#define TOLOWER(C) (ISUPPER(C) ? tolower(C) : (C))
#include "grep.h"
#include "dfa.h"
#include "kwset.h"
#include "regex.h"
#define NCHAR (UCHAR_MAX + 1)
#if __STDC__
static void Gcompile(char *, size_t);
static void Ecompile(char *, size_t);
static char *EGexecute(char *, size_t, char **);
static void Fcompile(char *, size_t);
static char *Fexecute(char *, size_t, char **);
#else
static void Gcompile();
static void Ecompile();
static char *EGexecute();
static void Fcompile();
static char *Fexecute();
#endif
/* Here is the matchers vector for the main program. */
struct matcher matchers[] = {
{ "default", Gcompile, EGexecute },
{ "grep", Gcompile, EGexecute },
{ "ggrep", Gcompile, EGexecute },
{ "egrep", Ecompile, EGexecute },
{ "posix-egrep", Ecompile, EGexecute },
{ "gegrep", Ecompile, EGexecute },
{ "fgrep", Fcompile, Fexecute },
{ "gfgrep", Fcompile, Fexecute },
{ 0, 0, 0 },
};
/* For -w, we also consider _ to be word constituent. */
#define WCHAR(C) (ISALNUM(C) || (C) == '_')
/* DFA compiled regexp. */
static struct dfa dfa;
/* Regex compiled regexp. */
static struct re_pattern_buffer regex;
/* KWset compiled pattern. For Ecompile and Gcompile, we compile
a list of strings, at least one of which is known to occur in
any string matching the regexp. */
static kwset_t kwset;
/* Last compiled fixed string known to exactly match the regexp.
If kwsexec() returns < lastexact, then we don't need to
call the regexp matcher at all. */
static int lastexact;
void
dfaerror(mesg)
char *mesg;
{
fatal(mesg, 0);
}
static void
kwsinit()
{
static char trans[NCHAR];
int i;
if (match_icase)
for (i = 0; i < NCHAR; ++i)
trans[i] = TOLOWER(i);
if (!(kwset = kwsalloc(match_icase ? trans : (char *) 0)))
fatal("memory exhausted", 0);
}
/* If the DFA turns out to have some set of fixed strings one of
which must occur in the match, then we build a kwset matcher
to find those strings, and thus quickly filter out impossible
matches. */
static void
kwsmusts()
{
struct dfamust *dm;
char *err;
if (dfa.musts)
{
kwsinit();
/* First, we compile in the substrings known to be exact
matches. The kwset matcher will return the index
of the matching string that it chooses. */
for (dm = dfa.musts; dm; dm = dm->next)
{
if (!dm->exact)
continue;
++lastexact;
if ((err = kwsincr(kwset, dm->must, strlen(dm->must))) != 0)
fatal(err, 0);
}
/* Now, we compile the substrings that will require
the use of the regexp matcher. */
for (dm = dfa.musts; dm; dm = dm->next)
{
if (dm->exact)
continue;
if ((err = kwsincr(kwset, dm->must, strlen(dm->must))) != 0)
fatal(err, 0);
}
if ((err = kwsprep(kwset)) != 0)
fatal(err, 0);
}
}
static void
Gcompile(pattern, size)
char *pattern;
size_t size;
{
#ifdef __STDC__
const
#endif
char *err;
re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase);
if ((err = re_compile_pattern(pattern, size, &regex)) != 0)
fatal(err, 0);
dfainit(&dfa);
/* In the match_words and match_lines cases, we use a different pattern
for the DFA matcher that will quickly throw out cases that won't work.
Then if DFA succeeds we do some hairy stuff using the regex matcher
to decide whether the match should really count. */
if (match_words || match_lines)
{
/* In the whole-word case, we use the pattern:
(^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
In the whole-line case, we use the pattern:
^(userpattern)$.
BUG: Using [A-Za-z_] is locale-dependent! */
char *n = malloc(size + 50);
int i = 0;
strcpy(n, "");
if (match_lines)
strcpy(n, "^\\(");
if (match_words)
strcpy(n, "\\(^\\|[^0-9A-Za-z_]\\)\\(");
i = strlen(n);
bcopy(pattern, n + i, size);
i += size;
if (match_words)
strcpy(n + i, "\\)\\([^0-9A-Za-z_]\\|$\\)");
if (match_lines)
strcpy(n + i, "\\)$");
i += strlen(n + i);
dfacomp(n, i, &dfa, 1);
}
else
dfacomp(pattern, size, &dfa, 1);
kwsmusts();
}
static void
Ecompile(pattern, size)
char *pattern;
size_t size;
{
#ifdef __STDC__
const
#endif
char *err;
if (strcmp(matcher, "posix-egrep") == 0)
{
re_set_syntax(RE_SYNTAX_POSIX_EGREP);
dfasyntax(RE_SYNTAX_POSIX_EGREP, match_icase);
}
else
{
re_set_syntax(RE_SYNTAX_EGREP);
dfasyntax(RE_SYNTAX_EGREP, match_icase);
}
if ((err = re_compile_pattern(pattern, size, &regex)) != 0)
fatal(err, 0);
dfainit(&dfa);
/* In the match_words and match_lines cases, we use a different pattern
for the DFA matcher that will quickly throw out cases that won't work.
Then if DFA succeeds we do some hairy stuff using the regex matcher
to decide whether the match should really count. */
if (match_words || match_lines)
{
/* In the whole-word case, we use the pattern:
(^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
In the whole-line case, we use the pattern:
^(userpattern)$.
BUG: Using [A-Za-z_] is locale-dependent! */
char *n = malloc(size + 50);
int i = 0;
strcpy(n, "");
if (match_lines)
strcpy(n, "^(");
if (match_words)
strcpy(n, "(^|[^0-9A-Za-z_])(");
i = strlen(n);
bcopy(pattern, n + i, size);
i += size;
if (match_words)
strcpy(n + i, ")([^0-9A-Za-z_]|$)");
if (match_lines)
strcpy(n + i, ")$");
i += strlen(n + i);
dfacomp(n, i, &dfa, 1);
}
else
dfacomp(pattern, size, &dfa, 1);
kwsmusts();
}
static char *
EGexecute(buf, size, endp)
char *buf;
size_t size;
char **endp;
{
register char *buflim, *beg, *end, save;
int backref, start, len;
struct kwsmatch kwsm;
static struct re_registers regs; /* This is static on account of a BRAIN-DEAD
Q@#%!# library interface in regex.c. */
buflim = buf + size;
for (beg = end = buf; end < buflim; beg = end + 1)
{
if (kwset)
{
/* Find a possible match using the KWset matcher. */
beg = kwsexec(kwset, beg, buflim - beg, &kwsm);
if (!beg)
goto failure;
/* Narrow down to the line containing the candidate, and
run it through DFA. */
end = memchr(beg, '\n', buflim - beg);
if (!end)
end = buflim;
while (beg > buf && beg[-1] != '\n')
--beg;
save = *end;
if (kwsm.index < lastexact)
goto success;
if (!dfaexec(&dfa, beg, end, 0, (int *) 0, &backref))
{
*end = save;
continue;
}
*end = save;
/* Successful, no backreferences encountered. */
if (!backref)
goto success;
}
else
{
/* No good fixed strings; start with DFA. */
save = *buflim;
beg = dfaexec(&dfa, beg, buflim, 0, (int *) 0, &backref);
*buflim = save;
if (!beg)
goto failure;
/* Narrow down to the line we've found. */
end = memchr(beg, '\n', buflim - beg);
if (!end)
end = buflim;
while (beg > buf && beg[-1] != '\n')
--beg;
/* Successful, no backreferences encountered! */
if (!backref)
goto success;
}
/* If we've made it to this point, this means DFA has seen
a probable match, and we need to run it through Regex. */
regex.not_eol = 0;
if ((start = re_search(&regex, beg, end - beg, 0, end - beg, &regs)) >= 0)
{
len = regs.end[0] - start;
if (!match_lines && !match_words || match_lines && len == end - beg)
goto success;
/* If -w, check if the match aligns with word boundaries.
We do this iteratively because:
(a) the line may contain more than one occurence of the pattern, and
(b) Several alternatives in the pattern might be valid at a given
point, and we may need to consider a shorter one to find a word
boundary. */
if (match_words)
while (start >= 0)
{
if ((start == 0 || !WCHAR(beg[start - 1]))
&& (len == end - beg || !WCHAR(beg[start + len])))
goto success;
if (len > 0)
{
/* Try a shorter length anchored at the same place. */
--len;
regex.not_eol = 1;
len = re_match(&regex, beg, start + len, start, &regs);
}
if (len <= 0)
{
/* Try looking further on. */
if (start == end - beg)
break;
++start;
regex.not_eol = 0;
start = re_search(&regex, beg, end - beg,
start, end - beg - start, &regs);
len = regs.end[0] - start;
}
}
}
}
failure:
return 0;
success:
*endp = end < buflim ? end + 1 : end;
return beg;
}
static void
Fcompile(pattern, size)
char *pattern;
size_t size;
{
char *beg, *lim, *err;
kwsinit();
beg = pattern;
do
{
for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim)
;
if ((err = kwsincr(kwset, beg, lim - beg)) != 0)
fatal(err, 0);
if (lim < pattern + size)
++lim;
beg = lim;
}
while (beg < pattern + size);
if ((err = kwsprep(kwset)) != 0)
fatal(err, 0);
}
static char *
Fexecute(buf, size, endp)
char *buf;
size_t size;
char **endp;
{
register char *beg, *try, *end;
register size_t len;
struct kwsmatch kwsmatch;
for (beg = buf; beg <= buf + size; ++beg)
{
if (!(beg = kwsexec(kwset, beg, buf + size - beg, &kwsmatch)))
return 0;
len = kwsmatch.size[0];
if (match_lines)
{
if (beg > buf && beg[-1] != '\n')
continue;
if (beg + len < buf + size && beg[len] != '\n')
continue;
goto success;
}
else if (match_words)
for (try = beg; len && try;)
{
if (try > buf && WCHAR((unsigned char) try[-1]))
break;
if (try + len < buf + size && WCHAR((unsigned char) try[len]))
{
try = kwsexec(kwset, beg, --len, &kwsmatch);
len = kwsmatch.size[0];
}
else
goto success;
}
else
goto success;
}
return 0;
success:
if ((end = memchr(beg + len, '\n', (buf + size) - (beg + len))) != 0)
++end;
else
end = buf + size;
*endp = end;
while (beg > buf && beg[-1] != '\n')
--beg;
return beg;
}