This commit was manufactured by cvs2svn to create branch 'FSF'.
This commit is contained in:
commit
de798f3fde
29
gnu/usr.bin/grep/AUTHORS
Normal file
29
gnu/usr.bin/grep/AUTHORS
Normal file
@ -0,0 +1,29 @@
|
||||
Mike Haertel wrote the main program and the dfa and kwset matchers.
|
||||
|
||||
Arthur David Olson contributed the heuristics for finding fixed substrings
|
||||
at the end of dfa.c.
|
||||
|
||||
Richard Stallman and Karl Berry wrote the regex backtracking matcher.
|
||||
|
||||
Henry Spencer wrote the original test suite from which grep's was derived.
|
||||
|
||||
Scott Anderson invented the Khadafy test.
|
||||
|
||||
David MacKenzie wrote the automatic configuration software use to
|
||||
produce the configure script.
|
||||
|
||||
Authors of the replacements for standard library routines are identified
|
||||
in the corresponding source files.
|
||||
|
||||
The idea of using Boyer-Moore type algorithms to quickly filter out
|
||||
non-matching text before calling the regexp matcher was originally due
|
||||
to James Woods. He also contributed some code to early versions of
|
||||
GNU grep.
|
||||
|
||||
Finally, I would like to thank Andrew Hume for many fascinating discussions
|
||||
of string searching issues over the years. Hume & Sunday's excellent
|
||||
paper on fast string searching (AT&T Bell Laboratories CSTR #156)
|
||||
describes some of the history of the subject, as well as providing
|
||||
exhaustive performance analysis of various implementation alternatives.
|
||||
The inner loop of GNU grep is similar to Hume & Sunday's recommended
|
||||
"Tuned Boyer Moore" inner loop.
|
35
gnu/usr.bin/grep/NEWS
Normal file
35
gnu/usr.bin/grep/NEWS
Normal file
@ -0,0 +1,35 @@
|
||||
Version 2.0:
|
||||
|
||||
The most important user visible change is that egrep and fgrep have
|
||||
disappeared as separate programs into the single grep program mandated
|
||||
by POSIX 1003.2. New options -G, -E, and -F have been added,
|
||||
selecting grep, egrep, and fgrep behavior respectively. For
|
||||
compatibility with historical practice, hard links named egrep and
|
||||
fgrep are also provided. See the manual page for details.
|
||||
|
||||
In addition, the regular expression facilities described in Posix
|
||||
draft 11.2 are now supported, except for internationalization features
|
||||
related to locale-dependent collating sequence information.
|
||||
|
||||
There is a new option, -L, which is like -l except it lists
|
||||
files which don't contain matches. The reason this option was
|
||||
added is because '-l -v' doesn't do what you expect.
|
||||
|
||||
Performance has been improved; the amount of improvement is platform
|
||||
dependent, but (for example) grep 2.0 typically runs at least 30% faster
|
||||
than grep 1.6 on a DECstation using the MIPS compiler. Where possible,
|
||||
grep now uses mmap() for file input; on a Sun 4 running SunOS 4.1 this
|
||||
may cut system time by as much as half, for a total reduction in running
|
||||
time by nearly 50%. On machines that don't use mmap(), the buffering
|
||||
code has been rewritten to choose more favorable alignments and buffer
|
||||
sizes for read().
|
||||
|
||||
Portability has been substantially cleaned up, and an automatic
|
||||
configure script is now provided.
|
||||
|
||||
The internals have changed in ways too numerous to mention.
|
||||
People brave enough to reuse the DFA matcher in other programs
|
||||
will now have their bravery amply "rewarded", for the interface
|
||||
to that file has been completely changed. Some changes were
|
||||
necessary to track the evolution of the regex package, and since
|
||||
I was changing it anyway I decided to do a general cleanup.
|
42
gnu/usr.bin/grep/getpagesize.h
Normal file
42
gnu/usr.bin/grep/getpagesize.h
Normal file
@ -0,0 +1,42 @@
|
||||
#ifdef BSD
|
||||
#ifndef BSD4_1
|
||||
#define HAVE_GETPAGESIZE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_GETPAGESIZE
|
||||
|
||||
#ifdef VMS
|
||||
#define getpagesize() 512
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef _SC_PAGESIZE
|
||||
#define getpagesize() sysconf(_SC_PAGESIZE)
|
||||
#else
|
||||
|
||||
#ifdef HAVE_SYS_PARAM_H
|
||||
#include <sys/param.h>
|
||||
|
||||
#ifdef EXEC_PAGESIZE
|
||||
#define getpagesize() EXEC_PAGESIZE
|
||||
#else
|
||||
#ifdef NBPG
|
||||
#define getpagesize() NBPG * CLSIZE
|
||||
#ifndef CLSIZE
|
||||
#define CLSIZE 1
|
||||
#endif /* no CLSIZE */
|
||||
#else /* no NBPG */
|
||||
#define getpagesize() NBPC
|
||||
#endif /* no NBPG */
|
||||
#endif /* no EXEC_PAGESIZE */
|
||||
#else /* !HAVE_SYS_PARAM_H */
|
||||
#define getpagesize() 8192 /* punt totally */
|
||||
#endif /* !HAVE_SYS_PARAM_H */
|
||||
#endif /* no _SC_PAGESIZE */
|
||||
|
||||
#endif /* not HAVE_GETPAGESIZE */
|
||||
|
53
gnu/usr.bin/grep/grep.h
Normal file
53
gnu/usr.bin/grep/grep.h
Normal file
@ -0,0 +1,53 @@
|
||||
/* grep.h - interface to grep driver for searching subroutines.
|
||||
Copyright (C) 1992 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
#if __STDC__
|
||||
|
||||
extern void fatal(const char *, int);
|
||||
|
||||
/* Grep.c expects the matchers vector to be terminated
|
||||
by an entry with a NULL name, and to contain at least
|
||||
an entry named "default". */
|
||||
|
||||
extern struct matcher
|
||||
{
|
||||
char *name;
|
||||
void (*compile)(char *, size_t);
|
||||
char *(*execute)(char *, size_t, char **);
|
||||
} matchers[];
|
||||
|
||||
#else
|
||||
|
||||
extern void fatal();
|
||||
|
||||
extern struct matcher
|
||||
{
|
||||
char *name;
|
||||
void (*compile)();
|
||||
char *(*execute)();
|
||||
} matchers[];
|
||||
|
||||
#endif
|
||||
|
||||
/* Exported from grep.c. */
|
||||
extern char *matcher;
|
||||
|
||||
/* The following flags are exported from grep for the matchers
|
||||
to look at. */
|
||||
extern int match_icase; /* -i */
|
||||
extern int match_words; /* -w */
|
||||
extern int match_lines; /* -x */
|
805
gnu/usr.bin/grep/kwset.c
Normal file
805
gnu/usr.bin/grep/kwset.c
Normal file
@ -0,0 +1,805 @@
|
||||
/* kwset.c - search for any of a set of keywords.
|
||||
Copyright 1989 Free Software Foundation
|
||||
Written August 1989 by Mike Haertel.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 1, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
The author may be reached (Email) at the address mike@ai.mit.edu,
|
||||
or (US mail) as Mike Haertel c/o Free Software Foundation. */
|
||||
|
||||
/* The algorithm implemented by these routines bears a startling resemblence
|
||||
to one discovered by Beate Commentz-Walter, although it is not identical.
|
||||
See "A String Matching Algorithm Fast on the Average," Technical Report,
|
||||
IBM-Germany, Scientific Center Heidelberg, Tiergartenstrasse 15, D-6900
|
||||
Heidelberg, Germany. See also Aho, A.V., and M. Corasick, "Efficient
|
||||
String Matching: An Aid to Bibliographic Search," CACM June 1975,
|
||||
Vol. 18, No. 6, which describes the failure function used below. */
|
||||
|
||||
|
||||
#ifdef STDC_HEADERS
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#else
|
||||
#define INT_MAX 2147483647
|
||||
#define UCHAR_MAX 255
|
||||
#ifdef __STDC__
|
||||
#include <stddef.h>
|
||||
#else
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
extern char *malloc();
|
||||
extern void free();
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_MEMCHR
|
||||
#include <string.h>
|
||||
#ifdef NEED_MEMORY_H
|
||||
#include <memory.h>
|
||||
#endif
|
||||
#else
|
||||
#ifdef __STDC__
|
||||
extern void *memchr();
|
||||
#else
|
||||
extern char *memchr();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef GREP
|
||||
extern char *xmalloc();
|
||||
#define malloc xmalloc
|
||||
#endif
|
||||
|
||||
#include "kwset.h"
|
||||
#include "obstack.h"
|
||||
|
||||
#define NCHAR (UCHAR_MAX + 1)
|
||||
#define obstack_chunk_alloc malloc
|
||||
#define obstack_chunk_free free
|
||||
|
||||
/* Balanced tree of edges and labels leaving a given trie node. */
|
||||
struct tree
|
||||
{
|
||||
struct tree *llink; /* Left link; MUST be first field. */
|
||||
struct tree *rlink; /* Right link (to larger labels). */
|
||||
struct trie *trie; /* Trie node pointed to by this edge. */
|
||||
unsigned char label; /* Label on this edge. */
|
||||
char balance; /* Difference in depths of subtrees. */
|
||||
};
|
||||
|
||||
/* Node of a trie representing a set of reversed keywords. */
|
||||
struct trie
|
||||
{
|
||||
unsigned int accepting; /* Word index of accepted word, or zero. */
|
||||
struct tree *links; /* Tree of edges leaving this node. */
|
||||
struct trie *parent; /* Parent of this node. */
|
||||
struct trie *next; /* List of all trie nodes in level order. */
|
||||
struct trie *fail; /* Aho-Corasick failure function. */
|
||||
int depth; /* Depth of this node from the root. */
|
||||
int shift; /* Shift function for search failures. */
|
||||
int maxshift; /* Max shift of self and descendents. */
|
||||
};
|
||||
|
||||
/* Structure returned opaquely to the caller, containing everything. */
|
||||
struct kwset
|
||||
{
|
||||
struct obstack obstack; /* Obstack for node allocation. */
|
||||
int words; /* Number of words in the trie. */
|
||||
struct trie *trie; /* The trie itself. */
|
||||
int mind; /* Minimum depth of an accepting node. */
|
||||
int maxd; /* Maximum depth of any node. */
|
||||
unsigned char delta[NCHAR]; /* Delta table for rapid search. */
|
||||
struct trie *next[NCHAR]; /* Table of children of the root. */
|
||||
char *target; /* Target string if there's only one. */
|
||||
int mind2; /* Used in Boyer-Moore search for one string. */
|
||||
char *trans; /* Character translation table. */
|
||||
};
|
||||
|
||||
/* Allocate and initialize a keyword set object, returning an opaque
|
||||
pointer to it. Return NULL if memory is not available. */
|
||||
kwset_t
|
||||
kwsalloc(trans)
|
||||
char *trans;
|
||||
{
|
||||
struct kwset *kwset;
|
||||
|
||||
kwset = (struct kwset *) malloc(sizeof (struct kwset));
|
||||
if (!kwset)
|
||||
return 0;
|
||||
|
||||
obstack_init(&kwset->obstack);
|
||||
kwset->words = 0;
|
||||
kwset->trie
|
||||
= (struct trie *) obstack_alloc(&kwset->obstack, sizeof (struct trie));
|
||||
if (!kwset->trie)
|
||||
{
|
||||
kwsfree((kwset_t) kwset);
|
||||
return 0;
|
||||
}
|
||||
kwset->trie->accepting = 0;
|
||||
kwset->trie->links = 0;
|
||||
kwset->trie->parent = 0;
|
||||
kwset->trie->next = 0;
|
||||
kwset->trie->fail = 0;
|
||||
kwset->trie->depth = 0;
|
||||
kwset->trie->shift = 0;
|
||||
kwset->mind = INT_MAX;
|
||||
kwset->maxd = -1;
|
||||
kwset->target = 0;
|
||||
kwset->trans = trans;
|
||||
|
||||
return (kwset_t) kwset;
|
||||
}
|
||||
|
||||
/* Add the given string to the contents of the keyword set. Return NULL
|
||||
for success, an error message otherwise. */
|
||||
char *
|
||||
kwsincr(kws, text, len)
|
||||
kwset_t kws;
|
||||
char *text;
|
||||
size_t len;
|
||||
{
|
||||
struct kwset *kwset;
|
||||
register struct trie *trie;
|
||||
register unsigned char label;
|
||||
register struct tree *link;
|
||||
register int depth;
|
||||
struct tree *links[12];
|
||||
enum { L, R } dirs[12];
|
||||
struct tree *t, *r, *l, *rl, *lr;
|
||||
|
||||
kwset = (struct kwset *) kws;
|
||||
trie = kwset->trie;
|
||||
text += len;
|
||||
|
||||
/* Descend the trie (built of reversed keywords) character-by-character,
|
||||
installing new nodes when necessary. */
|
||||
while (len--)
|
||||
{
|
||||
label = kwset->trans ? kwset->trans[(unsigned char) *--text] : *--text;
|
||||
|
||||
/* Descend the tree of outgoing links for this trie node,
|
||||
looking for the current character and keeping track
|
||||
of the path followed. */
|
||||
link = trie->links;
|
||||
links[0] = (struct tree *) &trie->links;
|
||||
dirs[0] = L;
|
||||
depth = 1;
|
||||
|
||||
while (link && label != link->label)
|
||||
{
|
||||
links[depth] = link;
|
||||
if (label < link->label)
|
||||
dirs[depth++] = L, link = link->llink;
|
||||
else
|
||||
dirs[depth++] = R, link = link->rlink;
|
||||
}
|
||||
|
||||
/* The current character doesn't have an outgoing link at
|
||||
this trie node, so build a new trie node and install
|
||||
a link in the current trie node's tree. */
|
||||
if (!link)
|
||||
{
|
||||
link = (struct tree *) obstack_alloc(&kwset->obstack,
|
||||
sizeof (struct tree));
|
||||
if (!link)
|
||||
return "memory exhausted";
|
||||
link->llink = 0;
|
||||
link->rlink = 0;
|
||||
link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
|
||||
sizeof (struct trie));
|
||||
if (!link->trie)
|
||||
return "memory exhausted";
|
||||
link->trie->accepting = 0;
|
||||
link->trie->links = 0;
|
||||
link->trie->parent = trie;
|
||||
link->trie->next = 0;
|
||||
link->trie->fail = 0;
|
||||
link->trie->depth = trie->depth + 1;
|
||||
link->trie->shift = 0;
|
||||
link->label = label;
|
||||
link->balance = 0;
|
||||
|
||||
/* Install the new tree node in its parent. */
|
||||
if (dirs[--depth] == L)
|
||||
links[depth]->llink = link;
|
||||
else
|
||||
links[depth]->rlink = link;
|
||||
|
||||
/* Back up the tree fixing the balance flags. */
|
||||
while (depth && !links[depth]->balance)
|
||||
{
|
||||
if (dirs[depth] == L)
|
||||
--links[depth]->balance;
|
||||
else
|
||||
++links[depth]->balance;
|
||||
--depth;
|
||||
}
|
||||
|
||||
/* Rebalance the tree by pointer rotations if necessary. */
|
||||
if (depth && ((dirs[depth] == L && --links[depth]->balance)
|
||||
|| (dirs[depth] == R && ++links[depth]->balance)))
|
||||
{
|
||||
switch (links[depth]->balance)
|
||||
{
|
||||
case (char) -2:
|
||||
switch (dirs[depth + 1])
|
||||
{
|
||||
case L:
|
||||
r = links[depth], t = r->llink, rl = t->rlink;
|
||||
t->rlink = r, r->llink = rl;
|
||||
t->balance = r->balance = 0;
|
||||
break;
|
||||
case R:
|
||||
r = links[depth], l = r->llink, t = l->rlink;
|
||||
rl = t->rlink, lr = t->llink;
|
||||
t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
|
||||
l->balance = t->balance != 1 ? 0 : -1;
|
||||
r->balance = t->balance != (char) -1 ? 0 : 1;
|
||||
t->balance = 0;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (dirs[depth + 1])
|
||||
{
|
||||
case R:
|
||||
l = links[depth], t = l->rlink, lr = t->llink;
|
||||
t->llink = l, l->rlink = lr;
|
||||
t->balance = l->balance = 0;
|
||||
break;
|
||||
case L:
|
||||
l = links[depth], r = l->rlink, t = r->llink;
|
||||
lr = t->llink, rl = t->rlink;
|
||||
t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
|
||||
l->balance = t->balance != 1 ? 0 : -1;
|
||||
r->balance = t->balance != (char) -1 ? 0 : 1;
|
||||
t->balance = 0;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (dirs[depth - 1] == L)
|
||||
links[depth - 1]->llink = t;
|
||||
else
|
||||
links[depth - 1]->rlink = t;
|
||||
}
|
||||
}
|
||||
|
||||
trie = link->trie;
|
||||
}
|
||||
|
||||
/* Mark the node we finally reached as accepting, encoding the
|
||||
index number of this word in the keyword set so far. */
|
||||
if (!trie->accepting)
|
||||
trie->accepting = 1 + 2 * kwset->words;
|
||||
++kwset->words;
|
||||
|
||||
/* Keep track of the longest and shortest string of the keyword set. */
|
||||
if (trie->depth < kwset->mind)
|
||||
kwset->mind = trie->depth;
|
||||
if (trie->depth > kwset->maxd)
|
||||
kwset->maxd = trie->depth;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Enqueue the trie nodes referenced from the given tree in the
|
||||
given queue. */
|
||||
static void
|
||||
enqueue(tree, last)
|
||||
struct tree *tree;
|
||||
struct trie **last;
|
||||
{
|
||||
if (!tree)
|
||||
return;
|
||||
enqueue(tree->llink, last);
|
||||
enqueue(tree->rlink, last);
|
||||
(*last) = (*last)->next = tree->trie;
|
||||
}
|
||||
|
||||
/* Compute the Aho-Corasick failure function for the trie nodes referenced
|
||||
from the given tree, given the failure function for their parent as
|
||||
well as a last resort failure node. */
|
||||
static void
|
||||
treefails(tree, fail, recourse)
|
||||
register struct tree *tree;
|
||||
struct trie *fail;
|
||||
struct trie *recourse;
|
||||
{
|
||||
register struct tree *link;
|
||||
|
||||
if (!tree)
|
||||
return;
|
||||
|
||||
treefails(tree->llink, fail, recourse);
|
||||
treefails(tree->rlink, fail, recourse);
|
||||
|
||||
/* Find, in the chain of fails going back to the root, the first
|
||||
node that has a descendent on the current label. */
|
||||
while (fail)
|
||||
{
|
||||
link = fail->links;
|
||||
while (link && tree->label != link->label)
|
||||
if (tree->label < link->label)
|
||||
link = link->llink;
|
||||
else
|
||||
link = link->rlink;
|
||||
if (link)
|
||||
{
|
||||
tree->trie->fail = link->trie;
|
||||
return;
|
||||
}
|
||||
fail = fail->fail;
|
||||
}
|
||||
|
||||
tree->trie->fail = recourse;
|
||||
}
|
||||
|
||||
/* Set delta entries for the links of the given tree such that
|
||||
the preexisting delta value is larger than the current depth. */
|
||||
static void
|
||||
treedelta(tree, depth, delta)
|
||||
register struct tree *tree;
|
||||
register unsigned int depth;
|
||||
unsigned char delta[];
|
||||
{
|
||||
if (!tree)
|
||||
return;
|
||||
treedelta(tree->llink, depth, delta);
|
||||
treedelta(tree->rlink, depth, delta);
|
||||
if (depth < delta[tree->label])
|
||||
delta[tree->label] = depth;
|
||||
}
|
||||
|
||||
/* Return true if A has every label in B. */
|
||||
static int
|
||||
hasevery(a, b)
|
||||
register struct tree *a;
|
||||
register struct tree *b;
|
||||
{
|
||||
if (!b)
|
||||
return 1;
|
||||
if (!hasevery(a, b->llink))
|
||||
return 0;
|
||||
if (!hasevery(a, b->rlink))
|
||||
return 0;
|
||||
while (a && b->label != a->label)
|
||||
if (b->label < a->label)
|
||||
a = a->llink;
|
||||
else
|
||||
a = a->rlink;
|
||||
return !!a;
|
||||
}
|
||||
|
||||
/* Compute a vector, indexed by character code, of the trie nodes
|
||||
referenced from the given tree. */
|
||||
static void
|
||||
treenext(tree, next)
|
||||
struct tree *tree;
|
||||
struct trie *next[];
|
||||
{
|
||||
if (!tree)
|
||||
return;
|
||||
treenext(tree->llink, next);
|
||||
treenext(tree->rlink, next);
|
||||
next[tree->label] = tree->trie;
|
||||
}
|
||||
|
||||
/* Compute the shift for each trie node, as well as the delta
|
||||
table and next cache for the given keyword set. */
|
||||
char *
|
||||
kwsprep(kws)
|
||||
kwset_t kws;
|
||||
{
|
||||
register struct kwset *kwset;
|
||||
register int i;
|
||||
register struct trie *curr, *fail;
|
||||
register char *trans;
|
||||
unsigned char delta[NCHAR];
|
||||
struct trie *last, *next[NCHAR];
|
||||
|
||||
kwset = (struct kwset *) kws;
|
||||
|
||||
/* Initial values for the delta table; will be changed later. The
|
||||
delta entry for a given character is the smallest depth of any
|
||||
node at which an outgoing edge is labeled by that character. */
|
||||
if (kwset->mind < 256)
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
delta[i] = kwset->mind;
|
||||
else
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
delta[i] = 255;
|
||||
|
||||
/* Check if we can use the simple boyer-moore algorithm, instead
|
||||
of the hairy commentz-walter algorithm. */
|
||||
if (kwset->words == 1 && kwset->trans == 0)
|
||||
{
|
||||
/* Looking for just one string. Extract it from the trie. */
|
||||
kwset->target = obstack_alloc(&kwset->obstack, kwset->mind);
|
||||
for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
|
||||
{
|
||||
kwset->target[i] = curr->links->label;
|
||||
curr = curr->links->trie;
|
||||
}
|
||||
/* Build the Boyer Moore delta. Boy that's easy compared to CW. */
|
||||
for (i = 0; i < kwset->mind; ++i)
|
||||
delta[(unsigned char) kwset->target[i]] = kwset->mind - (i + 1);
|
||||
kwset->mind2 = kwset->mind;
|
||||
/* Find the minimal delta2 shift that we might make after
|
||||
a backwards match has failed. */
|
||||
for (i = 0; i < kwset->mind - 1; ++i)
|
||||
if (kwset->target[i] == kwset->target[kwset->mind - 1])
|
||||
kwset->mind2 = kwset->mind - (i + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Traverse the nodes of the trie in level order, simultaneously
|
||||
computing the delta table, failure function, and shift function. */
|
||||
for (curr = last = kwset->trie; curr; curr = curr->next)
|
||||
{
|
||||
/* Enqueue the immediate descendents in the level order queue. */
|
||||
enqueue(curr->links, &last);
|
||||
|
||||
curr->shift = kwset->mind;
|
||||
curr->maxshift = kwset->mind;
|
||||
|
||||
/* Update the delta table for the descendents of this node. */
|
||||
treedelta(curr->links, curr->depth, delta);
|
||||
|
||||
/* Compute the failure function for the decendents of this node. */
|
||||
treefails(curr->links, curr->fail, kwset->trie);
|
||||
|
||||
/* Update the shifts at each node in the current node's chain
|
||||
of fails back to the root. */
|
||||
for (fail = curr->fail; fail; fail = fail->fail)
|
||||
{
|
||||
/* If the current node has some outgoing edge that the fail
|
||||
doesn't, then the shift at the fail should be no larger
|
||||
than the difference of their depths. */
|
||||
if (!hasevery(fail->links, curr->links))
|
||||
if (curr->depth - fail->depth < fail->shift)
|
||||
fail->shift = curr->depth - fail->depth;
|
||||
|
||||
/* If the current node is accepting then the shift at the
|
||||
fail and its descendents should be no larger than the
|
||||
difference of their depths. */
|
||||
if (curr->accepting && fail->maxshift > curr->depth - fail->depth)
|
||||
fail->maxshift = curr->depth - fail->depth;
|
||||
}
|
||||
}
|
||||
|
||||
/* Traverse the trie in level order again, fixing up all nodes whose
|
||||
shift exceeds their inherited maxshift. */
|
||||
for (curr = kwset->trie->next; curr; curr = curr->next)
|
||||
{
|
||||
if (curr->maxshift > curr->parent->maxshift)
|
||||
curr->maxshift = curr->parent->maxshift;
|
||||
if (curr->shift > curr->maxshift)
|
||||
curr->shift = curr->maxshift;
|
||||
}
|
||||
|
||||
/* Create a vector, indexed by character code, of the outgoing links
|
||||
from the root node. */
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
next[i] = 0;
|
||||
treenext(kwset->trie->links, next);
|
||||
|
||||
if ((trans = kwset->trans) != 0)
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
kwset->next[i] = next[(unsigned char) trans[i]];
|
||||
else
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
kwset->next[i] = next[i];
|
||||
}
|
||||
|
||||
/* Fix things up for any translation table. */
|
||||
if ((trans = kwset->trans) != 0)
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
kwset->delta[i] = delta[(unsigned char) trans[i]];
|
||||
else
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
kwset->delta[i] = delta[i];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define U(C) ((unsigned char) (C))
|
||||
|
||||
/* Fast boyer-moore search. */
|
||||
static char *
|
||||
bmexec(kws, text, size)
|
||||
kwset_t kws;
|
||||
char *text;
|
||||
size_t size;
|
||||
{
|
||||
struct kwset *kwset;
|
||||
register unsigned char *d1;
|
||||
register char *ep, *sp, *tp;
|
||||
register int d, gc, i, len, md2;
|
||||
|
||||
kwset = (struct kwset *) kws;
|
||||
len = kwset->mind;
|
||||
|
||||
if (len == 0)
|
||||
return text;
|
||||
if (len > size)
|
||||
return 0;
|
||||
if (len == 1)
|
||||
return memchr(text, kwset->target[0], size);
|
||||
|
||||
d1 = kwset->delta;
|
||||
sp = kwset->target + len;
|
||||
gc = U(sp[-2]);
|
||||
md2 = kwset->mind2;
|
||||
tp = text + len;
|
||||
|
||||
/* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
|
||||
if (size > 12 * len)
|
||||
/* 11 is not a bug, the initial offset happens only once. */
|
||||
for (ep = text + size - 11 * len;;)
|
||||
{
|
||||
while (tp <= ep)
|
||||
{
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
if (d == 0)
|
||||
goto found;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
if (d == 0)
|
||||
goto found;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
if (d == 0)
|
||||
goto found;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
d = d1[U(tp[-1])], tp += d;
|
||||
}
|
||||
break;
|
||||
found:
|
||||
if (U(tp[-2]) == gc)
|
||||
{
|
||||
for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
|
||||
;
|
||||
if (i > len)
|
||||
return tp - len;
|
||||
}
|
||||
tp += md2;
|
||||
}
|
||||
|
||||
/* Now we have only a few characters left to search. We
|
||||
carefully avoid ever producing an out-of-bounds pointer. */
|
||||
ep = text + size;
|
||||
d = d1[U(tp[-1])];
|
||||
while (d <= ep - tp)
|
||||
{
|
||||
d = d1[U((tp += d)[-1])];
|
||||
if (d != 0)
|
||||
continue;
|
||||
if (tp[-2] == gc)
|
||||
{
|
||||
for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
|
||||
;
|
||||
if (i > len)
|
||||
return tp - len;
|
||||
}
|
||||
d = md2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Hairy multiple string search. */
|
||||
static char *
|
||||
cwexec(kws, text, len, kwsmatch)
|
||||
kwset_t kws;
|
||||
char *text;
|
||||
size_t len;
|
||||
struct kwsmatch *kwsmatch;
|
||||
{
|
||||
struct kwset *kwset;
|
||||
struct trie **next, *trie, *accept;
|
||||
char *beg, *lim, *mch, *lmch;
|
||||
register unsigned char c, *delta;
|
||||
register int d;
|
||||
register char *end, *qlim;
|
||||
register struct tree *tree;
|
||||
register char *trans;
|
||||
|
||||
/* Initialize register copies and look for easy ways out. */
|
||||
kwset = (struct kwset *) kws;
|
||||
if (len < kwset->mind)
|
||||
return 0;
|
||||
next = kwset->next;
|
||||
delta = kwset->delta;
|
||||
trans = kwset->trans;
|
||||
lim = text + len;
|
||||
end = text;
|
||||
if ((d = kwset->mind) != 0)
|
||||
mch = 0;
|
||||
else
|
||||
{
|
||||
mch = text, accept = kwset->trie;
|
||||
goto match;
|
||||
}
|
||||
|
||||
if (len >= 4 * kwset->mind)
|
||||
qlim = lim - 4 * kwset->mind;
|
||||
else
|
||||
qlim = 0;
|
||||
|
||||
while (lim - end >= d)
|
||||
{
|
||||
if (qlim && end <= qlim)
|
||||
{
|
||||
end += d - 1;
|
||||
while ((d = delta[c = *end]) && end < qlim)
|
||||
{
|
||||
end += d;
|
||||
end += delta[(unsigned char) *end];
|
||||
end += delta[(unsigned char) *end];
|
||||
}
|
||||
++end;
|
||||
}
|
||||
else
|
||||
d = delta[c = (end += d)[-1]];
|
||||
if (d)
|
||||
continue;
|
||||
beg = end - 1;
|
||||
trie = next[c];
|
||||
if (trie->accepting)
|
||||
{
|
||||
mch = beg;
|
||||
accept = trie;
|
||||
}
|
||||
d = trie->shift;
|
||||
while (beg > text)
|
||||
{
|
||||
c = trans ? trans[(unsigned char) *--beg] : *--beg;
|
||||
tree = trie->links;
|
||||
while (tree && c != tree->label)
|
||||
if (c < tree->label)
|
||||
tree = tree->llink;
|
||||
else
|
||||
tree = tree->rlink;
|
||||
if (tree)
|
||||
{
|
||||
trie = tree->trie;
|
||||
if (trie->accepting)
|
||||
{
|
||||
mch = beg;
|
||||
accept = trie;
|
||||
}
|
||||
}
|
||||
else
|
||||
break;
|
||||
d = trie->shift;
|
||||
}
|
||||
if (mch)
|
||||
goto match;
|
||||
}
|
||||
return 0;
|
||||
|
||||
match:
|
||||
/* Given a known match, find the longest possible match anchored
|
||||
at or before its starting point. This is nearly a verbatim
|
||||
copy of the preceding main search loops. */
|
||||
if (lim - mch > kwset->maxd)
|
||||
lim = mch + kwset->maxd;
|
||||
lmch = 0;
|
||||
d = 1;
|
||||
while (lim - end >= d)
|
||||
{
|
||||
if ((d = delta[c = (end += d)[-1]]) != 0)
|
||||
continue;
|
||||
beg = end - 1;
|
||||
if (!(trie = next[c]))
|
||||
{
|
||||
d = 1;
|
||||
continue;
|
||||
}
|
||||
if (trie->accepting && beg <= mch)
|
||||
{
|
||||
lmch = beg;
|
||||
accept = trie;
|
||||
}
|
||||
d = trie->shift;
|
||||
while (beg > text)
|
||||
{
|
||||
c = trans ? trans[(unsigned char) *--beg] : *--beg;
|
||||
tree = trie->links;
|
||||
while (tree && c != tree->label)
|
||||
if (c < tree->label)
|
||||
tree = tree->llink;
|
||||
else
|
||||
tree = tree->rlink;
|
||||
if (tree)
|
||||
{
|
||||
trie = tree->trie;
|
||||
if (trie->accepting && beg <= mch)
|
||||
{
|
||||
lmch = beg;
|
||||
accept = trie;
|
||||
}
|
||||
}
|
||||
else
|
||||
break;
|
||||
d = trie->shift;
|
||||
}
|
||||
if (lmch)
|
||||
{
|
||||
mch = lmch;
|
||||
goto match;
|
||||
}
|
||||
if (!d)
|
||||
d = 1;
|
||||
}
|
||||
|
||||
if (kwsmatch)
|
||||
{
|
||||
kwsmatch->index = accept->accepting / 2;
|
||||
kwsmatch->beg[0] = mch;
|
||||
kwsmatch->size[0] = accept->depth;
|
||||
}
|
||||
return mch;
|
||||
}
|
||||
|
||||
/* Search through the given text for a match of any member of the
|
||||
given keyword set. Return a pointer to the first character of
|
||||
the matching substring, or NULL if no match is found. If FOUNDLEN
|
||||
is non-NULL store in the referenced location the length of the
|
||||
matching substring. Similarly, if FOUNDIDX is non-NULL, store
|
||||
in the referenced location the index number of the particular
|
||||
keyword matched. */
|
||||
char *
|
||||
kwsexec(kws, text, size, kwsmatch)
|
||||
kwset_t kws;
|
||||
char *text;
|
||||
size_t size;
|
||||
struct kwsmatch *kwsmatch;
|
||||
{
|
||||
struct kwset *kwset;
|
||||
char *ret;
|
||||
|
||||
kwset = (struct kwset *) kws;
|
||||
if (kwset->words == 1 && kwset->trans == 0)
|
||||
{
|
||||
ret = bmexec(kws, text, size);
|
||||
if (kwsmatch != 0 && ret != 0)
|
||||
{
|
||||
kwsmatch->index = 0;
|
||||
kwsmatch->beg[0] = ret;
|
||||
kwsmatch->size[0] = kwset->mind;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
else
|
||||
return cwexec(kws, text, size, kwsmatch);
|
||||
}
|
||||
|
||||
/* Free the components of the given keyword set. */
|
||||
void
|
||||
kwsfree(kws)
|
||||
kwset_t kws;
|
||||
{
|
||||
struct kwset *kwset;
|
||||
|
||||
kwset = (struct kwset *) kws;
|
||||
obstack_free(&kwset->obstack, 0);
|
||||
free(kws);
|
||||
}
|
69
gnu/usr.bin/grep/kwset.h
Normal file
69
gnu/usr.bin/grep/kwset.h
Normal file
@ -0,0 +1,69 @@
|
||||
/* kwset.h - header declaring the keyword set library.
|
||||
Copyright 1989 Free Software Foundation
|
||||
Written August 1989 by Mike Haertel.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 1, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
The author may be reached (Email) at the address mike@ai.mit.edu,
|
||||
or (US mail) as Mike Haertel c/o Free Software Foundation. */
|
||||
|
||||
struct kwsmatch
|
||||
{
|
||||
int index; /* Index number of matching keyword. */
|
||||
char *beg[1]; /* Begin pointer for each submatch. */
|
||||
size_t size[1]; /* Length of each submatch. */
|
||||
};
|
||||
|
||||
#if __STDC__
|
||||
|
||||
typedef void *kwset_t;
|
||||
|
||||
/* Return an opaque pointer to a newly allocated keyword set, or NULL
|
||||
if enough memory cannot be obtained. The argument if non-NULL
|
||||
specifies a table of character translations to be applied to all
|
||||
pattern and search text. */
|
||||
extern kwset_t kwsalloc(char *);
|
||||
|
||||
/* Incrementally extend the keyword set to include the given string.
|
||||
Return NULL for success, or an error message. Remember an index
|
||||
number for each keyword included in the set. */
|
||||
extern char *kwsincr(kwset_t, char *, size_t);
|
||||
|
||||
/* When the keyword set has been completely built, prepare it for
|
||||
use. Return NULL for success, or an error message. */
|
||||
extern char *kwsprep(kwset_t);
|
||||
|
||||
/* Search through the given buffer for a member of the keyword set.
|
||||
Return a pointer to the leftmost longest match found, or NULL if
|
||||
no match is found. If foundlen is non-NULL, store the length of
|
||||
the matching substring in the integer it points to. Similarly,
|
||||
if foundindex is non-NULL, store the index of the particular
|
||||
keyword found therein. */
|
||||
extern char *kwsexec(kwset_t, char *, size_t, struct kwsmatch *);
|
||||
|
||||
/* Deallocate the given keyword set and all its associated storage. */
|
||||
extern void kwsfree(kwset_t);
|
||||
|
||||
#else
|
||||
|
||||
typedef char *kwset_t;
|
||||
|
||||
extern kwset_t kwsalloc();
|
||||
extern char *kwsincr();
|
||||
extern char *kwsprep();
|
||||
extern char *kwsexec();
|
||||
extern void kwsfree();
|
||||
|
||||
#endif
|
454
gnu/usr.bin/grep/obstack.c
Normal file
454
gnu/usr.bin/grep/obstack.c
Normal file
@ -0,0 +1,454 @@
|
||||
/* obstack.c - subroutines used implicitly by object stack macros
|
||||
Copyright (C) 1988, 1993 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2, or (at your option) any
|
||||
later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
#include "obstack.h"
|
||||
|
||||
/* This is just to get __GNU_LIBRARY__ defined. */
|
||||
#include <stdio.h>
|
||||
|
||||
/* Comment out all this code if we are using the GNU C Library, and are not
|
||||
actually compiling the library itself. This code is part of the GNU C
|
||||
Library, but also included in many other GNU distributions. Compiling
|
||||
and linking in this code is a waste when using the GNU C library
|
||||
(especially if it is a shared library). Rather than having every GNU
|
||||
program understand `configure --with-gnu-libc' and omit the object files,
|
||||
it is simpler to just do this in the source for each such file. */
|
||||
|
||||
#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
|
||||
|
||||
|
||||
#ifdef __STDC__
|
||||
#define POINTER void *
|
||||
#else
|
||||
#define POINTER char *
|
||||
#endif
|
||||
|
||||
/* Determine default alignment. */
|
||||
struct fooalign {char x; double d;};
|
||||
#define DEFAULT_ALIGNMENT \
|
||||
((PTR_INT_TYPE) ((char *)&((struct fooalign *) 0)->d - (char *)0))
|
||||
/* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT.
|
||||
But in fact it might be less smart and round addresses to as much as
|
||||
DEFAULT_ROUNDING. So we prepare for it to do that. */
|
||||
union fooround {long x; double d;};
|
||||
#define DEFAULT_ROUNDING (sizeof (union fooround))
|
||||
|
||||
/* When we copy a long block of data, this is the unit to do it with.
|
||||
On some machines, copying successive ints does not work;
|
||||
in such a case, redefine COPYING_UNIT to `long' (if that works)
|
||||
or `char' as a last resort. */
|
||||
#ifndef COPYING_UNIT
|
||||
#define COPYING_UNIT int
|
||||
#endif
|
||||
|
||||
/* The non-GNU-C macros copy the obstack into this global variable
|
||||
to avoid multiple evaluation. */
|
||||
|
||||
struct obstack *_obstack;
|
||||
|
||||
/* Define a macro that either calls functions with the traditional malloc/free
|
||||
calling interface, or calls functions with the mmalloc/mfree interface
|
||||
(that adds an extra first argument), based on the state of use_extra_arg.
|
||||
For free, do not use ?:, since some compilers, like the MIPS compilers,
|
||||
do not allow (expr) ? void : void. */
|
||||
|
||||
#define CALL_CHUNKFUN(h, size) \
|
||||
(((h) -> use_extra_arg) \
|
||||
? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
|
||||
: (*(h)->chunkfun) ((size)))
|
||||
|
||||
#define CALL_FREEFUN(h, old_chunk) \
|
||||
do { \
|
||||
if ((h) -> use_extra_arg) \
|
||||
(*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
|
||||
else \
|
||||
(*(h)->freefun) ((old_chunk)); \
|
||||
} while (0)
|
||||
|
||||
|
||||
/* Initialize an obstack H for use. Specify chunk size SIZE (0 means default).
|
||||
Objects start on multiples of ALIGNMENT (0 means use default).
|
||||
CHUNKFUN is the function to use to allocate chunks,
|
||||
and FREEFUN the function to free them. */
|
||||
|
||||
void
|
||||
_obstack_begin (h, size, alignment, chunkfun, freefun)
|
||||
struct obstack *h;
|
||||
int size;
|
||||
int alignment;
|
||||
POINTER (*chunkfun) ();
|
||||
void (*freefun) ();
|
||||
{
|
||||
register struct _obstack_chunk* chunk; /* points to new chunk */
|
||||
|
||||
if (alignment == 0)
|
||||
alignment = DEFAULT_ALIGNMENT;
|
||||
if (size == 0)
|
||||
/* Default size is what GNU malloc can fit in a 4096-byte block. */
|
||||
{
|
||||
/* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
|
||||
Use the values for range checking, because if range checking is off,
|
||||
the extra bytes won't be missed terribly, but if range checking is on
|
||||
and we used a larger request, a whole extra 4096 bytes would be
|
||||
allocated.
|
||||
|
||||
These number are irrelevant to the new GNU malloc. I suspect it is
|
||||
less sensitive to the size of the request. */
|
||||
int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
|
||||
+ 4 + DEFAULT_ROUNDING - 1)
|
||||
& ~(DEFAULT_ROUNDING - 1));
|
||||
size = 4096 - extra;
|
||||
}
|
||||
|
||||
h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
|
||||
h->freefun = freefun;
|
||||
h->chunk_size = size;
|
||||
h->alignment_mask = alignment - 1;
|
||||
h->use_extra_arg = 0;
|
||||
|
||||
chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
|
||||
h->next_free = h->object_base = chunk->contents;
|
||||
h->chunk_limit = chunk->limit
|
||||
= (char *) chunk + h->chunk_size;
|
||||
chunk->prev = 0;
|
||||
/* The initial chunk now contains no empty object. */
|
||||
h->maybe_empty_object = 0;
|
||||
}
|
||||
|
||||
void
|
||||
_obstack_begin_1 (h, size, alignment, chunkfun, freefun, arg)
|
||||
struct obstack *h;
|
||||
int size;
|
||||
int alignment;
|
||||
POINTER (*chunkfun) ();
|
||||
void (*freefun) ();
|
||||
POINTER arg;
|
||||
{
|
||||
register struct _obstack_chunk* chunk; /* points to new chunk */
|
||||
|
||||
if (alignment == 0)
|
||||
alignment = DEFAULT_ALIGNMENT;
|
||||
if (size == 0)
|
||||
/* Default size is what GNU malloc can fit in a 4096-byte block. */
|
||||
{
|
||||
/* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
|
||||
Use the values for range checking, because if range checking is off,
|
||||
the extra bytes won't be missed terribly, but if range checking is on
|
||||
and we used a larger request, a whole extra 4096 bytes would be
|
||||
allocated.
|
||||
|
||||
These number are irrelevant to the new GNU malloc. I suspect it is
|
||||
less sensitive to the size of the request. */
|
||||
int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
|
||||
+ 4 + DEFAULT_ROUNDING - 1)
|
||||
& ~(DEFAULT_ROUNDING - 1));
|
||||
size = 4096 - extra;
|
||||
}
|
||||
|
||||
h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
|
||||
h->freefun = freefun;
|
||||
h->chunk_size = size;
|
||||
h->alignment_mask = alignment - 1;
|
||||
h->extra_arg = arg;
|
||||
h->use_extra_arg = 1;
|
||||
|
||||
chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
|
||||
h->next_free = h->object_base = chunk->contents;
|
||||
h->chunk_limit = chunk->limit
|
||||
= (char *) chunk + h->chunk_size;
|
||||
chunk->prev = 0;
|
||||
/* The initial chunk now contains no empty object. */
|
||||
h->maybe_empty_object = 0;
|
||||
}
|
||||
|
||||
/* Allocate a new current chunk for the obstack *H
|
||||
on the assumption that LENGTH bytes need to be added
|
||||
to the current object, or a new object of length LENGTH allocated.
|
||||
Copies any partial object from the end of the old chunk
|
||||
to the beginning of the new one. */
|
||||
|
||||
void
|
||||
_obstack_newchunk (h, length)
|
||||
struct obstack *h;
|
||||
int length;
|
||||
{
|
||||
register struct _obstack_chunk* old_chunk = h->chunk;
|
||||
register struct _obstack_chunk* new_chunk;
|
||||
register long new_size;
|
||||
register int obj_size = h->next_free - h->object_base;
|
||||
register int i;
|
||||
int already;
|
||||
|
||||
/* Compute size for new chunk. */
|
||||
new_size = (obj_size + length) + (obj_size >> 3) + 100;
|
||||
if (new_size < h->chunk_size)
|
||||
new_size = h->chunk_size;
|
||||
|
||||
/* Allocate and initialize the new chunk. */
|
||||
new_chunk = h->chunk = CALL_CHUNKFUN (h, new_size);
|
||||
new_chunk->prev = old_chunk;
|
||||
new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size;
|
||||
|
||||
/* Move the existing object to the new chunk.
|
||||
Word at a time is fast and is safe if the object
|
||||
is sufficiently aligned. */
|
||||
if (h->alignment_mask + 1 >= DEFAULT_ALIGNMENT)
|
||||
{
|
||||
for (i = obj_size / sizeof (COPYING_UNIT) - 1;
|
||||
i >= 0; i--)
|
||||
((COPYING_UNIT *)new_chunk->contents)[i]
|
||||
= ((COPYING_UNIT *)h->object_base)[i];
|
||||
/* We used to copy the odd few remaining bytes as one extra COPYING_UNIT,
|
||||
but that can cross a page boundary on a machine
|
||||
which does not do strict alignment for COPYING_UNITS. */
|
||||
already = obj_size / sizeof (COPYING_UNIT) * sizeof (COPYING_UNIT);
|
||||
}
|
||||
else
|
||||
already = 0;
|
||||
/* Copy remaining bytes one by one. */
|
||||
for (i = already; i < obj_size; i++)
|
||||
new_chunk->contents[i] = h->object_base[i];
|
||||
|
||||
/* If the object just copied was the only data in OLD_CHUNK,
|
||||
free that chunk and remove it from the chain.
|
||||
But not if that chunk might contain an empty object. */
|
||||
if (h->object_base == old_chunk->contents && ! h->maybe_empty_object)
|
||||
{
|
||||
new_chunk->prev = old_chunk->prev;
|
||||
CALL_FREEFUN (h, old_chunk);
|
||||
}
|
||||
|
||||
h->object_base = new_chunk->contents;
|
||||
h->next_free = h->object_base + obj_size;
|
||||
/* The new chunk certainly contains no empty object yet. */
|
||||
h->maybe_empty_object = 0;
|
||||
}
|
||||
|
||||
/* Return nonzero if object OBJ has been allocated from obstack H.
|
||||
This is here for debugging.
|
||||
If you use it in a program, you are probably losing. */
|
||||
|
||||
int
|
||||
_obstack_allocated_p (h, obj)
|
||||
struct obstack *h;
|
||||
POINTER obj;
|
||||
{
|
||||
register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */
|
||||
register struct _obstack_chunk* plp; /* point to previous chunk if any */
|
||||
|
||||
lp = (h)->chunk;
|
||||
/* We use >= rather than > since the object cannot be exactly at
|
||||
the beginning of the chunk but might be an empty object exactly
|
||||
at the end of an adjacent chunk. */
|
||||
while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
|
||||
{
|
||||
plp = lp->prev;
|
||||
lp = plp;
|
||||
}
|
||||
return lp != 0;
|
||||
}
|
||||
|
||||
/* Free objects in obstack H, including OBJ and everything allocate
|
||||
more recently than OBJ. If OBJ is zero, free everything in H. */
|
||||
|
||||
#undef obstack_free
|
||||
|
||||
/* This function has two names with identical definitions.
|
||||
This is the first one, called from non-ANSI code. */
|
||||
|
||||
void
|
||||
_obstack_free (h, obj)
|
||||
struct obstack *h;
|
||||
POINTER obj;
|
||||
{
|
||||
register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */
|
||||
register struct _obstack_chunk* plp; /* point to previous chunk if any */
|
||||
|
||||
lp = h->chunk;
|
||||
/* We use >= because there cannot be an object at the beginning of a chunk.
|
||||
But there can be an empty object at that address
|
||||
at the end of another chunk. */
|
||||
while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
|
||||
{
|
||||
plp = lp->prev;
|
||||
CALL_FREEFUN (h, lp);
|
||||
lp = plp;
|
||||
/* If we switch chunks, we can't tell whether the new current
|
||||
chunk contains an empty object, so assume that it may. */
|
||||
h->maybe_empty_object = 1;
|
||||
}
|
||||
if (lp)
|
||||
{
|
||||
h->object_base = h->next_free = (char *)(obj);
|
||||
h->chunk_limit = lp->limit;
|
||||
h->chunk = lp;
|
||||
}
|
||||
else if (obj != 0)
|
||||
/* obj is not in any of the chunks! */
|
||||
abort ();
|
||||
}
|
||||
|
||||
/* This function is used from ANSI code. */
|
||||
|
||||
void
|
||||
obstack_free (h, obj)
|
||||
struct obstack *h;
|
||||
POINTER obj;
|
||||
{
|
||||
register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */
|
||||
register struct _obstack_chunk* plp; /* point to previous chunk if any */
|
||||
|
||||
lp = h->chunk;
|
||||
/* We use >= because there cannot be an object at the beginning of a chunk.
|
||||
But there can be an empty object at that address
|
||||
at the end of another chunk. */
|
||||
while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj))
|
||||
{
|
||||
plp = lp->prev;
|
||||
CALL_FREEFUN (h, lp);
|
||||
lp = plp;
|
||||
/* If we switch chunks, we can't tell whether the new current
|
||||
chunk contains an empty object, so assume that it may. */
|
||||
h->maybe_empty_object = 1;
|
||||
}
|
||||
if (lp)
|
||||
{
|
||||
h->object_base = h->next_free = (char *)(obj);
|
||||
h->chunk_limit = lp->limit;
|
||||
h->chunk = lp;
|
||||
}
|
||||
else if (obj != 0)
|
||||
/* obj is not in any of the chunks! */
|
||||
abort ();
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* These are now turned off because the applications do not use it
|
||||
and it uses bcopy via obstack_grow, which causes trouble on sysV. */
|
||||
|
||||
/* Now define the functional versions of the obstack macros.
|
||||
Define them to simply use the corresponding macros to do the job. */
|
||||
|
||||
#ifdef __STDC__
|
||||
/* These function definitions do not work with non-ANSI preprocessors;
|
||||
they won't pass through the macro names in parentheses. */
|
||||
|
||||
/* The function names appear in parentheses in order to prevent
|
||||
the macro-definitions of the names from being expanded there. */
|
||||
|
||||
POINTER (obstack_base) (obstack)
|
||||
struct obstack *obstack;
|
||||
{
|
||||
return obstack_base (obstack);
|
||||
}
|
||||
|
||||
POINTER (obstack_next_free) (obstack)
|
||||
struct obstack *obstack;
|
||||
{
|
||||
return obstack_next_free (obstack);
|
||||
}
|
||||
|
||||
int (obstack_object_size) (obstack)
|
||||
struct obstack *obstack;
|
||||
{
|
||||
return obstack_object_size (obstack);
|
||||
}
|
||||
|
||||
int (obstack_room) (obstack)
|
||||
struct obstack *obstack;
|
||||
{
|
||||
return obstack_room (obstack);
|
||||
}
|
||||
|
||||
void (obstack_grow) (obstack, pointer, length)
|
||||
struct obstack *obstack;
|
||||
POINTER pointer;
|
||||
int length;
|
||||
{
|
||||
obstack_grow (obstack, pointer, length);
|
||||
}
|
||||
|
||||
void (obstack_grow0) (obstack, pointer, length)
|
||||
struct obstack *obstack;
|
||||
POINTER pointer;
|
||||
int length;
|
||||
{
|
||||
obstack_grow0 (obstack, pointer, length);
|
||||
}
|
||||
|
||||
void (obstack_1grow) (obstack, character)
|
||||
struct obstack *obstack;
|
||||
int character;
|
||||
{
|
||||
obstack_1grow (obstack, character);
|
||||
}
|
||||
|
||||
void (obstack_blank) (obstack, length)
|
||||
struct obstack *obstack;
|
||||
int length;
|
||||
{
|
||||
obstack_blank (obstack, length);
|
||||
}
|
||||
|
||||
void (obstack_1grow_fast) (obstack, character)
|
||||
struct obstack *obstack;
|
||||
int character;
|
||||
{
|
||||
obstack_1grow_fast (obstack, character);
|
||||
}
|
||||
|
||||
void (obstack_blank_fast) (obstack, length)
|
||||
struct obstack *obstack;
|
||||
int length;
|
||||
{
|
||||
obstack_blank_fast (obstack, length);
|
||||
}
|
||||
|
||||
POINTER (obstack_finish) (obstack)
|
||||
struct obstack *obstack;
|
||||
{
|
||||
return obstack_finish (obstack);
|
||||
}
|
||||
|
||||
POINTER (obstack_alloc) (obstack, length)
|
||||
struct obstack *obstack;
|
||||
int length;
|
||||
{
|
||||
return obstack_alloc (obstack, length);
|
||||
}
|
||||
|
||||
POINTER (obstack_copy) (obstack, pointer, length)
|
||||
struct obstack *obstack;
|
||||
POINTER pointer;
|
||||
int length;
|
||||
{
|
||||
return obstack_copy (obstack, pointer, length);
|
||||
}
|
||||
|
||||
POINTER (obstack_copy0) (obstack, pointer, length)
|
||||
struct obstack *obstack;
|
||||
POINTER pointer;
|
||||
int length;
|
||||
{
|
||||
return obstack_copy0 (obstack, pointer, length);
|
||||
}
|
||||
|
||||
#endif /* __STDC__ */
|
||||
|
||||
#endif /* 0 */
|
||||
|
||||
#endif /* _LIBC or not __GNU_LIBRARY__. */
|
484
gnu/usr.bin/grep/obstack.h
Normal file
484
gnu/usr.bin/grep/obstack.h
Normal file
@ -0,0 +1,484 @@
|
||||
/* obstack.h - object stack macros
|
||||
Copyright (C) 1988, 1992 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 2, or (at your option) any
|
||||
later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
|
||||
/* Summary:
|
||||
|
||||
All the apparent functions defined here are macros. The idea
|
||||
is that you would use these pre-tested macros to solve a
|
||||
very specific set of problems, and they would run fast.
|
||||
Caution: no side-effects in arguments please!! They may be
|
||||
evaluated MANY times!!
|
||||
|
||||
These macros operate a stack of objects. Each object starts life
|
||||
small, and may grow to maturity. (Consider building a word syllable
|
||||
by syllable.) An object can move while it is growing. Once it has
|
||||
been "finished" it never changes address again. So the "top of the
|
||||
stack" is typically an immature growing object, while the rest of the
|
||||
stack is of mature, fixed size and fixed address objects.
|
||||
|
||||
These routines grab large chunks of memory, using a function you
|
||||
supply, called `obstack_chunk_alloc'. On occasion, they free chunks,
|
||||
by calling `obstack_chunk_free'. You must define them and declare
|
||||
them before using any obstack macros.
|
||||
|
||||
Each independent stack is represented by a `struct obstack'.
|
||||
Each of the obstack macros expects a pointer to such a structure
|
||||
as the first argument.
|
||||
|
||||
One motivation for this package is the problem of growing char strings
|
||||
in symbol tables. Unless you are "fascist pig with a read-only mind"
|
||||
--Gosper's immortal quote from HAKMEM item 154, out of context--you
|
||||
would not like to put any arbitrary upper limit on the length of your
|
||||
symbols.
|
||||
|
||||
In practice this often means you will build many short symbols and a
|
||||
few long symbols. At the time you are reading a symbol you don't know
|
||||
how long it is. One traditional method is to read a symbol into a
|
||||
buffer, realloc()ating the buffer every time you try to read a symbol
|
||||
that is longer than the buffer. This is beaut, but you still will
|
||||
want to copy the symbol from the buffer to a more permanent
|
||||
symbol-table entry say about half the time.
|
||||
|
||||
With obstacks, you can work differently. Use one obstack for all symbol
|
||||
names. As you read a symbol, grow the name in the obstack gradually.
|
||||
When the name is complete, finalize it. Then, if the symbol exists already,
|
||||
free the newly read name.
|
||||
|
||||
The way we do this is to take a large chunk, allocating memory from
|
||||
low addresses. When you want to build a symbol in the chunk you just
|
||||
add chars above the current "high water mark" in the chunk. When you
|
||||
have finished adding chars, because you got to the end of the symbol,
|
||||
you know how long the chars are, and you can create a new object.
|
||||
Mostly the chars will not burst over the highest address of the chunk,
|
||||
because you would typically expect a chunk to be (say) 100 times as
|
||||
long as an average object.
|
||||
|
||||
In case that isn't clear, when we have enough chars to make up
|
||||
the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed)
|
||||
so we just point to it where it lies. No moving of chars is
|
||||
needed and this is the second win: potentially long strings need
|
||||
never be explicitly shuffled. Once an object is formed, it does not
|
||||
change its address during its lifetime.
|
||||
|
||||
When the chars burst over a chunk boundary, we allocate a larger
|
||||
chunk, and then copy the partly formed object from the end of the old
|
||||
chunk to the beginning of the new larger chunk. We then carry on
|
||||
accreting characters to the end of the object as we normally would.
|
||||
|
||||
A special macro is provided to add a single char at a time to a
|
||||
growing object. This allows the use of register variables, which
|
||||
break the ordinary 'growth' macro.
|
||||
|
||||
Summary:
|
||||
We allocate large chunks.
|
||||
We carve out one object at a time from the current chunk.
|
||||
Once carved, an object never moves.
|
||||
We are free to append data of any size to the currently
|
||||
growing object.
|
||||
Exactly one object is growing in an obstack at any one time.
|
||||
You can run one obstack per control block.
|
||||
You may have as many control blocks as you dare.
|
||||
Because of the way we do it, you can `unwind' an obstack
|
||||
back to a previous state. (You may remove objects much
|
||||
as you would with a stack.)
|
||||
*/
|
||||
|
||||
|
||||
/* Don't do the contents of this file more than once. */
|
||||
|
||||
#ifndef __OBSTACKS__
|
||||
#define __OBSTACKS__
|
||||
|
||||
/* We use subtraction of (char *)0 instead of casting to int
|
||||
because on word-addressable machines a simple cast to int
|
||||
may ignore the byte-within-word field of the pointer. */
|
||||
|
||||
#ifndef __PTR_TO_INT
|
||||
#define __PTR_TO_INT(P) ((P) - (char *)0)
|
||||
#endif
|
||||
|
||||
#ifndef __INT_TO_PTR
|
||||
#define __INT_TO_PTR(P) ((P) + (char *)0)
|
||||
#endif
|
||||
|
||||
/* We need the type of the resulting object. In ANSI C it is ptrdiff_t
|
||||
but in traditional C it is usually long. If we are in ANSI C and
|
||||
don't already have ptrdiff_t get it. */
|
||||
|
||||
#if defined (__STDC__) && ! defined (offsetof)
|
||||
#if defined (__GNUC__) && defined (IN_GCC)
|
||||
/* On Next machine, the system's stddef.h screws up if included
|
||||
after we have defined just ptrdiff_t, so include all of gstddef.h.
|
||||
Otherwise, define just ptrdiff_t, which is all we need. */
|
||||
#ifndef __NeXT__
|
||||
#define __need_ptrdiff_t
|
||||
#endif
|
||||
|
||||
/* While building GCC, the stddef.h that goes with GCC has this name. */
|
||||
#include "gstddef.h"
|
||||
#else
|
||||
#include <stddef.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __STDC__
|
||||
#define PTR_INT_TYPE ptrdiff_t
|
||||
#else
|
||||
#define PTR_INT_TYPE long
|
||||
#endif
|
||||
|
||||
struct _obstack_chunk /* Lives at front of each chunk. */
|
||||
{
|
||||
char *limit; /* 1 past end of this chunk */
|
||||
struct _obstack_chunk *prev; /* address of prior chunk or NULL */
|
||||
char contents[4]; /* objects begin here */
|
||||
};
|
||||
|
||||
struct obstack /* control current object in current chunk */
|
||||
{
|
||||
long chunk_size; /* preferred size to allocate chunks in */
|
||||
struct _obstack_chunk* chunk; /* address of current struct obstack_chunk */
|
||||
char *object_base; /* address of object we are building */
|
||||
char *next_free; /* where to add next char to current object */
|
||||
char *chunk_limit; /* address of char after current chunk */
|
||||
PTR_INT_TYPE temp; /* Temporary for some macros. */
|
||||
int alignment_mask; /* Mask of alignment for each object. */
|
||||
struct _obstack_chunk *(*chunkfun) (); /* User's fcn to allocate a chunk. */
|
||||
void (*freefun) (); /* User's function to free a chunk. */
|
||||
char *extra_arg; /* first arg for chunk alloc/dealloc funcs */
|
||||
unsigned use_extra_arg:1; /* chunk alloc/dealloc funcs take extra arg */
|
||||
unsigned maybe_empty_object:1;/* There is a possibility that the current
|
||||
chunk contains a zero-length object. This
|
||||
prevents freeing the chunk if we allocate
|
||||
a bigger chunk to replace it. */
|
||||
};
|
||||
|
||||
/* Declare the external functions we use; they are in obstack.c. */
|
||||
|
||||
#ifdef __STDC__
|
||||
extern void _obstack_newchunk (struct obstack *, int);
|
||||
extern void _obstack_free (struct obstack *, void *);
|
||||
extern void _obstack_begin (struct obstack *, int, int,
|
||||
void *(*) (), void (*) ());
|
||||
extern void _obstack_begin_1 (struct obstack *, int, int,
|
||||
void *(*) (), void (*) (), void *);
|
||||
#else
|
||||
extern void _obstack_newchunk ();
|
||||
extern void _obstack_free ();
|
||||
extern void _obstack_begin ();
|
||||
extern void _obstack_begin_1 ();
|
||||
#endif
|
||||
|
||||
#ifdef __STDC__
|
||||
|
||||
/* Do the function-declarations after the structs
|
||||
but before defining the macros. */
|
||||
|
||||
void obstack_init (struct obstack *obstack);
|
||||
|
||||
void * obstack_alloc (struct obstack *obstack, int size);
|
||||
|
||||
void * obstack_copy (struct obstack *obstack, void *address, int size);
|
||||
void * obstack_copy0 (struct obstack *obstack, void *address, int size);
|
||||
|
||||
void obstack_free (struct obstack *obstack, void *block);
|
||||
|
||||
void obstack_blank (struct obstack *obstack, int size);
|
||||
|
||||
void obstack_grow (struct obstack *obstack, void *data, int size);
|
||||
void obstack_grow0 (struct obstack *obstack, void *data, int size);
|
||||
|
||||
void obstack_1grow (struct obstack *obstack, int data_char);
|
||||
void obstack_ptr_grow (struct obstack *obstack, void *data);
|
||||
void obstack_int_grow (struct obstack *obstack, int data);
|
||||
|
||||
void * obstack_finish (struct obstack *obstack);
|
||||
|
||||
int obstack_object_size (struct obstack *obstack);
|
||||
|
||||
int obstack_room (struct obstack *obstack);
|
||||
void obstack_1grow_fast (struct obstack *obstack, int data_char);
|
||||
void obstack_ptr_grow_fast (struct obstack *obstack, void *data);
|
||||
void obstack_int_grow_fast (struct obstack *obstack, int data);
|
||||
void obstack_blank_fast (struct obstack *obstack, int size);
|
||||
|
||||
void * obstack_base (struct obstack *obstack);
|
||||
void * obstack_next_free (struct obstack *obstack);
|
||||
int obstack_alignment_mask (struct obstack *obstack);
|
||||
int obstack_chunk_size (struct obstack *obstack);
|
||||
|
||||
#endif /* __STDC__ */
|
||||
|
||||
/* Non-ANSI C cannot really support alternative functions for these macros,
|
||||
so we do not declare them. */
|
||||
|
||||
/* Pointer to beginning of object being allocated or to be allocated next.
|
||||
Note that this might not be the final address of the object
|
||||
because a new chunk might be needed to hold the final size. */
|
||||
|
||||
#define obstack_base(h) ((h)->object_base)
|
||||
|
||||
/* Size for allocating ordinary chunks. */
|
||||
|
||||
#define obstack_chunk_size(h) ((h)->chunk_size)
|
||||
|
||||
/* Pointer to next byte not yet allocated in current chunk. */
|
||||
|
||||
#define obstack_next_free(h) ((h)->next_free)
|
||||
|
||||
/* Mask specifying low bits that should be clear in address of an object. */
|
||||
|
||||
#define obstack_alignment_mask(h) ((h)->alignment_mask)
|
||||
|
||||
#define obstack_init(h) \
|
||||
_obstack_begin ((h), 0, 0, \
|
||||
(void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free)
|
||||
|
||||
#define obstack_begin(h, size) \
|
||||
_obstack_begin ((h), (size), 0, \
|
||||
(void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free)
|
||||
|
||||
#define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \
|
||||
_obstack_begin ((h), (size), (alignment), \
|
||||
(void *(*) ()) (chunkfun), (void (*) ()) (freefun))
|
||||
|
||||
#define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \
|
||||
_obstack_begin_1 ((h), (size), (alignment), \
|
||||
(void *(*) ()) (chunkfun), (void (*) ()) (freefun), (arg))
|
||||
|
||||
#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = achar)
|
||||
|
||||
#define obstack_blank_fast(h,n) ((h)->next_free += (n))
|
||||
|
||||
#if defined (__GNUC__) && defined (__STDC__)
|
||||
#if __GNUC__ < 2 || defined(NeXT)
|
||||
#define __extension__
|
||||
#endif
|
||||
|
||||
/* For GNU C, if not -traditional,
|
||||
we can define these macros to compute all args only once
|
||||
without using a global variable.
|
||||
Also, we can avoid using the `temp' slot, to make faster code. */
|
||||
|
||||
#define obstack_object_size(OBSTACK) \
|
||||
__extension__ \
|
||||
({ struct obstack *__o = (OBSTACK); \
|
||||
(unsigned) (__o->next_free - __o->object_base); })
|
||||
|
||||
#define obstack_room(OBSTACK) \
|
||||
__extension__ \
|
||||
({ struct obstack *__o = (OBSTACK); \
|
||||
(unsigned) (__o->chunk_limit - __o->next_free); })
|
||||
|
||||
/* Note that the call to _obstack_newchunk is enclosed in (..., 0)
|
||||
so that we can avoid having void expressions
|
||||
in the arms of the conditional expression.
|
||||
Casting the third operand to void was tried before,
|
||||
but some compilers won't accept it. */
|
||||
#define obstack_grow(OBSTACK,where,length) \
|
||||
__extension__ \
|
||||
({ struct obstack *__o = (OBSTACK); \
|
||||
int __len = (length); \
|
||||
((__o->next_free + __len > __o->chunk_limit) \
|
||||
? (_obstack_newchunk (__o, __len), 0) : 0); \
|
||||
bcopy (where, __o->next_free, __len); \
|
||||
__o->next_free += __len; \
|
||||
(void) 0; })
|
||||
|
||||
#define obstack_grow0(OBSTACK,where,length) \
|
||||
__extension__ \
|
||||
({ struct obstack *__o = (OBSTACK); \
|
||||
int __len = (length); \
|
||||
((__o->next_free + __len + 1 > __o->chunk_limit) \
|
||||
? (_obstack_newchunk (__o, __len + 1), 0) : 0), \
|
||||
bcopy (where, __o->next_free, __len), \
|
||||
__o->next_free += __len, \
|
||||
*(__o->next_free)++ = 0; \
|
||||
(void) 0; })
|
||||
|
||||
#define obstack_1grow(OBSTACK,datum) \
|
||||
__extension__ \
|
||||
({ struct obstack *__o = (OBSTACK); \
|
||||
((__o->next_free + 1 > __o->chunk_limit) \
|
||||
? (_obstack_newchunk (__o, 1), 0) : 0), \
|
||||
*(__o->next_free)++ = (datum); \
|
||||
(void) 0; })
|
||||
|
||||
/* These assume that the obstack alignment is good enough for pointers or ints,
|
||||
and that the data added so far to the current object
|
||||
shares that much alignment. */
|
||||
|
||||
#define obstack_ptr_grow(OBSTACK,datum) \
|
||||
__extension__ \
|
||||
({ struct obstack *__o = (OBSTACK); \
|
||||
((__o->next_free + sizeof (void *) > __o->chunk_limit) \
|
||||
? (_obstack_newchunk (__o, sizeof (void *)), 0) : 0), \
|
||||
*((void **)__o->next_free)++ = ((void *)datum); \
|
||||
(void) 0; })
|
||||
|
||||
#define obstack_int_grow(OBSTACK,datum) \
|
||||
__extension__ \
|
||||
({ struct obstack *__o = (OBSTACK); \
|
||||
((__o->next_free + sizeof (int) > __o->chunk_limit) \
|
||||
? (_obstack_newchunk (__o, sizeof (int)), 0) : 0), \
|
||||
*((int *)__o->next_free)++ = ((int)datum); \
|
||||
(void) 0; })
|
||||
|
||||
#define obstack_ptr_grow_fast(h,aptr) (*((void **)(h)->next_free)++ = (void *)aptr)
|
||||
#define obstack_int_grow_fast(h,aint) (*((int *)(h)->next_free)++ = (int)aint)
|
||||
|
||||
#define obstack_blank(OBSTACK,length) \
|
||||
__extension__ \
|
||||
({ struct obstack *__o = (OBSTACK); \
|
||||
int __len = (length); \
|
||||
((__o->chunk_limit - __o->next_free < __len) \
|
||||
? (_obstack_newchunk (__o, __len), 0) : 0); \
|
||||
__o->next_free += __len; \
|
||||
(void) 0; })
|
||||
|
||||
#define obstack_alloc(OBSTACK,length) \
|
||||
__extension__ \
|
||||
({ struct obstack *__h = (OBSTACK); \
|
||||
obstack_blank (__h, (length)); \
|
||||
obstack_finish (__h); })
|
||||
|
||||
#define obstack_copy(OBSTACK,where,length) \
|
||||
__extension__ \
|
||||
({ struct obstack *__h = (OBSTACK); \
|
||||
obstack_grow (__h, (where), (length)); \
|
||||
obstack_finish (__h); })
|
||||
|
||||
#define obstack_copy0(OBSTACK,where,length) \
|
||||
__extension__ \
|
||||
({ struct obstack *__h = (OBSTACK); \
|
||||
obstack_grow0 (__h, (where), (length)); \
|
||||
obstack_finish (__h); })
|
||||
|
||||
/* The local variable is named __o1 to avoid a name conflict
|
||||
when obstack_blank is called. */
|
||||
#define obstack_finish(OBSTACK) \
|
||||
__extension__ \
|
||||
({ struct obstack *__o1 = (OBSTACK); \
|
||||
void *value = (void *) __o1->object_base; \
|
||||
if (__o1->next_free == value) \
|
||||
__o1->maybe_empty_object = 1; \
|
||||
__o1->next_free \
|
||||
= __INT_TO_PTR ((__PTR_TO_INT (__o1->next_free)+__o1->alignment_mask)\
|
||||
& ~ (__o1->alignment_mask)); \
|
||||
((__o1->next_free - (char *)__o1->chunk \
|
||||
> __o1->chunk_limit - (char *)__o1->chunk) \
|
||||
? (__o1->next_free = __o1->chunk_limit) : 0); \
|
||||
__o1->object_base = __o1->next_free; \
|
||||
value; })
|
||||
|
||||
#define obstack_free(OBSTACK, OBJ) \
|
||||
__extension__ \
|
||||
({ struct obstack *__o = (OBSTACK); \
|
||||
void *__obj = (OBJ); \
|
||||
if (__obj > (void *)__o->chunk && __obj < (void *)__o->chunk_limit) \
|
||||
__o->next_free = __o->object_base = __obj; \
|
||||
else (obstack_free) (__o, __obj); })
|
||||
|
||||
#else /* not __GNUC__ or not __STDC__ */
|
||||
|
||||
#define obstack_object_size(h) \
|
||||
(unsigned) ((h)->next_free - (h)->object_base)
|
||||
|
||||
#define obstack_room(h) \
|
||||
(unsigned) ((h)->chunk_limit - (h)->next_free)
|
||||
|
||||
#define obstack_grow(h,where,length) \
|
||||
( (h)->temp = (length), \
|
||||
(((h)->next_free + (h)->temp > (h)->chunk_limit) \
|
||||
? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \
|
||||
bcopy (where, (h)->next_free, (h)->temp), \
|
||||
(h)->next_free += (h)->temp)
|
||||
|
||||
#define obstack_grow0(h,where,length) \
|
||||
( (h)->temp = (length), \
|
||||
(((h)->next_free + (h)->temp + 1 > (h)->chunk_limit) \
|
||||
? (_obstack_newchunk ((h), (h)->temp + 1), 0) : 0), \
|
||||
bcopy (where, (h)->next_free, (h)->temp), \
|
||||
(h)->next_free += (h)->temp, \
|
||||
*((h)->next_free)++ = 0)
|
||||
|
||||
#define obstack_1grow(h,datum) \
|
||||
( (((h)->next_free + 1 > (h)->chunk_limit) \
|
||||
? (_obstack_newchunk ((h), 1), 0) : 0), \
|
||||
*((h)->next_free)++ = (datum))
|
||||
|
||||
#define obstack_ptr_grow(h,datum) \
|
||||
( (((h)->next_free + sizeof (char *) > (h)->chunk_limit) \
|
||||
? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \
|
||||
*((char **)(((h)->next_free+=sizeof(char *))-sizeof(char *))) = ((char *)datum))
|
||||
|
||||
#define obstack_int_grow(h,datum) \
|
||||
( (((h)->next_free + sizeof (int) > (h)->chunk_limit) \
|
||||
? (_obstack_newchunk ((h), sizeof (int)), 0) : 0), \
|
||||
*((int *)(((h)->next_free+=sizeof(int))-sizeof(int))) = ((int)datum))
|
||||
|
||||
#define obstack_ptr_grow_fast(h,aptr) (*((char **)(h)->next_free)++ = (char *)aptr)
|
||||
#define obstack_int_grow_fast(h,aint) (*((int *)(h)->next_free)++ = (int)aint)
|
||||
|
||||
#define obstack_blank(h,length) \
|
||||
( (h)->temp = (length), \
|
||||
(((h)->chunk_limit - (h)->next_free < (h)->temp) \
|
||||
? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \
|
||||
(h)->next_free += (h)->temp)
|
||||
|
||||
#define obstack_alloc(h,length) \
|
||||
(obstack_blank ((h), (length)), obstack_finish ((h)))
|
||||
|
||||
#define obstack_copy(h,where,length) \
|
||||
(obstack_grow ((h), (where), (length)), obstack_finish ((h)))
|
||||
|
||||
#define obstack_copy0(h,where,length) \
|
||||
(obstack_grow0 ((h), (where), (length)), obstack_finish ((h)))
|
||||
|
||||
#define obstack_finish(h) \
|
||||
( ((h)->next_free == (h)->object_base \
|
||||
? (((h)->maybe_empty_object = 1), 0) \
|
||||
: 0), \
|
||||
(h)->temp = __PTR_TO_INT ((h)->object_base), \
|
||||
(h)->next_free \
|
||||
= __INT_TO_PTR ((__PTR_TO_INT ((h)->next_free)+(h)->alignment_mask) \
|
||||
& ~ ((h)->alignment_mask)), \
|
||||
(((h)->next_free - (char *)(h)->chunk \
|
||||
> (h)->chunk_limit - (char *)(h)->chunk) \
|
||||
? ((h)->next_free = (h)->chunk_limit) : 0), \
|
||||
(h)->object_base = (h)->next_free, \
|
||||
__INT_TO_PTR ((h)->temp))
|
||||
|
||||
#ifdef __STDC__
|
||||
#define obstack_free(h,obj) \
|
||||
( (h)->temp = (char *)(obj) - (char *) (h)->chunk, \
|
||||
(((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\
|
||||
? (int) ((h)->next_free = (h)->object_base \
|
||||
= (h)->temp + (char *) (h)->chunk) \
|
||||
: (((obstack_free) ((h), (h)->temp + (char *) (h)->chunk), 0), 0)))
|
||||
#else
|
||||
#define obstack_free(h,obj) \
|
||||
( (h)->temp = (char *)(obj) - (char *) (h)->chunk, \
|
||||
(((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\
|
||||
? (int) ((h)->next_free = (h)->object_base \
|
||||
= (h)->temp + (char *) (h)->chunk) \
|
||||
: (_obstack_free ((h), (h)->temp + (char *) (h)->chunk), 0)))
|
||||
#endif
|
||||
|
||||
#endif /* not __GNUC__ or not __STDC__ */
|
||||
|
||||
#endif /* not __OBSTACKS__ */
|
481
gnu/usr.bin/grep/search.c
Normal file
481
gnu/usr.bin/grep/search.c
Normal file
@ -0,0 +1,481 @@
|
||||
/* search.c - searching subroutines using dfa, kwset and regex for grep.
|
||||
Copyright (C) 1992 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
Written August 1992 by Mike Haertel. */
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#ifdef STDC_HEADERS
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#else
|
||||
#define UCHAR_MAX 255
|
||||
#include <sys/types.h>
|
||||
extern char *malloc();
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_MEMCHR
|
||||
#include <string.h>
|
||||
#ifdef NEED_MEMORY_H
|
||||
#include <memory.h>
|
||||
#endif
|
||||
#else
|
||||
#ifdef __STDC__
|
||||
extern void *memchr();
|
||||
#else
|
||||
extern char *memchr();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
|
||||
#undef bcopy
|
||||
#define bcopy(s, d, n) memcpy((d), (s), (n))
|
||||
#endif
|
||||
|
||||
#ifdef isascii
|
||||
#define ISALNUM(C) (isascii(C) && isalnum(C))
|
||||
#define ISUPPER(C) (isascii(C) && isupper(C))
|
||||
#else
|
||||
#define ISALNUM(C) isalnum(C)
|
||||
#define ISUPPER(C) isupper(C)
|
||||
#endif
|
||||
|
||||
#define TOLOWER(C) (ISUPPER(C) ? tolower(C) : (C))
|
||||
|
||||
#include "grep.h"
|
||||
#include "dfa.h"
|
||||
#include "kwset.h"
|
||||
#include "regex.h"
|
||||
|
||||
#define NCHAR (UCHAR_MAX + 1)
|
||||
|
||||
#if __STDC__
|
||||
static void Gcompile(char *, size_t);
|
||||
static void Ecompile(char *, size_t);
|
||||
static char *EGexecute(char *, size_t, char **);
|
||||
static void Fcompile(char *, size_t);
|
||||
static char *Fexecute(char *, size_t, char **);
|
||||
#else
|
||||
static void Gcompile();
|
||||
static void Ecompile();
|
||||
static char *EGexecute();
|
||||
static void Fcompile();
|
||||
static char *Fexecute();
|
||||
#endif
|
||||
|
||||
/* Here is the matchers vector for the main program. */
|
||||
struct matcher matchers[] = {
|
||||
{ "default", Gcompile, EGexecute },
|
||||
{ "grep", Gcompile, EGexecute },
|
||||
{ "ggrep", Gcompile, EGexecute },
|
||||
{ "egrep", Ecompile, EGexecute },
|
||||
{ "posix-egrep", Ecompile, EGexecute },
|
||||
{ "gegrep", Ecompile, EGexecute },
|
||||
{ "fgrep", Fcompile, Fexecute },
|
||||
{ "gfgrep", Fcompile, Fexecute },
|
||||
{ 0, 0, 0 },
|
||||
};
|
||||
|
||||
/* For -w, we also consider _ to be word constituent. */
|
||||
#define WCHAR(C) (ISALNUM(C) || (C) == '_')
|
||||
|
||||
/* DFA compiled regexp. */
|
||||
static struct dfa dfa;
|
||||
|
||||
/* Regex compiled regexp. */
|
||||
static struct re_pattern_buffer regex;
|
||||
|
||||
/* KWset compiled pattern. For Ecompile and Gcompile, we compile
|
||||
a list of strings, at least one of which is known to occur in
|
||||
any string matching the regexp. */
|
||||
static kwset_t kwset;
|
||||
|
||||
/* Last compiled fixed string known to exactly match the regexp.
|
||||
If kwsexec() returns < lastexact, then we don't need to
|
||||
call the regexp matcher at all. */
|
||||
static int lastexact;
|
||||
|
||||
void
|
||||
dfaerror(mesg)
|
||||
char *mesg;
|
||||
{
|
||||
fatal(mesg, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
kwsinit()
|
||||
{
|
||||
static char trans[NCHAR];
|
||||
int i;
|
||||
|
||||
if (match_icase)
|
||||
for (i = 0; i < NCHAR; ++i)
|
||||
trans[i] = TOLOWER(i);
|
||||
|
||||
if (!(kwset = kwsalloc(match_icase ? trans : (char *) 0)))
|
||||
fatal("memory exhausted", 0);
|
||||
}
|
||||
|
||||
/* If the DFA turns out to have some set of fixed strings one of
|
||||
which must occur in the match, then we build a kwset matcher
|
||||
to find those strings, and thus quickly filter out impossible
|
||||
matches. */
|
||||
static void
|
||||
kwsmusts()
|
||||
{
|
||||
struct dfamust *dm;
|
||||
char *err;
|
||||
|
||||
if (dfa.musts)
|
||||
{
|
||||
kwsinit();
|
||||
/* First, we compile in the substrings known to be exact
|
||||
matches. The kwset matcher will return the index
|
||||
of the matching string that it chooses. */
|
||||
for (dm = dfa.musts; dm; dm = dm->next)
|
||||
{
|
||||
if (!dm->exact)
|
||||
continue;
|
||||
++lastexact;
|
||||
if ((err = kwsincr(kwset, dm->must, strlen(dm->must))) != 0)
|
||||
fatal(err, 0);
|
||||
}
|
||||
/* Now, we compile the substrings that will require
|
||||
the use of the regexp matcher. */
|
||||
for (dm = dfa.musts; dm; dm = dm->next)
|
||||
{
|
||||
if (dm->exact)
|
||||
continue;
|
||||
if ((err = kwsincr(kwset, dm->must, strlen(dm->must))) != 0)
|
||||
fatal(err, 0);
|
||||
}
|
||||
if ((err = kwsprep(kwset)) != 0)
|
||||
fatal(err, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
Gcompile(pattern, size)
|
||||
char *pattern;
|
||||
size_t size;
|
||||
{
|
||||
#ifdef __STDC__
|
||||
const
|
||||
#endif
|
||||
char *err;
|
||||
|
||||
re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
|
||||
dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase);
|
||||
|
||||
if ((err = re_compile_pattern(pattern, size, ®ex)) != 0)
|
||||
fatal(err, 0);
|
||||
|
||||
dfainit(&dfa);
|
||||
|
||||
/* In the match_words and match_lines cases, we use a different pattern
|
||||
for the DFA matcher that will quickly throw out cases that won't work.
|
||||
Then if DFA succeeds we do some hairy stuff using the regex matcher
|
||||
to decide whether the match should really count. */
|
||||
if (match_words || match_lines)
|
||||
{
|
||||
/* In the whole-word case, we use the pattern:
|
||||
(^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
|
||||
In the whole-line case, we use the pattern:
|
||||
^(userpattern)$.
|
||||
BUG: Using [A-Za-z_] is locale-dependent! */
|
||||
|
||||
char *n = malloc(size + 50);
|
||||
int i = 0;
|
||||
|
||||
strcpy(n, "");
|
||||
|
||||
if (match_lines)
|
||||
strcpy(n, "^\\(");
|
||||
if (match_words)
|
||||
strcpy(n, "\\(^\\|[^0-9A-Za-z_]\\)\\(");
|
||||
|
||||
i = strlen(n);
|
||||
bcopy(pattern, n + i, size);
|
||||
i += size;
|
||||
|
||||
if (match_words)
|
||||
strcpy(n + i, "\\)\\([^0-9A-Za-z_]\\|$\\)");
|
||||
if (match_lines)
|
||||
strcpy(n + i, "\\)$");
|
||||
|
||||
i += strlen(n + i);
|
||||
dfacomp(n, i, &dfa, 1);
|
||||
}
|
||||
else
|
||||
dfacomp(pattern, size, &dfa, 1);
|
||||
|
||||
kwsmusts();
|
||||
}
|
||||
|
||||
static void
|
||||
Ecompile(pattern, size)
|
||||
char *pattern;
|
||||
size_t size;
|
||||
{
|
||||
#ifdef __STDC__
|
||||
const
|
||||
#endif
|
||||
char *err;
|
||||
|
||||
if (strcmp(matcher, "posix-egrep") == 0)
|
||||
{
|
||||
re_set_syntax(RE_SYNTAX_POSIX_EGREP);
|
||||
dfasyntax(RE_SYNTAX_POSIX_EGREP, match_icase);
|
||||
}
|
||||
else
|
||||
{
|
||||
re_set_syntax(RE_SYNTAX_EGREP);
|
||||
dfasyntax(RE_SYNTAX_EGREP, match_icase);
|
||||
}
|
||||
|
||||
if ((err = re_compile_pattern(pattern, size, ®ex)) != 0)
|
||||
fatal(err, 0);
|
||||
|
||||
dfainit(&dfa);
|
||||
|
||||
/* In the match_words and match_lines cases, we use a different pattern
|
||||
for the DFA matcher that will quickly throw out cases that won't work.
|
||||
Then if DFA succeeds we do some hairy stuff using the regex matcher
|
||||
to decide whether the match should really count. */
|
||||
if (match_words || match_lines)
|
||||
{
|
||||
/* In the whole-word case, we use the pattern:
|
||||
(^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$).
|
||||
In the whole-line case, we use the pattern:
|
||||
^(userpattern)$.
|
||||
BUG: Using [A-Za-z_] is locale-dependent! */
|
||||
|
||||
char *n = malloc(size + 50);
|
||||
int i = 0;
|
||||
|
||||
strcpy(n, "");
|
||||
|
||||
if (match_lines)
|
||||
strcpy(n, "^(");
|
||||
if (match_words)
|
||||
strcpy(n, "(^|[^0-9A-Za-z_])(");
|
||||
|
||||
i = strlen(n);
|
||||
bcopy(pattern, n + i, size);
|
||||
i += size;
|
||||
|
||||
if (match_words)
|
||||
strcpy(n + i, ")([^0-9A-Za-z_]|$)");
|
||||
if (match_lines)
|
||||
strcpy(n + i, ")$");
|
||||
|
||||
i += strlen(n + i);
|
||||
dfacomp(n, i, &dfa, 1);
|
||||
}
|
||||
else
|
||||
dfacomp(pattern, size, &dfa, 1);
|
||||
|
||||
kwsmusts();
|
||||
}
|
||||
|
||||
static char *
|
||||
EGexecute(buf, size, endp)
|
||||
char *buf;
|
||||
size_t size;
|
||||
char **endp;
|
||||
{
|
||||
register char *buflim, *beg, *end, save;
|
||||
int backref, start, len;
|
||||
struct kwsmatch kwsm;
|
||||
static struct re_registers regs; /* This is static on account of a BRAIN-DEAD
|
||||
Q@#%!# library interface in regex.c. */
|
||||
|
||||
buflim = buf + size;
|
||||
|
||||
for (beg = end = buf; end < buflim; beg = end + 1)
|
||||
{
|
||||
if (kwset)
|
||||
{
|
||||
/* Find a possible match using the KWset matcher. */
|
||||
beg = kwsexec(kwset, beg, buflim - beg, &kwsm);
|
||||
if (!beg)
|
||||
goto failure;
|
||||
/* Narrow down to the line containing the candidate, and
|
||||
run it through DFA. */
|
||||
end = memchr(beg, '\n', buflim - beg);
|
||||
if (!end)
|
||||
end = buflim;
|
||||
while (beg > buf && beg[-1] != '\n')
|
||||
--beg;
|
||||
save = *end;
|
||||
if (kwsm.index < lastexact)
|
||||
goto success;
|
||||
if (!dfaexec(&dfa, beg, end, 0, (int *) 0, &backref))
|
||||
{
|
||||
*end = save;
|
||||
continue;
|
||||
}
|
||||
*end = save;
|
||||
/* Successful, no backreferences encountered. */
|
||||
if (!backref)
|
||||
goto success;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* No good fixed strings; start with DFA. */
|
||||
save = *buflim;
|
||||
beg = dfaexec(&dfa, beg, buflim, 0, (int *) 0, &backref);
|
||||
*buflim = save;
|
||||
if (!beg)
|
||||
goto failure;
|
||||
/* Narrow down to the line we've found. */
|
||||
end = memchr(beg, '\n', buflim - beg);
|
||||
if (!end)
|
||||
end = buflim;
|
||||
while (beg > buf && beg[-1] != '\n')
|
||||
--beg;
|
||||
/* Successful, no backreferences encountered! */
|
||||
if (!backref)
|
||||
goto success;
|
||||
}
|
||||
/* If we've made it to this point, this means DFA has seen
|
||||
a probable match, and we need to run it through Regex. */
|
||||
regex.not_eol = 0;
|
||||
if ((start = re_search(®ex, beg, end - beg, 0, end - beg, ®s)) >= 0)
|
||||
{
|
||||
len = regs.end[0] - start;
|
||||
if (!match_lines && !match_words || match_lines && len == end - beg)
|
||||
goto success;
|
||||
/* If -w, check if the match aligns with word boundaries.
|
||||
We do this iteratively because:
|
||||
(a) the line may contain more than one occurence of the pattern, and
|
||||
(b) Several alternatives in the pattern might be valid at a given
|
||||
point, and we may need to consider a shorter one to find a word
|
||||
boundary. */
|
||||
if (match_words)
|
||||
while (start >= 0)
|
||||
{
|
||||
if ((start == 0 || !WCHAR(beg[start - 1]))
|
||||
&& (len == end - beg || !WCHAR(beg[start + len])))
|
||||
goto success;
|
||||
if (len > 0)
|
||||
{
|
||||
/* Try a shorter length anchored at the same place. */
|
||||
--len;
|
||||
regex.not_eol = 1;
|
||||
len = re_match(®ex, beg, start + len, start, ®s);
|
||||
}
|
||||
if (len <= 0)
|
||||
{
|
||||
/* Try looking further on. */
|
||||
if (start == end - beg)
|
||||
break;
|
||||
++start;
|
||||
regex.not_eol = 0;
|
||||
start = re_search(®ex, beg, end - beg,
|
||||
start, end - beg - start, ®s);
|
||||
len = regs.end[0] - start;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
failure:
|
||||
return 0;
|
||||
|
||||
success:
|
||||
*endp = end < buflim ? end + 1 : end;
|
||||
return beg;
|
||||
}
|
||||
|
||||
static void
|
||||
Fcompile(pattern, size)
|
||||
char *pattern;
|
||||
size_t size;
|
||||
{
|
||||
char *beg, *lim, *err;
|
||||
|
||||
kwsinit();
|
||||
beg = pattern;
|
||||
do
|
||||
{
|
||||
for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim)
|
||||
;
|
||||
if ((err = kwsincr(kwset, beg, lim - beg)) != 0)
|
||||
fatal(err, 0);
|
||||
if (lim < pattern + size)
|
||||
++lim;
|
||||
beg = lim;
|
||||
}
|
||||
while (beg < pattern + size);
|
||||
|
||||
if ((err = kwsprep(kwset)) != 0)
|
||||
fatal(err, 0);
|
||||
}
|
||||
|
||||
static char *
|
||||
Fexecute(buf, size, endp)
|
||||
char *buf;
|
||||
size_t size;
|
||||
char **endp;
|
||||
{
|
||||
register char *beg, *try, *end;
|
||||
register size_t len;
|
||||
struct kwsmatch kwsmatch;
|
||||
|
||||
for (beg = buf; beg <= buf + size; ++beg)
|
||||
{
|
||||
if (!(beg = kwsexec(kwset, beg, buf + size - beg, &kwsmatch)))
|
||||
return 0;
|
||||
len = kwsmatch.size[0];
|
||||
if (match_lines)
|
||||
{
|
||||
if (beg > buf && beg[-1] != '\n')
|
||||
continue;
|
||||
if (beg + len < buf + size && beg[len] != '\n')
|
||||
continue;
|
||||
goto success;
|
||||
}
|
||||
else if (match_words)
|
||||
for (try = beg; len && try;)
|
||||
{
|
||||
if (try > buf && WCHAR((unsigned char) try[-1]))
|
||||
break;
|
||||
if (try + len < buf + size && WCHAR((unsigned char) try[len]))
|
||||
{
|
||||
try = kwsexec(kwset, beg, --len, &kwsmatch);
|
||||
len = kwsmatch.size[0];
|
||||
}
|
||||
else
|
||||
goto success;
|
||||
}
|
||||
else
|
||||
goto success;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
success:
|
||||
if ((end = memchr(beg + len, '\n', (buf + size) - (beg + len))) != 0)
|
||||
++end;
|
||||
else
|
||||
end = buf + size;
|
||||
*endp = end;
|
||||
while (beg > buf && beg[-1] != '\n')
|
||||
--beg;
|
||||
return beg;
|
||||
}
|
Loading…
Reference in New Issue
Block a user