Merge FreeBSD changes into 2.4d.

FreeBSD changes OBE'ed by 2.4d:
* rev 1.5 - use collate for alpha character ranges.
This commit is contained in:
ru 2000-01-31 13:28:08 +00:00
parent 2dd28130d9
commit ebee1ce115

View File

@ -79,6 +79,16 @@ extern void free();
#define ISCNTRL(C) (isascii(C) && iscntrl(C))
#endif
/* ISASCIIDIGIT differs from ISDIGIT, as follows:
- Its arg may be any int or unsigned int; it need not be an unsigned char.
- It's guaranteed to evaluate its argument exactly once.
- It's typically faster.
Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that
only '0' through '9' are digits. Prefer ISASCIIDIGIT to ISDIGIT unless
it's important to use the locale's definition of `digit' even when the
host does not conform to Posix. */
#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9)
/* If we (don't) have I18N. */
/* glibc defines _ */
#ifndef _
@ -150,29 +160,8 @@ static char **comsubs PARAMS ((char *left, char *right));
static char **addlists PARAMS ((char **old, char **new));
static char **inboth PARAMS ((char **left, char **right));
#ifdef __FreeBSD__
static int
collate_range_cmp(c1, c2)
int c1, c2;
{
static char s1[2], s2[2];
int r;
if (c1 == c2)
return 0;
s1[0] = c1;
s2[0] = c2;
if ((r = strcoll(s1, s2)) == 0)
r = c1 - c2;
return r;
}
#endif
static ptr_t
xcalloc(n, s)
size_t n;
size_t s;
xcalloc (size_t n, size_t s)
{
ptr_t r = calloc(n, s);
@ -182,8 +171,7 @@ xcalloc(n, s)
}
static ptr_t
xmalloc(n)
size_t n;
xmalloc (size_t n)
{
ptr_t r = malloc(n);
@ -194,9 +182,7 @@ xmalloc(n)
}
static ptr_t
xrealloc(p, n)
ptr_t p;
size_t n;
xrealloc (ptr_t p, size_t n)
{
ptr_t r = realloc(p, n);
@ -222,8 +208,7 @@ xrealloc(p, n)
#ifdef DEBUG
static void
prtok(t)
token t;
prtok (token t)
{
char *s;
@ -261,33 +246,25 @@ prtok(t)
/* Stuff pertaining to charclasses. */
static int
tstbit(b, c)
int b;
charclass c;
tstbit (int b, charclass c)
{
return c[b / INTBITS] & 1 << b % INTBITS;
}
static void
setbit(b, c)
int b;
charclass c;
setbit (int b, charclass c)
{
c[b / INTBITS] |= 1 << b % INTBITS;
}
static void
clrbit(b, c)
int b;
charclass c;
clrbit (int b, charclass c)
{
c[b / INTBITS] &= ~(1 << b % INTBITS);
}
static void
copyset(src, dst)
charclass src;
charclass dst;
copyset (charclass src, charclass dst)
{
int i;
@ -296,8 +273,7 @@ copyset(src, dst)
}
static void
zeroset(s)
charclass s;
zeroset (charclass s)
{
int i;
@ -306,8 +282,7 @@ zeroset(s)
}
static void
notset(s)
charclass s;
notset (charclass s)
{
int i;
@ -316,9 +291,7 @@ notset(s)
}
static int
equal(s1, s2)
charclass s1;
charclass s2;
equal (charclass s1, charclass s2)
{
int i;
@ -333,8 +306,7 @@ static struct dfa *dfa;
/* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */
static int
charclass_index(s)
charclass s;
charclass_index (charclass s)
{
int i;
@ -358,10 +330,7 @@ static unsigned char eolbyte;
/* Entry point to set syntax options. */
void
dfasyntax(bits, fold, eol)
reg_syntax_t bits;
int fold;
int eol;
dfasyntax (reg_syntax_t bits, int fold, int eol)
{
syntax_bits_set = 1;
syntax_bits = bits;
@ -387,10 +356,12 @@ static int minrep, maxrep; /* Repeat counts for {m,n}. */
#define FETCH(c, eoferr) \
{ \
if (! lexleft) \
if (eoferr != 0) \
dfaerror(eoferr); \
else \
return lasttok = END; \
{ \
if (eoferr != 0) \
dfaerror (eoferr); \
else \
return lasttok = END; \
} \
(c) = (unsigned char) *lexptr++; \
--lexleft; \
}
@ -413,8 +384,8 @@ FUNC(is_print, ISPRINT)
FUNC(is_graph, ISGRAPH)
FUNC(is_cntrl, ISCNTRL)
static int is_blank(c)
int c;
static int
is_blank (int c)
{
return (c == ' ' || c == '\t');
}
@ -445,8 +416,7 @@ static struct {
#define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')
static int
looking_at(s)
const char *s;
looking_at (char const *s)
{
size_t len;
@ -457,12 +427,14 @@ looking_at(s)
}
static token
lex()
lex (void)
{
token c, c1, c2;
int backslash = 0, invert;
charclass ccl;
int i;
char lo[2];
char hi[2];
/* Basic plan: We fetch a character. If it's a backslash,
we set the backslash flag and go through the loop again.
@ -595,10 +567,10 @@ lex()
int lo = -1, hi = -1;
char const *p = lexptr;
char const *lim = p + lexleft;
for (; p != lim && ISDIGIT (*p); p++)
for (; p != lim && ISASCIIDIGIT (*p); p++)
lo = (lo < 0 ? 0 : lo * 10) + *p - '0';
if (p != lim && *p == ',')
while (++p != lim && ISDIGIT (*p))
while (++p != lim && ISASCIIDIGIT (*p))
hi = (hi < 0 ? 0 : hi * 10) + *p - '0';
else
hi = lo;
@ -613,13 +585,13 @@ lex()
{M,} - minimum count, maximum is infinity
{M,N} - M through N */
FETCH(c, _("unfinished repeat count"));
if (ISDIGIT(c))
if (ISASCIIDIGIT (c))
{
minrep = c - '0';
for (;;)
{
FETCH(c, _("unfinished repeat count"));
if (!ISDIGIT(c))
if (! ISASCIIDIGIT (c))
break;
minrep = 10 * minrep + c - '0';
}
@ -629,7 +601,7 @@ lex()
if (c == ',')
{
FETCH (c, _("unfinished repeat count"));
if (! ISDIGIT (c))
if (! ISASCIIDIGIT (c))
maxrep = -1;
else
{
@ -637,7 +609,7 @@ lex()
for (;;)
{
FETCH (c, _("unfinished repeat count"));
if (! ISDIGIT (c))
if (! ISASCIIDIGIT (c))
break;
maxrep = 10 * maxrep + c - '0';
}
@ -776,35 +748,26 @@ lex()
}
else
c2 = c;
#ifdef __FreeBSD__
if (collate_range_cmp(c, c2) <= 0)
{
token c3;
for (c3 = 0; c3 < NOTCHAR; ++c3) {
if (collate_range_cmp(c, c3) <= 0 &&
collate_range_cmp(c3, c2) <= 0) {
setbit(c3, ccl);
if (case_fold)
if (ISUPPER(c3))
setbit(tolower(c3), ccl);
else if (ISLOWER(c))
setbit(toupper(c3), ccl);
}
}
}
#else
while (c <= c2)
lo[0] = c; lo[1] = '\0';
hi[0] = c2; hi[1] = '\0';
for (c = 0; c < NOTCHAR; c++)
{
setbit(c, ccl);
if (case_fold)
if (ISUPPER(c))
setbit(tolower(c), ccl);
else if (ISLOWER(c))
setbit(toupper(c), ccl);
++c;
char ch[2];
ch[0] = c; ch[1] = '\0';
if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0)
{
setbit (c, ccl);
if (case_fold)
{
if (ISUPPER (c))
setbit (tolower (c), ccl);
else if (ISLOWER (c))
setbit (toupper (c), ccl);
}
}
}
#endif
skip:
;
}
@ -853,8 +816,7 @@ static int depth; /* Current depth of a hypothetical stack
/* Add the given token to the parse tree, maintaining the depth count and
updating the maximum depth if necessary. */
static void
addtok(t)
token t;
addtok (token t)
{
REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
dfa->tokens[dfa->tindex++] = t;
@ -913,7 +875,7 @@ addtok(t)
The parser builds a parse tree in postfix form in an array of tokens. */
static void
atom()
atom (void)
{
if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
|| tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
@ -936,8 +898,7 @@ atom()
/* Return the number of tokens in the given subexpression. */
static int
nsubtoks(tindex)
int tindex;
nsubtoks (int tindex)
{
int ntoks1;
@ -959,8 +920,7 @@ int tindex;
/* Copy the given subexpression to the top of the tree. */
static void
copytoks(tindex, ntokens)
int tindex, ntokens;
copytoks (int tindex, int ntokens)
{
int i;
@ -969,7 +929,7 @@ copytoks(tindex, ntokens)
}
static void
closure()
closure (void)
{
int tindex, ntokens, i;
@ -1004,7 +964,7 @@ closure()
}
static void
branch()
branch (void)
{
closure();
while (tok != RPAREN && tok != OR && tok >= 0)
@ -1015,8 +975,7 @@ branch()
}
static void
regexp(toplevel)
int toplevel;
regexp (int toplevel)
{
branch();
while (tok == OR)
@ -1034,11 +993,7 @@ regexp(toplevel)
length of the string, so s can include NUL characters. D is a pointer to
the struct dfa to parse into. */
void
dfaparse(s, len, d)
char *s;
size_t len;
struct dfa *d;
dfaparse (char *s, size_t len, struct dfa *d)
{
dfa = d;
lexstart = lexptr = s;
@ -1071,9 +1026,7 @@ dfaparse(s, len, d)
/* Copy one set to another; the destination must be large enough. */
static void
copy(src, dst)
position_set *src;
position_set *dst;
copy (position_set *src, position_set *dst)
{
int i;
@ -1087,9 +1040,7 @@ copy(src, dst)
the same index then their constraints are logically or'd together.
S->elems must point to an array large enough to hold the resulting set. */
static void
insert(p, s)
position p;
position_set *s;
insert (position p, position_set *s)
{
int i;
position t1, t2;
@ -1114,10 +1065,7 @@ insert(p, s)
/* Merge two sets of positions into a third. The result is exactly as if
the positions of both sets were inserted into an initially empty set. */
static void
merge(s1, s2, m)
position_set *s1;
position_set *s2;
position_set *m;
merge (position_set *s1, position_set *s2, position_set *m)
{
int i = 0, j = 0;
@ -1140,9 +1088,7 @@ merge(s1, s2, m)
/* Delete a position from a set. */
static void
delete(p, s)
position p;
position_set *s;
delete (position p, position_set *s)
{
int i;
@ -1159,11 +1105,7 @@ delete(p, s)
state. Newline and letter tell whether we got here on a newline or
letter, respectively. */
static int
state_index(d, s, newline, letter)
struct dfa *d;
position_set *s;
int newline;
int letter;
state_index (struct dfa *d, position_set *s, int newline, int letter)
{
int hash = 0;
int constraint;
@ -1228,12 +1170,8 @@ state_index(d, s, newline, letter)
that position with the elements of its follow labeled with an appropriate
constraint. Repeat exhaustively until no funny positions are left.
S->elems must be large enough to hold the result. */
static void epsclosure PARAMS ((position_set *s, struct dfa *d));
static void
epsclosure(s, d)
position_set *s;
struct dfa *d;
epsclosure (position_set *s, struct dfa *d)
{
int i, j;
int *visited;
@ -1345,9 +1283,7 @@ epsclosure(s, d)
scheme; the number of elements in each set deeper in the stack can be
used to determine the address of a particular set's array. */
void
dfaanalyze(d, searchflag)
struct dfa *d;
int searchflag;
dfaanalyze (struct dfa *d, int searchflag)
{
int *nullable; /* Nullable stack. */
int *nfirstpos; /* Element count stack for firstpos sets. */
@ -1608,10 +1544,7 @@ dfaanalyze(d, searchflag)
create a new group labeled with the characters of C and insert this
position in that group. */
void
dfastate(s, d, trans)
int s;
struct dfa *d;
int trans[];
dfastate (int s, struct dfa *d, int trans[])
{
position_set grps[NOTCHAR]; /* As many as will ever be needed. */
charclass labels[NOTCHAR]; /* Labels corresponding to the groups. */
@ -1853,9 +1786,7 @@ dfastate(s, d, trans)
TODO: Improve this comment, get rid of the unnecessary redundancy. */
static void
build_state(s, d)
int s;
struct dfa *d;
build_state (int s, struct dfa *d)
{
int *trans; /* The new transition table. */
int i;
@ -1931,8 +1862,7 @@ build_state(s, d)
}
static void
build_state_zero(d)
struct dfa *d;
build_state_zero (struct dfa *d)
{
d->tralloc = 1;
d->trcount = 0;
@ -1958,13 +1888,8 @@ build_state_zero(d)
match needs to be verified by a backtracking matcher. Otherwise
we store a 0 in *backref. */
char *
dfaexec(d, begin, end, newline, count, backref)
struct dfa *d;
char *begin;
char *end;
int newline;
int *count;
int *backref;
dfaexec (struct dfa *d, char *begin, char *end,
int newline, int *count, int *backref)
{
register int s, s1, tmp; /* Current state. */
register unsigned char *p; /* Current input character. */
@ -2045,8 +1970,7 @@ dfaexec(d, begin, end, newline, count, backref)
/* Initialize the components of a dfa that the other routines don't
initialize for themselves. */
void
dfainit(d)
struct dfa *d;
dfainit (struct dfa *d)
{
d->calloc = 1;
MALLOC(d->charclasses, charclass, d->calloc);
@ -2064,11 +1988,7 @@ dfainit(d)
/* Parse and analyze a single string of the given length. */
void
dfacomp(s, len, d, searchflag)
char *s;
size_t len;
struct dfa *d;
int searchflag;
dfacomp (char *s, size_t len, struct dfa *d, int searchflag)
{
if (case_fold) /* dummy folding in service of dfamust() */
{
@ -2107,8 +2027,7 @@ dfacomp(s, len, d, searchflag)
/* Free the storage held by the components of a dfa. */
void
dfafree(d)
struct dfa *d;
dfafree (struct dfa *d)
{
int i;
struct dfamust *dm, *ndm;
@ -2220,9 +2139,7 @@ dfafree(d)
'psi|epsilon' is likelier)? */
static char *
icatalloc(old, new)
char *old;
char *new;
icatalloc (char *old, char *new)
{
char *result;
size_t oldsize, newsize;
@ -2243,16 +2160,13 @@ icatalloc(old, new)
}
static char *
icpyalloc(string)
char *string;
icpyalloc (char *string)
{
return icatalloc((char *) NULL, string);
}
static char *
istrstr(lookin, lookfor)
char *lookin;
char *lookfor;
istrstr (char *lookin, char *lookfor)
{
char *cp;
size_t len;
@ -2265,16 +2179,14 @@ istrstr(lookin, lookfor)
}
static void
ifree(cp)
char *cp;
ifree (char *cp)
{
if (cp != NULL)
free(cp);
}
static void
freelist(cpp)
char **cpp;
freelist (char **cpp)
{
int i;
@ -2288,10 +2200,7 @@ freelist(cpp)
}
static char **
enlist(cpp, new, len)
char **cpp;
char *new;
size_t len;
enlist (char **cpp, char *new, size_t len)
{
int i, j;
@ -2336,9 +2245,7 @@ enlist(cpp, new, len)
list of their distinct common substrings. Return NULL if something
seems wild. */
static char **
comsubs(left, right)
char *left;
char *right;
comsubs (char *left, char *right)
{
char **cpp;
char *lcp;
@ -2372,9 +2279,7 @@ comsubs(left, right)
}
static char **
addlists(old, new)
char **old;
char **new;
addlists (char **old, char **new)
{
int i;
@ -2392,9 +2297,7 @@ char **new;
/* Given two lists of substrings, return a new list giving substrings
common to both. */
static char **
inboth(left, right)
char **left;
char **right;
inboth (char **left, char **right)
{
char **both;
char **temp;
@ -2435,16 +2338,14 @@ typedef struct
} must;
static void
resetmust(mp)
must *mp;
resetmust (must *mp)
{
mp->left[0] = mp->right[0] = mp->is[0] = '\0';
freelist(mp->in);
}
static void
dfamust(dfa)
struct dfa *dfa;
dfamust (struct dfa *dfa)
{
must *musts;
must *mp;