Merge FreeBSD changes into 2.4d.

FreeBSD changes made obsolete (overtaken by events) by 2.4d:
* rev 1.5 - use the locale's collation order for alphabetic character ranges.
This commit is contained in:
ru 2000-01-31 13:28:08 +00:00
parent 2dd28130d9
commit ebee1ce115

View File

@ -79,6 +79,16 @@ extern void free();
#define ISCNTRL(C) (isascii(C) && iscntrl(C)) #define ISCNTRL(C) (isascii(C) && iscntrl(C))
#endif #endif
/* ISASCIIDIGIT differs from ISDIGIT, as follows:
- Its arg may be any int or unsigned int; it need not be an unsigned char.
- It's guaranteed to evaluate its argument exactly once.
- It's typically faster.
Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that
only '0' through '9' are digits. Prefer ISASCIIDIGIT to ISDIGIT unless
it's important to use the locale's definition of `digit' even when the
host does not conform to Posix. */
#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9)
/* If we (don't) have I18N. */ /* If we (don't) have I18N. */
/* glibc defines _ */ /* glibc defines _ */
#ifndef _ #ifndef _
@ -150,29 +160,8 @@ static char **comsubs PARAMS ((char *left, char *right));
static char **addlists PARAMS ((char **old, char **new)); static char **addlists PARAMS ((char **old, char **new));
static char **inboth PARAMS ((char **left, char **right)); static char **inboth PARAMS ((char **left, char **right));
#ifdef __FreeBSD__
static int
collate_range_cmp(c1, c2)
int c1, c2;
{
static char s1[2], s2[2];
int r;
if (c1 == c2)
return 0;
s1[0] = c1;
s2[0] = c2;
if ((r = strcoll(s1, s2)) == 0)
r = c1 - c2;
return r;
}
#endif
static ptr_t static ptr_t
xcalloc(n, s) xcalloc (size_t n, size_t s)
size_t n;
size_t s;
{ {
ptr_t r = calloc(n, s); ptr_t r = calloc(n, s);
@ -182,8 +171,7 @@ xcalloc(n, s)
} }
static ptr_t static ptr_t
xmalloc(n) xmalloc (size_t n)
size_t n;
{ {
ptr_t r = malloc(n); ptr_t r = malloc(n);
@ -194,9 +182,7 @@ xmalloc(n)
} }
static ptr_t static ptr_t
xrealloc(p, n) xrealloc (ptr_t p, size_t n)
ptr_t p;
size_t n;
{ {
ptr_t r = realloc(p, n); ptr_t r = realloc(p, n);
@ -222,8 +208,7 @@ xrealloc(p, n)
#ifdef DEBUG #ifdef DEBUG
static void static void
prtok(t) prtok (token t)
token t;
{ {
char *s; char *s;
@ -261,33 +246,25 @@ prtok(t)
/* Stuff pertaining to charclasses. */ /* Stuff pertaining to charclasses. */
static int static int
tstbit(b, c) tstbit (int b, charclass c)
int b;
charclass c;
{ {
return c[b / INTBITS] & 1 << b % INTBITS; return c[b / INTBITS] & 1 << b % INTBITS;
} }
static void static void
setbit(b, c) setbit (int b, charclass c)
int b;
charclass c;
{ {
c[b / INTBITS] |= 1 << b % INTBITS; c[b / INTBITS] |= 1 << b % INTBITS;
} }
static void static void
clrbit(b, c) clrbit (int b, charclass c)
int b;
charclass c;
{ {
c[b / INTBITS] &= ~(1 << b % INTBITS); c[b / INTBITS] &= ~(1 << b % INTBITS);
} }
static void static void
copyset(src, dst) copyset (charclass src, charclass dst)
charclass src;
charclass dst;
{ {
int i; int i;
@ -296,8 +273,7 @@ copyset(src, dst)
} }
static void static void
zeroset(s) zeroset (charclass s)
charclass s;
{ {
int i; int i;
@ -306,8 +282,7 @@ zeroset(s)
} }
static void static void
notset(s) notset (charclass s)
charclass s;
{ {
int i; int i;
@ -316,9 +291,7 @@ notset(s)
} }
static int static int
equal(s1, s2) equal (charclass s1, charclass s2)
charclass s1;
charclass s2;
{ {
int i; int i;
@ -333,8 +306,7 @@ static struct dfa *dfa;
/* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */ /* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */
static int static int
charclass_index(s) charclass_index (charclass s)
charclass s;
{ {
int i; int i;
@ -358,10 +330,7 @@ static unsigned char eolbyte;
/* Entry point to set syntax options. */ /* Entry point to set syntax options. */
void void
dfasyntax(bits, fold, eol) dfasyntax (reg_syntax_t bits, int fold, int eol)
reg_syntax_t bits;
int fold;
int eol;
{ {
syntax_bits_set = 1; syntax_bits_set = 1;
syntax_bits = bits; syntax_bits = bits;
@ -387,10 +356,12 @@ static int minrep, maxrep; /* Repeat counts for {m,n}. */
#define FETCH(c, eoferr) \ #define FETCH(c, eoferr) \
{ \ { \
if (! lexleft) \ if (! lexleft) \
if (eoferr != 0) \ { \
dfaerror(eoferr); \ if (eoferr != 0) \
else \ dfaerror (eoferr); \
return lasttok = END; \ else \
return lasttok = END; \
} \
(c) = (unsigned char) *lexptr++; \ (c) = (unsigned char) *lexptr++; \
--lexleft; \ --lexleft; \
} }
@ -413,8 +384,8 @@ FUNC(is_print, ISPRINT)
FUNC(is_graph, ISGRAPH) FUNC(is_graph, ISGRAPH)
FUNC(is_cntrl, ISCNTRL) FUNC(is_cntrl, ISCNTRL)
static int is_blank(c) static int
int c; is_blank (int c)
{ {
return (c == ' ' || c == '\t'); return (c == ' ' || c == '\t');
} }
@ -445,8 +416,7 @@ static struct {
#define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_') #define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')
static int static int
looking_at(s) looking_at (char const *s)
const char *s;
{ {
size_t len; size_t len;
@ -457,12 +427,14 @@ looking_at(s)
} }
static token static token
lex() lex (void)
{ {
token c, c1, c2; token c, c1, c2;
int backslash = 0, invert; int backslash = 0, invert;
charclass ccl; charclass ccl;
int i; int i;
char lo[2];
char hi[2];
/* Basic plan: We fetch a character. If it's a backslash, /* Basic plan: We fetch a character. If it's a backslash,
we set the backslash flag and go through the loop again. we set the backslash flag and go through the loop again.
@ -595,10 +567,10 @@ lex()
int lo = -1, hi = -1; int lo = -1, hi = -1;
char const *p = lexptr; char const *p = lexptr;
char const *lim = p + lexleft; char const *lim = p + lexleft;
for (; p != lim && ISDIGIT (*p); p++) for (; p != lim && ISASCIIDIGIT (*p); p++)
lo = (lo < 0 ? 0 : lo * 10) + *p - '0'; lo = (lo < 0 ? 0 : lo * 10) + *p - '0';
if (p != lim && *p == ',') if (p != lim && *p == ',')
while (++p != lim && ISDIGIT (*p)) while (++p != lim && ISASCIIDIGIT (*p))
hi = (hi < 0 ? 0 : hi * 10) + *p - '0'; hi = (hi < 0 ? 0 : hi * 10) + *p - '0';
else else
hi = lo; hi = lo;
@ -613,13 +585,13 @@ lex()
{M,} - minimum count, maximum is infinity {M,} - minimum count, maximum is infinity
{M,N} - M through N */ {M,N} - M through N */
FETCH(c, _("unfinished repeat count")); FETCH(c, _("unfinished repeat count"));
if (ISDIGIT(c)) if (ISASCIIDIGIT (c))
{ {
minrep = c - '0'; minrep = c - '0';
for (;;) for (;;)
{ {
FETCH(c, _("unfinished repeat count")); FETCH(c, _("unfinished repeat count"));
if (!ISDIGIT(c)) if (! ISASCIIDIGIT (c))
break; break;
minrep = 10 * minrep + c - '0'; minrep = 10 * minrep + c - '0';
} }
@ -629,7 +601,7 @@ lex()
if (c == ',') if (c == ',')
{ {
FETCH (c, _("unfinished repeat count")); FETCH (c, _("unfinished repeat count"));
if (! ISDIGIT (c)) if (! ISASCIIDIGIT (c))
maxrep = -1; maxrep = -1;
else else
{ {
@ -637,7 +609,7 @@ lex()
for (;;) for (;;)
{ {
FETCH (c, _("unfinished repeat count")); FETCH (c, _("unfinished repeat count"));
if (! ISDIGIT (c)) if (! ISASCIIDIGIT (c))
break; break;
maxrep = 10 * maxrep + c - '0'; maxrep = 10 * maxrep + c - '0';
} }
@ -776,35 +748,26 @@ lex()
} }
else else
c2 = c; c2 = c;
#ifdef __FreeBSD__
if (collate_range_cmp(c, c2) <= 0)
{
token c3;
for (c3 = 0; c3 < NOTCHAR; ++c3) { lo[0] = c; lo[1] = '\0';
if (collate_range_cmp(c, c3) <= 0 && hi[0] = c2; hi[1] = '\0';
collate_range_cmp(c3, c2) <= 0) { for (c = 0; c < NOTCHAR; c++)
setbit(c3, ccl);
if (case_fold)
if (ISUPPER(c3))
setbit(tolower(c3), ccl);
else if (ISLOWER(c))
setbit(toupper(c3), ccl);
}
}
}
#else
while (c <= c2)
{ {
setbit(c, ccl); char ch[2];
if (case_fold) ch[0] = c; ch[1] = '\0';
if (ISUPPER(c)) if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0)
setbit(tolower(c), ccl); {
else if (ISLOWER(c)) setbit (c, ccl);
setbit(toupper(c), ccl); if (case_fold)
++c; {
if (ISUPPER (c))
setbit (tolower (c), ccl);
else if (ISLOWER (c))
setbit (toupper (c), ccl);
}
}
} }
#endif
skip: skip:
; ;
} }
@ -853,8 +816,7 @@ static int depth; /* Current depth of a hypothetical stack
/* Add the given token to the parse tree, maintaining the depth count and /* Add the given token to the parse tree, maintaining the depth count and
updating the maximum depth if necessary. */ updating the maximum depth if necessary. */
static void static void
addtok(t) addtok (token t)
token t;
{ {
REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex); REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
dfa->tokens[dfa->tindex++] = t; dfa->tokens[dfa->tindex++] = t;
@ -913,7 +875,7 @@ addtok(t)
The parser builds a parse tree in postfix form in an array of tokens. */ The parser builds a parse tree in postfix form in an array of tokens. */
static void static void
atom() atom (void)
{ {
if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
|| tok == BEGLINE || tok == ENDLINE || tok == BEGWORD || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
@ -936,8 +898,7 @@ atom()
/* Return the number of tokens in the given subexpression. */ /* Return the number of tokens in the given subexpression. */
static int static int
nsubtoks(tindex) nsubtoks (int tindex)
int tindex;
{ {
int ntoks1; int ntoks1;
@ -959,8 +920,7 @@ int tindex;
/* Copy the given subexpression to the top of the tree. */ /* Copy the given subexpression to the top of the tree. */
static void static void
copytoks(tindex, ntokens) copytoks (int tindex, int ntokens)
int tindex, ntokens;
{ {
int i; int i;
@ -969,7 +929,7 @@ copytoks(tindex, ntokens)
} }
static void static void
closure() closure (void)
{ {
int tindex, ntokens, i; int tindex, ntokens, i;
@ -1004,7 +964,7 @@ closure()
} }
static void static void
branch() branch (void)
{ {
closure(); closure();
while (tok != RPAREN && tok != OR && tok >= 0) while (tok != RPAREN && tok != OR && tok >= 0)
@ -1015,8 +975,7 @@ branch()
} }
static void static void
regexp(toplevel) regexp (int toplevel)
int toplevel;
{ {
branch(); branch();
while (tok == OR) while (tok == OR)
@ -1034,11 +993,7 @@ regexp(toplevel)
length of the string, so s can include NUL characters. D is a pointer to length of the string, so s can include NUL characters. D is a pointer to
the struct dfa to parse into. */ the struct dfa to parse into. */
void void
dfaparse(s, len, d) dfaparse (char *s, size_t len, struct dfa *d)
char *s;
size_t len;
struct dfa *d;
{ {
dfa = d; dfa = d;
lexstart = lexptr = s; lexstart = lexptr = s;
@ -1071,9 +1026,7 @@ dfaparse(s, len, d)
/* Copy one set to another; the destination must be large enough. */ /* Copy one set to another; the destination must be large enough. */
static void static void
copy(src, dst) copy (position_set *src, position_set *dst)
position_set *src;
position_set *dst;
{ {
int i; int i;
@ -1087,9 +1040,7 @@ copy(src, dst)
the same index then their constraints are logically or'd together. the same index then their constraints are logically or'd together.
S->elems must point to an array large enough to hold the resulting set. */ S->elems must point to an array large enough to hold the resulting set. */
static void static void
insert(p, s) insert (position p, position_set *s)
position p;
position_set *s;
{ {
int i; int i;
position t1, t2; position t1, t2;
@ -1114,10 +1065,7 @@ insert(p, s)
/* Merge two sets of positions into a third. The result is exactly as if /* Merge two sets of positions into a third. The result is exactly as if
the positions of both sets were inserted into an initially empty set. */ the positions of both sets were inserted into an initially empty set. */
static void static void
merge(s1, s2, m) merge (position_set *s1, position_set *s2, position_set *m)
position_set *s1;
position_set *s2;
position_set *m;
{ {
int i = 0, j = 0; int i = 0, j = 0;
@ -1140,9 +1088,7 @@ merge(s1, s2, m)
/* Delete a position from a set. */ /* Delete a position from a set. */
static void static void
delete(p, s) delete (position p, position_set *s)
position p;
position_set *s;
{ {
int i; int i;
@ -1159,11 +1105,7 @@ delete(p, s)
state. Newline and letter tell whether we got here on a newline or state. Newline and letter tell whether we got here on a newline or
letter, respectively. */ letter, respectively. */
static int static int
state_index(d, s, newline, letter) state_index (struct dfa *d, position_set *s, int newline, int letter)
struct dfa *d;
position_set *s;
int newline;
int letter;
{ {
int hash = 0; int hash = 0;
int constraint; int constraint;
@ -1228,12 +1170,8 @@ state_index(d, s, newline, letter)
that position with the elements of its follow labeled with an appropriate that position with the elements of its follow labeled with an appropriate
constraint. Repeat exhaustively until no funny positions are left. constraint. Repeat exhaustively until no funny positions are left.
S->elems must be large enough to hold the result. */ S->elems must be large enough to hold the result. */
static void epsclosure PARAMS ((position_set *s, struct dfa *d));
static void static void
epsclosure(s, d) epsclosure (position_set *s, struct dfa *d)
position_set *s;
struct dfa *d;
{ {
int i, j; int i, j;
int *visited; int *visited;
@ -1345,9 +1283,7 @@ epsclosure(s, d)
scheme; the number of elements in each set deeper in the stack can be scheme; the number of elements in each set deeper in the stack can be
used to determine the address of a particular set's array. */ used to determine the address of a particular set's array. */
void void
dfaanalyze(d, searchflag) dfaanalyze (struct dfa *d, int searchflag)
struct dfa *d;
int searchflag;
{ {
int *nullable; /* Nullable stack. */ int *nullable; /* Nullable stack. */
int *nfirstpos; /* Element count stack for firstpos sets. */ int *nfirstpos; /* Element count stack for firstpos sets. */
@ -1608,10 +1544,7 @@ dfaanalyze(d, searchflag)
create a new group labeled with the characters of C and insert this create a new group labeled with the characters of C and insert this
position in that group. */ position in that group. */
void void
dfastate(s, d, trans) dfastate (int s, struct dfa *d, int trans[])
int s;
struct dfa *d;
int trans[];
{ {
position_set grps[NOTCHAR]; /* As many as will ever be needed. */ position_set grps[NOTCHAR]; /* As many as will ever be needed. */
charclass labels[NOTCHAR]; /* Labels corresponding to the groups. */ charclass labels[NOTCHAR]; /* Labels corresponding to the groups. */
@ -1853,9 +1786,7 @@ dfastate(s, d, trans)
TODO: Improve this comment, get rid of the unnecessary redundancy. */ TODO: Improve this comment, get rid of the unnecessary redundancy. */
static void static void
build_state(s, d) build_state (int s, struct dfa *d)
int s;
struct dfa *d;
{ {
int *trans; /* The new transition table. */ int *trans; /* The new transition table. */
int i; int i;
@ -1931,8 +1862,7 @@ build_state(s, d)
} }
static void static void
build_state_zero(d) build_state_zero (struct dfa *d)
struct dfa *d;
{ {
d->tralloc = 1; d->tralloc = 1;
d->trcount = 0; d->trcount = 0;
@ -1958,13 +1888,8 @@ build_state_zero(d)
match needs to be verified by a backtracking matcher. Otherwise match needs to be verified by a backtracking matcher. Otherwise
we store a 0 in *backref. */ we store a 0 in *backref. */
char * char *
dfaexec(d, begin, end, newline, count, backref) dfaexec (struct dfa *d, char *begin, char *end,
struct dfa *d; int newline, int *count, int *backref)
char *begin;
char *end;
int newline;
int *count;
int *backref;
{ {
register int s, s1, tmp; /* Current state. */ register int s, s1, tmp; /* Current state. */
register unsigned char *p; /* Current input character. */ register unsigned char *p; /* Current input character. */
@ -2045,8 +1970,7 @@ dfaexec(d, begin, end, newline, count, backref)
/* Initialize the components of a dfa that the other routines don't /* Initialize the components of a dfa that the other routines don't
initialize for themselves. */ initialize for themselves. */
void void
dfainit(d) dfainit (struct dfa *d)
struct dfa *d;
{ {
d->calloc = 1; d->calloc = 1;
MALLOC(d->charclasses, charclass, d->calloc); MALLOC(d->charclasses, charclass, d->calloc);
@ -2064,11 +1988,7 @@ dfainit(d)
/* Parse and analyze a single string of the given length. */ /* Parse and analyze a single string of the given length. */
void void
dfacomp(s, len, d, searchflag) dfacomp (char *s, size_t len, struct dfa *d, int searchflag)
char *s;
size_t len;
struct dfa *d;
int searchflag;
{ {
if (case_fold) /* dummy folding in service of dfamust() */ if (case_fold) /* dummy folding in service of dfamust() */
{ {
@ -2107,8 +2027,7 @@ dfacomp(s, len, d, searchflag)
/* Free the storage held by the components of a dfa. */ /* Free the storage held by the components of a dfa. */
void void
dfafree(d) dfafree (struct dfa *d)
struct dfa *d;
{ {
int i; int i;
struct dfamust *dm, *ndm; struct dfamust *dm, *ndm;
@ -2220,9 +2139,7 @@ dfafree(d)
'psi|epsilon' is likelier)? */ 'psi|epsilon' is likelier)? */
static char * static char *
icatalloc(old, new) icatalloc (char *old, char *new)
char *old;
char *new;
{ {
char *result; char *result;
size_t oldsize, newsize; size_t oldsize, newsize;
@ -2243,16 +2160,13 @@ icatalloc(old, new)
} }
static char * static char *
icpyalloc(string) icpyalloc (char *string)
char *string;
{ {
return icatalloc((char *) NULL, string); return icatalloc((char *) NULL, string);
} }
static char * static char *
istrstr(lookin, lookfor) istrstr (char *lookin, char *lookfor)
char *lookin;
char *lookfor;
{ {
char *cp; char *cp;
size_t len; size_t len;
@ -2265,16 +2179,14 @@ istrstr(lookin, lookfor)
} }
static void static void
ifree(cp) ifree (char *cp)
char *cp;
{ {
if (cp != NULL) if (cp != NULL)
free(cp); free(cp);
} }
static void static void
freelist(cpp) freelist (char **cpp)
char **cpp;
{ {
int i; int i;
@ -2288,10 +2200,7 @@ freelist(cpp)
} }
static char ** static char **
enlist(cpp, new, len) enlist (char **cpp, char *new, size_t len)
char **cpp;
char *new;
size_t len;
{ {
int i, j; int i, j;
@ -2336,9 +2245,7 @@ enlist(cpp, new, len)
list of their distinct common substrings. Return NULL if something list of their distinct common substrings. Return NULL if something
seems wild. */ seems wild. */
static char ** static char **
comsubs(left, right) comsubs (char *left, char *right)
char *left;
char *right;
{ {
char **cpp; char **cpp;
char *lcp; char *lcp;
@ -2372,9 +2279,7 @@ comsubs(left, right)
} }
static char ** static char **
addlists(old, new) addlists (char **old, char **new)
char **old;
char **new;
{ {
int i; int i;
@ -2392,9 +2297,7 @@ char **new;
/* Given two lists of substrings, return a new list giving substrings /* Given two lists of substrings, return a new list giving substrings
common to both. */ common to both. */
static char ** static char **
inboth(left, right) inboth (char **left, char **right)
char **left;
char **right;
{ {
char **both; char **both;
char **temp; char **temp;
@ -2435,16 +2338,14 @@ typedef struct
} must; } must;
static void static void
resetmust(mp) resetmust (must *mp)
must *mp;
{ {
mp->left[0] = mp->right[0] = mp->is[0] = '\0'; mp->left[0] = mp->right[0] = mp->is[0] = '\0';
freelist(mp->in); freelist(mp->in);
} }
static void static void
dfamust(dfa) dfamust (struct dfa *dfa)
struct dfa *dfa;
{ {
must *musts; must *musts;
must *mp; must *mp;