Merge FreeBSD changes into 2.4d.

FreeBSD changes OBE'ed by 2.4d: * rev 1.5 - use collate for alpha character ranges.
2000-01-31 13:28:08 +00:00 · 2000-01-31 13:28:08 +00:00 · ebee1ce115
commit ebee1ce115
parent 2dd28130d9
1 changed files with 92 additions and 191 deletions
--- a/gnu/usr.bin/grep/dfa.c
+++ b/gnu/usr.bin/grep/dfa.c
@ -79,6 +79,16 @@ extern void free();
 #define ISCNTRL(C) (isascii(C) && iscntrl(C))
 #endif
 /* ISASCIIDIGIT differs from ISDIGIT, as follows:
   - Its arg may be any int or unsigned int; it need not be an unsigned char.
   - It's guaranteed to evaluate its argument exactly once.
   - It's typically faster.
   Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that
   only '0' through '9' are digits.  Prefer ISASCIIDIGIT to ISDIGIT unless
   it's important to use the locale's definition of `digit' even when the
   host does not conform to Posix.  */
 #define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9)
 /* If we (don't) have I18N.  */
 /* glibc defines _ */
 #ifndef _
@ -150,29 +160,8 @@ static char **comsubs PARAMS ((char *left, char *right));
 static char **addlists PARAMS ((char **old, char **new));
 static char **inboth PARAMS ((char **left, char **right));
 #ifdef __FreeBSD__
 static int
 collate_range_cmp(c1, c2)
 	int c1, c2;
 {
 	static char s1[2], s2[2];
 	int r;
 	if (c1 == c2)
 		return 0;
 	s1[0] = c1;
 	s2[0] = c2;
 	if ((r = strcoll(s1, s2)) == 0)
 		r = c1 - c2;
 	return r;
 }
 #endif
 static ptr_t
-xcalloc(n, s)
+xcalloc (size_t n, size_t s)
     size_t n;
     size_t s;
 {
  ptr_t r = calloc(n, s);
@ -182,8 +171,7 @@ xcalloc(n, s)
 }
 static ptr_t
-xmalloc(n)
+xmalloc (size_t n)
     size_t n;
 {
  ptr_t r = malloc(n);
@ -194,9 +182,7 @@ xmalloc(n)
 }
 static ptr_t
-xrealloc(p, n)
+xrealloc (ptr_t p, size_t n)
     ptr_t p;
     size_t n;
 {
  ptr_t r = realloc(p, n);
@ -222,8 +208,7 @@ xrealloc(p, n)
 #ifdef DEBUG
 static void
-prtok(t)
+prtok (token t)
     token t;
 {
  char *s;
@ -261,33 +246,25 @@ prtok(t)
 /* Stuff pertaining to charclasses. */
 static int
-tstbit(b, c)
+tstbit (int b, charclass c)
     int b;
     charclass c;
 {
  return c[b / INTBITS] & 1 << b % INTBITS;
 }
 static void
-setbit(b, c)
+setbit (int b, charclass c)
     int b;
     charclass c;
 {
  c[b / INTBITS] |= 1 << b % INTBITS;
 }
 static void
-clrbit(b, c)
+clrbit (int b, charclass c)
     int b;
     charclass c;
 {
  c[b / INTBITS] &= ~(1 << b % INTBITS);
 }
 static void
-copyset(src, dst)
+copyset (charclass src, charclass dst)
     charclass src;
     charclass dst;
 {
  int i;
@ -296,8 +273,7 @@ copyset(src, dst)
 }
 static void
-zeroset(s)
+zeroset (charclass s)
     charclass s;
 {
  int i;
@ -306,8 +282,7 @@ zeroset(s)
 }
 static void
-notset(s)
+notset (charclass s)
     charclass s;
 {
  int i;
@ -316,9 +291,7 @@ notset(s)
 }
 static int
-equal(s1, s2)
+equal (charclass s1, charclass s2)
     charclass s1;
     charclass s2;
 {
  int i;
@ -333,8 +306,7 @@ static struct dfa *dfa;
 /* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */
 static int
-charclass_index(s)
+charclass_index (charclass s)
     charclass s;
 {
  int i;
@ -358,10 +330,7 @@ static unsigned char eolbyte;
 /* Entry point to set syntax options. */
 void
-dfasyntax(bits, fold, eol)
+dfasyntax (reg_syntax_t bits, int fold, int eol)
     reg_syntax_t bits;
     int fold;
     int eol;
 {
  syntax_bits_set = 1;
  syntax_bits = bits;
@ -387,10 +356,12 @@ static int minrep, maxrep;	/* Repeat counts for {m,n}. */
 #define FETCH(c, eoferr)   	      \
  {			   	      \
    if (! lexleft)	   	      \
-      if (eoferr != 0)	   	      \
+      {				      \
-	dfaerror(eoferr);  	      \
+	if (eoferr != 0)	      \
-      else		   	      \
+	  dfaerror (eoferr);	      \
-	return lasttok = END;	      \
+	else		   	      \
 	  return lasttok = END;	      \
      }				      \
    (c) = (unsigned char) *lexptr++;  \
    --lexleft;		   	      \
  }
@ -413,8 +384,8 @@ FUNC(is_print, ISPRINT)
 FUNC(is_graph, ISGRAPH)
 FUNC(is_cntrl, ISCNTRL)
-static int is_blank(c)
+static int
-int c;
+is_blank (int c)
 {
   return (c == ' ' || c == '\t');
 }
@ -445,8 +416,7 @@ static struct {
 #define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')
 static int
-looking_at(s)
+looking_at (char const *s)
     const char *s;
 {
  size_t len;
@ -457,12 +427,14 @@ looking_at(s)
 }
 static token
-lex()
+lex (void)
 {
  token c, c1, c2;
  int backslash = 0, invert;
  charclass ccl;
  int i;
  char lo[2];
  char hi[2];
  /* Basic plan: We fetch a character.  If it's a backslash,
     we set the backslash flag and go through the loop again.
@ -595,10 +567,10 @@ lex()
 	      int lo = -1, hi = -1;
 	      char const *p = lexptr;
 	      char const *lim = p + lexleft;
-	      for (;  p != lim && ISDIGIT (*p);  p++)
+	      for (;  p != lim && ISASCIIDIGIT (*p);  p++)
 		lo = (lo < 0 ? 0 : lo * 10) + *p - '0';
 	      if (p != lim && *p == ',')
-		while (++p != lim && ISDIGIT (*p))
+		while (++p != lim && ISASCIIDIGIT (*p))
 		  hi = (hi < 0 ? 0 : hi * 10) + *p - '0';
 	      else
 		hi = lo;
@ -613,13 +585,13 @@ lex()
 	     {M,} - minimum count, maximum is infinity
 	     {M,N} - M through N */
 	  FETCH(c, _("unfinished repeat count"));
-	  if (ISDIGIT(c))
+	  if (ISASCIIDIGIT (c))
 	    {
 	      minrep = c - '0';
 	      for (;;)
 		{
 		  FETCH(c, _("unfinished repeat count"));
-		  if (!ISDIGIT(c))
+		  if (! ISASCIIDIGIT (c))
 		    break;
 		  minrep = 10 * minrep + c - '0';
 		}
@ -629,7 +601,7 @@ lex()
 	  if (c == ',')
 	    {
 	      FETCH (c, _("unfinished repeat count"));
-	      if (! ISDIGIT (c))
+	      if (! ISASCIIDIGIT (c))
 		maxrep = -1;
 	      else
 		{
@ -637,7 +609,7 @@ lex()
 		  for (;;)
 		    {
 		      FETCH (c, _("unfinished repeat count"));
-		      if (! ISDIGIT (c))
+		      if (! ISASCIIDIGIT (c))
 			break;
 		      maxrep = 10 * maxrep + c - '0';
 		    }
@ -776,35 +748,26 @@ lex()
 		}
 	      else
 		c2 = c;
 #ifdef __FreeBSD__
 	      if (collate_range_cmp(c, c2) <= 0)
 	      {
 		token c3;
-		for (c3 = 0; c3 < NOTCHAR; ++c3) {
+	      lo[0] = c;  lo[1] = '\0';
-		  if (collate_range_cmp(c, c3) <= 0 &&
+	      hi[0] = c2; hi[1] = '\0';
-		      collate_range_cmp(c3, c2) <= 0) {
+	      for (c = 0; c < NOTCHAR; c++)
 		    setbit(c3, ccl);
 		    if (case_fold)
 		      if (ISUPPER(c3))
 			setbit(tolower(c3), ccl);
 		      else if (ISLOWER(c))
 			setbit(toupper(c3), ccl);
 		  }
 		}
 	      }
 #else
 	      while (c <= c2)
 		{
-		  setbit(c, ccl);
+		  char ch[2];
-		  if (case_fold)
+		  ch[0] = c;  ch[1] = '\0';
-		    if (ISUPPER(c))
+		  if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0)
-		      setbit(tolower(c), ccl);
+		    {
-		    else if (ISLOWER(c))
+		      setbit (c, ccl);
-		      setbit(toupper(c), ccl);
+		      if (case_fold)
-		  ++c;
+			{
 			  if (ISUPPER (c))
 			    setbit (tolower (c), ccl);
 			  else if (ISLOWER (c))
 			    setbit (toupper (c), ccl);
 			}
 		    }
 		}
-#endif
+
 	    skip:
 	      ;
 	    }
@ -853,8 +816,7 @@ static int depth;		/* Current depth of a hypothetical stack
 /* Add the given token to the parse tree, maintaining the depth count and
   updating the maximum depth if necessary. */
 static void
-addtok(t)
+addtok (token t)
     token t;
 {
  REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
  dfa->tokens[dfa->tindex++] = t;
@ -913,7 +875,7 @@ addtok(t)
   The parser builds a parse tree in postfix form in an array of tokens. */
 static void
-atom()
+atom (void)
 {
  if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
      || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
@ -936,8 +898,7 @@ atom()
 /* Return the number of tokens in the given subexpression. */
 static int
-nsubtoks(tindex)
+nsubtoks (int tindex)
 int tindex;
 {
  int ntoks1;
@ -959,8 +920,7 @@ int tindex;
 /* Copy the given subexpression to the top of the tree. */
 static void
-copytoks(tindex, ntokens)
+copytoks (int tindex, int ntokens)
     int tindex, ntokens;
 {
  int i;
@ -969,7 +929,7 @@ copytoks(tindex, ntokens)
 }
 static void
-closure()
+closure (void)
 {
  int tindex, ntokens, i;
@ -1004,7 +964,7 @@ closure()
 }
 static void
-branch()
+branch (void)
 {
  closure();
  while (tok != RPAREN && tok != OR && tok >= 0)
@ -1015,8 +975,7 @@ branch()
 }
 static void
-regexp(toplevel)
+regexp (int toplevel)
     int toplevel;
 {
  branch();
  while (tok == OR)
@ -1034,11 +993,7 @@ regexp(toplevel)
   length of the string, so s can include NUL characters.  D is a pointer to
   the struct dfa to parse into. */
 void
-dfaparse(s, len, d)
+dfaparse (char *s, size_t len, struct dfa *d)
     char *s;
     size_t len;
     struct dfa *d;
 {
  dfa = d;
  lexstart = lexptr = s;
@ -1071,9 +1026,7 @@ dfaparse(s, len, d)
 /* Copy one set to another; the destination must be large enough. */
 static void
-copy(src, dst)
+copy (position_set *src, position_set *dst)
     position_set *src;
     position_set *dst;
 {
  int i;
@ -1087,9 +1040,7 @@ copy(src, dst)
   the same index then their constraints are logically or'd together.
   S->elems must point to an array large enough to hold the resulting set. */
 static void
-insert(p, s)
+insert (position p, position_set *s)
     position p;
     position_set *s;
 {
  int i;
  position t1, t2;
@ -1114,10 +1065,7 @@ insert(p, s)
 /* Merge two sets of positions into a third.  The result is exactly as if
   the positions of both sets were inserted into an initially empty set. */
 static void
-merge(s1, s2, m)
+merge (position_set *s1, position_set *s2, position_set *m)
     position_set *s1;
     position_set *s2;
     position_set *m;
 {
  int i = 0, j = 0;
@ -1140,9 +1088,7 @@ merge(s1, s2, m)
 /* Delete a position from a set. */
 static void
-delete(p, s)
+delete (position p, position_set *s)
     position p;
     position_set *s;
 {
  int i;
@ -1159,11 +1105,7 @@ delete(p, s)
   state.  Newline and letter tell whether we got here on a newline or
   letter, respectively. */
 static int
-state_index(d, s, newline, letter)
+state_index (struct dfa *d, position_set *s, int newline, int letter)
     struct dfa *d;
     position_set *s;
     int newline;
     int letter;
 {
  int hash = 0;
  int constraint;
@ -1228,12 +1170,8 @@ state_index(d, s, newline, letter)
   that position with the elements of its follow labeled with an appropriate
   constraint.  Repeat exhaustively until no funny positions are left.
   S->elems must be large enough to hold the result. */
 static void epsclosure PARAMS ((position_set *s, struct dfa *d));
 static void
-epsclosure(s, d)
+epsclosure (position_set *s, struct dfa *d)
     position_set *s;
     struct dfa *d;
 {
  int i, j;
  int *visited;
@ -1345,9 +1283,7 @@ epsclosure(s, d)
   scheme; the number of elements in each set deeper in the stack can be
   used to determine the address of a particular set's array. */
 void
-dfaanalyze(d, searchflag)
+dfaanalyze (struct dfa *d, int searchflag)
     struct dfa *d;
     int searchflag;
 {
  int *nullable;		/* Nullable stack. */
  int *nfirstpos;		/* Element count stack for firstpos sets. */
@ -1608,10 +1544,7 @@ dfaanalyze(d, searchflag)
   create a new group labeled with the characters of C and insert this
   position in that group. */
 void
-dfastate(s, d, trans)
+dfastate (int s, struct dfa *d, int trans[])
     int s;
     struct dfa *d;
     int trans[];
 {
  position_set grps[NOTCHAR];	/* As many as will ever be needed. */
  charclass labels[NOTCHAR];	/* Labels corresponding to the groups. */
@ -1853,9 +1786,7 @@ dfastate(s, d, trans)
   TODO: Improve this comment, get rid of the unnecessary redundancy. */
 static void
-build_state(s, d)
+build_state (int s, struct dfa *d)
     int s;
     struct dfa *d;
 {
  int *trans;			/* The new transition table. */
  int i;
@ -1931,8 +1862,7 @@ build_state(s, d)
 }
 static void
-build_state_zero(d)
+build_state_zero (struct dfa *d)
     struct dfa *d;
 {
  d->tralloc = 1;
  d->trcount = 0;
@ -1958,13 +1888,8 @@ build_state_zero(d)
   match needs to be verified by a backtracking matcher.  Otherwise
   we store a 0 in *backref. */
 char *
-dfaexec(d, begin, end, newline, count, backref)
+dfaexec (struct dfa *d, char *begin, char *end,
-     struct dfa *d;
+	 int newline, int *count, int *backref)
     char *begin;
     char *end;
     int newline;
     int *count;
     int *backref;
 {
  register int s, s1, tmp;	/* Current state. */
  register unsigned char *p;	/* Current input character. */
@ -2045,8 +1970,7 @@ dfaexec(d, begin, end, newline, count, backref)
 /* Initialize the components of a dfa that the other routines don't
   initialize for themselves. */
 void
-dfainit(d)
+dfainit (struct dfa *d)
     struct dfa *d;
 {
  d->calloc = 1;
  MALLOC(d->charclasses, charclass, d->calloc);
@ -2064,11 +1988,7 @@ dfainit(d)
 /* Parse and analyze a single string of the given length. */
 void
-dfacomp(s, len, d, searchflag)
+dfacomp (char *s, size_t len, struct dfa *d, int searchflag)
     char *s;
     size_t len;
     struct dfa *d;
     int searchflag;
 {
  if (case_fold)	/* dummy folding in service of dfamust() */
    {
@ -2107,8 +2027,7 @@ dfacomp(s, len, d, searchflag)
 /* Free the storage held by the components of a dfa. */
 void
-dfafree(d)
+dfafree (struct dfa *d)
     struct dfa *d;
 {
  int i;
  struct dfamust *dm, *ndm;
@ -2220,9 +2139,7 @@ dfafree(d)
   'psi|epsilon' is likelier)? */
 static char *
-icatalloc(old, new)
+icatalloc (char *old, char *new)
     char *old;
     char *new;
 {
  char *result;
  size_t oldsize, newsize;
@ -2243,16 +2160,13 @@ icatalloc(old, new)
 }
 static char *
-icpyalloc(string)
+icpyalloc (char *string)
     char *string;
 {
  return icatalloc((char *) NULL, string);
 }
 static char *
-istrstr(lookin, lookfor)
+istrstr (char *lookin, char *lookfor)
     char *lookin;
     char *lookfor;
 {
  char *cp;
  size_t len;
@ -2265,16 +2179,14 @@ istrstr(lookin, lookfor)
 }
 static void
-ifree(cp)
+ifree (char *cp)
     char *cp;
 {
  if (cp != NULL)
    free(cp);
 }
 static void
-freelist(cpp)
+freelist (char **cpp)
     char **cpp;
 {
  int i;
@ -2288,10 +2200,7 @@ freelist(cpp)
 }
 static char **
-enlist(cpp, new, len)
+enlist (char **cpp, char *new, size_t len)
     char **cpp;
     char *new;
     size_t len;
 {
  int i, j;
@ -2336,9 +2245,7 @@ enlist(cpp, new, len)
   list of their distinct common substrings. Return NULL if something
   seems wild. */
 static char **
-comsubs(left, right)
+comsubs (char *left, char *right)
     char *left;
     char *right;
 {
  char **cpp;
  char *lcp;
@ -2372,9 +2279,7 @@ comsubs(left, right)
 }
 static char **
-addlists(old, new)
+addlists (char **old, char **new)
 char **old;
 char **new;
 {
  int i;
@ -2392,9 +2297,7 @@ char **new;
 /* Given two lists of substrings, return a new list giving substrings
   common to both. */
 static char **
-inboth(left, right)
+inboth (char **left, char **right)
     char **left;
     char **right;
 {
  char **both;
  char **temp;
@ -2435,16 +2338,14 @@ typedef struct
 } must;
 static void
-resetmust(mp)
+resetmust (must *mp)
 must *mp;
 {
  mp->left[0] = mp->right[0] = mp->is[0] = '\0';
  freelist(mp->in);
 }
 static void
-dfamust(dfa)
+dfamust (struct dfa *dfa)
 struct dfa *dfa;
 {
  must *musts;
  must *mp;