Merge FreeBSD changes into 2.4d.

FreeBSD changes made obsolete (OBE'ed, i.e. overtaken by events) by 2.4d:
 * rev 1.5 - use collate for alpha character ranges.
2000-01-31 13:28:08 +00:00 · 2000-01-31 13:28:08 +00:00 · ebee1ce115
commit ebee1ce115
parent 2dd28130d9
1 changed files with 92 additions and 191 deletions
--- a/gnu/usr.bin/grep/dfa.c
+++ b/gnu/usr.bin/grep/dfa.c
@ -79,6 +79,16 @@ extern void free();
 #define ISCNTRL(C) (isascii(C) && iscntrl(C))
 #endif

+/* ISASCIIDIGIT differs from ISDIGIT, as follows:
+   - Its arg may be any int or unsigned int; it need not be an unsigned char.
+   - It's guaranteed to evaluate its argument exactly once.
+   - It's typically faster.
+   Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that
+   only '0' through '9' are digits.  Prefer ISASCIIDIGIT to ISDIGIT unless
+   it's important to use the locale's definition of `digit' even when the
+   host does not conform to Posix.  */
+#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9)
+
 /* If we (don't) have I18N.  */
 /* glibc defines _ */
 #ifndef _
@ -150,29 +160,8 @@ static char **comsubs PARAMS ((char *left, char *right));
 static char **addlists PARAMS ((char **old, char **new));
 static char **inboth PARAMS ((char **left, char **right));

-#ifdef __FreeBSD__
-static int
-collate_range_cmp(c1, c2)
-	int c1, c2;
-{
-	static char s1[2], s2[2];
-	int r;
-
-	if (c1 == c2)
-		return 0;
-	s1[0] = c1;
-	s2[0] = c2;
-	if ((r = strcoll(s1, s2)) == 0)
-		r = c1 - c2;
-
-	return r;
-}
-#endif
-
 static ptr_t
-xcalloc(n, s)
-     size_t n;
-     size_t s;
+xcalloc (size_t n, size_t s)
 {
  ptr_t r = calloc(n, s);

@ -182,8 +171,7 @@ xcalloc(n, s)
 }

 static ptr_t
-xmalloc(n)
-     size_t n;
+xmalloc (size_t n)
 {
  ptr_t r = malloc(n);

@ -194,9 +182,7 @@ xmalloc(n)
 }

 static ptr_t
-xrealloc(p, n)
-     ptr_t p;
-     size_t n;
+xrealloc (ptr_t p, size_t n)
 {
  ptr_t r = realloc(p, n);

@ -222,8 +208,7 @@ xrealloc(p, n)
 #ifdef DEBUG

 static void
-prtok(t)
-     token t;
+prtok (token t)
 {
  char *s;

@ -261,33 +246,25 @@ prtok(t)
 /* Stuff pertaining to charclasses. */

 static int
-tstbit(b, c)
-     int b;
-     charclass c;
+tstbit (int b, charclass c)
 {
  return c[b / INTBITS] & 1 << b % INTBITS;
 }

 static void
-setbit(b, c)
-     int b;
-     charclass c;
+setbit (int b, charclass c)
 {
  c[b / INTBITS] |= 1 << b % INTBITS;
 }

 static void
-clrbit(b, c)
-     int b;
-     charclass c;
+clrbit (int b, charclass c)
 {
  c[b / INTBITS] &= ~(1 << b % INTBITS);
 }

 static void
-copyset(src, dst)
-     charclass src;
-     charclass dst;
+copyset (charclass src, charclass dst)
 {
  int i;

@ -296,8 +273,7 @@ copyset(src, dst)
 }

 static void
-zeroset(s)
-     charclass s;
+zeroset (charclass s)
 {
  int i;

@ -306,8 +282,7 @@ zeroset(s)
 }

 static void
-notset(s)
-     charclass s;
+notset (charclass s)
 {
  int i;

@ -316,9 +291,7 @@ notset(s)
 }

 static int
-equal(s1, s2)
-     charclass s1;
-     charclass s2;
+equal (charclass s1, charclass s2)
 {
  int i;

@ -333,8 +306,7 @@ static struct dfa *dfa;

 /* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */
 static int
-charclass_index(s)
-     charclass s;
+charclass_index (charclass s)
 {
  int i;

@ -358,10 +330,7 @@ static unsigned char eolbyte;

 /* Entry point to set syntax options. */
 void
-dfasyntax(bits, fold, eol)
-     reg_syntax_t bits;
-     int fold;
-     int eol;
+dfasyntax (reg_syntax_t bits, int fold, int eol)
 {
  syntax_bits_set = 1;
  syntax_bits = bits;
@ -387,10 +356,12 @@ static int minrep, maxrep;	/* Repeat counts for {m,n}. */
 #define FETCH(c, eoferr)   	      \
  {			   	      \
    if (! lexleft)	   	      \
-      if (eoferr != 0)	   	      \
-	dfaerror(eoferr);  	      \
-      else		   	      \
-	return lasttok = END;	      \
+      {				      \
+	if (eoferr != 0)	      \
+	  dfaerror (eoferr);	      \
+	else		   	      \
+	  return lasttok = END;	      \
+      }				      \
    (c) = (unsigned char) *lexptr++;  \
    --lexleft;		   	      \
  }
@ -413,8 +384,8 @@ FUNC(is_print, ISPRINT)
 FUNC(is_graph, ISGRAPH)
 FUNC(is_cntrl, ISCNTRL)

-static int is_blank(c)
-int c;
+static int
+is_blank (int c)
 {
   return (c == ' ' || c == '\t');
 }
@ -445,8 +416,7 @@ static struct {
 #define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')

 static int
-looking_at(s)
-     const char *s;
+looking_at (char const *s)
 {
  size_t len;

@ -457,12 +427,14 @@ looking_at(s)
 }

 static token
-lex()
+lex (void)
 {
  token c, c1, c2;
  int backslash = 0, invert;
  charclass ccl;
  int i;
+  char lo[2];
+  char hi[2];

  /* Basic plan: We fetch a character.  If it's a backslash,
     we set the backslash flag and go through the loop again.
@ -595,10 +567,10 @@ lex()
 	      int lo = -1, hi = -1;
 	      char const *p = lexptr;
 	      char const *lim = p + lexleft;
-	      for (;  p != lim && ISDIGIT (*p);  p++)
+	      for (;  p != lim && ISASCIIDIGIT (*p);  p++)
 		lo = (lo < 0 ? 0 : lo * 10) + *p - '0';
 	      if (p != lim && *p == ',')
-		while (++p != lim && ISDIGIT (*p))
+		while (++p != lim && ISASCIIDIGIT (*p))
 		  hi = (hi < 0 ? 0 : hi * 10) + *p - '0';
 	      else
 		hi = lo;
@ -613,13 +585,13 @@ lex()
 	     {M,} - minimum count, maximum is infinity
 	     {M,N} - M through N */
 	  FETCH(c, _("unfinished repeat count"));
-	  if (ISDIGIT(c))
+	  if (ISASCIIDIGIT (c))
 	    {
 	      minrep = c - '0';
 	      for (;;)
 		{
 		  FETCH(c, _("unfinished repeat count"));
-		  if (!ISDIGIT(c))
+		  if (! ISASCIIDIGIT (c))
 		    break;
 		  minrep = 10 * minrep + c - '0';
 		}
@ -629,7 +601,7 @@ lex()
 	  if (c == ',')
 	    {
 	      FETCH (c, _("unfinished repeat count"));
-	      if (! ISDIGIT (c))
+	      if (! ISASCIIDIGIT (c))
 		maxrep = -1;
 	      else
 		{
@ -637,7 +609,7 @@ lex()
 		  for (;;)
 		    {
 		      FETCH (c, _("unfinished repeat count"));
-		      if (! ISDIGIT (c))
+		      if (! ISASCIIDIGIT (c))
 			break;
 		      maxrep = 10 * maxrep + c - '0';
 		    }
@ -776,35 +748,26 @@ lex()
 		}
 	      else
 		c2 = c;
-#ifdef __FreeBSD__
-	      if (collate_range_cmp(c, c2) <= 0)
-	      {
-		token c3;

-		for (c3 = 0; c3 < NOTCHAR; ++c3) {
-		  if (collate_range_cmp(c, c3) <= 0 &&
-		      collate_range_cmp(c3, c2) <= 0) {
-		    setbit(c3, ccl);
-		    if (case_fold)
-		      if (ISUPPER(c3))
-			setbit(tolower(c3), ccl);
-		      else if (ISLOWER(c))
-			setbit(toupper(c3), ccl);
-		  }
-		}
-	      }
-#else
-	      while (c <= c2)
+	      lo[0] = c;  lo[1] = '\0';
+	      hi[0] = c2; hi[1] = '\0';
+	      for (c = 0; c < NOTCHAR; c++)
 		{
-		  setbit(c, ccl);
-		  if (case_fold)
-		    if (ISUPPER(c))
-		      setbit(tolower(c), ccl);
-		    else if (ISLOWER(c))
-		      setbit(toupper(c), ccl);
-		  ++c;
+		  char ch[2];
+		  ch[0] = c;  ch[1] = '\0';
+		  if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0)
+		    {
+		      setbit (c, ccl);
+		      if (case_fold)
+			{
+			  if (ISUPPER (c))
+			    setbit (tolower (c), ccl);
+			  else if (ISLOWER (c))
+			    setbit (toupper (c), ccl);
+			}
+		    }
 		}
-#endif
+
 	    skip:
 	      ;
 	    }
@ -853,8 +816,7 @@ static int depth;		/* Current depth of a hypothetical stack
 /* Add the given token to the parse tree, maintaining the depth count and
   updating the maximum depth if necessary. */
 static void
-addtok(t)
-     token t;
+addtok (token t)
 {
  REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex);
  dfa->tokens[dfa->tindex++] = t;
@ -913,7 +875,7 @@ addtok(t)
   The parser builds a parse tree in postfix form in an array of tokens. */

 static void
-atom()
+atom (void)
 {
  if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
      || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
@ -936,8 +898,7 @@ atom()

 /* Return the number of tokens in the given subexpression. */
 static int
-nsubtoks(tindex)
-int tindex;
+nsubtoks (int tindex)
 {
  int ntoks1;

@ -959,8 +920,7 @@ int tindex;

 /* Copy the given subexpression to the top of the tree. */
 static void
-copytoks(tindex, ntokens)
-     int tindex, ntokens;
+copytoks (int tindex, int ntokens)
 {
  int i;

@ -969,7 +929,7 @@ copytoks(tindex, ntokens)
 }

 static void
-closure()
+closure (void)
 {
  int tindex, ntokens, i;

@ -1004,7 +964,7 @@ closure()
 }

 static void
-branch()
+branch (void)
 {
  closure();
  while (tok != RPAREN && tok != OR && tok >= 0)
@ -1015,8 +975,7 @@ branch()
 }

 static void
-regexp(toplevel)
-     int toplevel;
+regexp (int toplevel)
 {
  branch();
  while (tok == OR)
@ -1034,11 +993,7 @@ regexp(toplevel)
   length of the string, so s can include NUL characters.  D is a pointer to
   the struct dfa to parse into. */
 void
-dfaparse(s, len, d)
-     char *s;
-     size_t len;
-     struct dfa *d;
-
+dfaparse (char *s, size_t len, struct dfa *d)
 {
  dfa = d;
  lexstart = lexptr = s;
@ -1071,9 +1026,7 @@ dfaparse(s, len, d)

 /* Copy one set to another; the destination must be large enough. */
 static void
-copy(src, dst)
-     position_set *src;
-     position_set *dst;
+copy (position_set *src, position_set *dst)
 {
  int i;

@ -1087,9 +1040,7 @@ copy(src, dst)
   the same index then their constraints are logically or'd together.
   S->elems must point to an array large enough to hold the resulting set. */
 static void
-insert(p, s)
-     position p;
-     position_set *s;
+insert (position p, position_set *s)
 {
  int i;
  position t1, t2;
@ -1114,10 +1065,7 @@ insert(p, s)
 /* Merge two sets of positions into a third.  The result is exactly as if
   the positions of both sets were inserted into an initially empty set. */
 static void
-merge(s1, s2, m)
-     position_set *s1;
-     position_set *s2;
-     position_set *m;
+merge (position_set *s1, position_set *s2, position_set *m)
 {
  int i = 0, j = 0;

@ -1140,9 +1088,7 @@ merge(s1, s2, m)

 /* Delete a position from a set. */
 static void
-delete(p, s)
-     position p;
-     position_set *s;
+delete (position p, position_set *s)
 {
  int i;

@ -1159,11 +1105,7 @@ delete(p, s)
   state.  Newline and letter tell whether we got here on a newline or
   letter, respectively. */
 static int
-state_index(d, s, newline, letter)
-     struct dfa *d;
-     position_set *s;
-     int newline;
-     int letter;
+state_index (struct dfa *d, position_set *s, int newline, int letter)
 {
  int hash = 0;
  int constraint;
@ -1228,12 +1170,8 @@ state_index(d, s, newline, letter)
   that position with the elements of its follow labeled with an appropriate
   constraint.  Repeat exhaustively until no funny positions are left.
   S->elems must be large enough to hold the result. */
-static void epsclosure PARAMS ((position_set *s, struct dfa *d));
-
 static void
-epsclosure(s, d)
-     position_set *s;
-     struct dfa *d;
+epsclosure (position_set *s, struct dfa *d)
 {
  int i, j;
  int *visited;
@ -1345,9 +1283,7 @@ epsclosure(s, d)
   scheme; the number of elements in each set deeper in the stack can be
   used to determine the address of a particular set's array. */
 void
-dfaanalyze(d, searchflag)
-     struct dfa *d;
-     int searchflag;
+dfaanalyze (struct dfa *d, int searchflag)
 {
  int *nullable;		/* Nullable stack. */
  int *nfirstpos;		/* Element count stack for firstpos sets. */
@ -1608,10 +1544,7 @@ dfaanalyze(d, searchflag)
   create a new group labeled with the characters of C and insert this
   position in that group. */
 void
-dfastate(s, d, trans)
-     int s;
-     struct dfa *d;
-     int trans[];
+dfastate (int s, struct dfa *d, int trans[])
 {
  position_set grps[NOTCHAR];	/* As many as will ever be needed. */
  charclass labels[NOTCHAR];	/* Labels corresponding to the groups. */
@ -1853,9 +1786,7 @@ dfastate(s, d, trans)
   TODO: Improve this comment, get rid of the unnecessary redundancy. */

 static void
-build_state(s, d)
-     int s;
-     struct dfa *d;
+build_state (int s, struct dfa *d)
 {
  int *trans;			/* The new transition table. */
  int i;
@ -1931,8 +1862,7 @@ build_state(s, d)
 }

 static void
-build_state_zero(d)
-     struct dfa *d;
+build_state_zero (struct dfa *d)
 {
  d->tralloc = 1;
  d->trcount = 0;
@ -1958,13 +1888,8 @@ build_state_zero(d)
   match needs to be verified by a backtracking matcher.  Otherwise
   we store a 0 in *backref. */
 char *
-dfaexec(d, begin, end, newline, count, backref)
-     struct dfa *d;
-     char *begin;
-     char *end;
-     int newline;
-     int *count;
-     int *backref;
+dfaexec (struct dfa *d, char *begin, char *end,
+	 int newline, int *count, int *backref)
 {
  register int s, s1, tmp;	/* Current state. */
  register unsigned char *p;	/* Current input character. */
@ -2045,8 +1970,7 @@ dfaexec(d, begin, end, newline, count, backref)
 /* Initialize the components of a dfa that the other routines don't
   initialize for themselves. */
 void
-dfainit(d)
-     struct dfa *d;
+dfainit (struct dfa *d)
 {
  d->calloc = 1;
  MALLOC(d->charclasses, charclass, d->calloc);
@ -2064,11 +1988,7 @@ dfainit(d)

 /* Parse and analyze a single string of the given length. */
 void
-dfacomp(s, len, d, searchflag)
-     char *s;
-     size_t len;
-     struct dfa *d;
-     int searchflag;
+dfacomp (char *s, size_t len, struct dfa *d, int searchflag)
 {
  if (case_fold)	/* dummy folding in service of dfamust() */
    {
@ -2107,8 +2027,7 @@ dfacomp(s, len, d, searchflag)

 /* Free the storage held by the components of a dfa. */
 void
-dfafree(d)
-     struct dfa *d;
+dfafree (struct dfa *d)
 {
  int i;
  struct dfamust *dm, *ndm;
@ -2220,9 +2139,7 @@ dfafree(d)
   'psi|epsilon' is likelier)? */

 static char *
-icatalloc(old, new)
-     char *old;
-     char *new;
+icatalloc (char *old, char *new)
 {
  char *result;
  size_t oldsize, newsize;
@ -2243,16 +2160,13 @@ icatalloc(old, new)
 }

 static char *
-icpyalloc(string)
-     char *string;
+icpyalloc (char *string)
 {
  return icatalloc((char *) NULL, string);
 }

 static char *
-istrstr(lookin, lookfor)
-     char *lookin;
-     char *lookfor;
+istrstr (char *lookin, char *lookfor)
 {
  char *cp;
  size_t len;
@ -2265,16 +2179,14 @@ istrstr(lookin, lookfor)
 }

 static void
-ifree(cp)
-     char *cp;
+ifree (char *cp)
 {
  if (cp != NULL)
    free(cp);
 }

 static void
-freelist(cpp)
-     char **cpp;
+freelist (char **cpp)
 {
  int i;

@ -2288,10 +2200,7 @@ freelist(cpp)
 }

 static char **
-enlist(cpp, new, len)
-     char **cpp;
-     char *new;
-     size_t len;
+enlist (char **cpp, char *new, size_t len)
 {
  int i, j;

@ -2336,9 +2245,7 @@ enlist(cpp, new, len)
   list of their distinct common substrings. Return NULL if something
   seems wild. */
 static char **
-comsubs(left, right)
-     char *left;
-     char *right;
+comsubs (char *left, char *right)
 {
  char **cpp;
  char *lcp;
@ -2372,9 +2279,7 @@ comsubs(left, right)
 }

 static char **
-addlists(old, new)
-char **old;
-char **new;
+addlists (char **old, char **new)
 {
  int i;

@ -2392,9 +2297,7 @@ char **new;
 /* Given two lists of substrings, return a new list giving substrings
   common to both. */
 static char **
-inboth(left, right)
-     char **left;
-     char **right;
+inboth (char **left, char **right)
 {
  char **both;
  char **temp;
@ -2435,16 +2338,14 @@ typedef struct
 } must;

 static void
-resetmust(mp)
-must *mp;
+resetmust (must *mp)
 {
  mp->left[0] = mp->right[0] = mp->is[0] = '\0';
  freelist(mp->in);
 }

 static void
-dfamust(dfa)
-struct dfa *dfa;
+dfamust (struct dfa *dfa)
 {
  must *musts;
  must *mp;