Merge FreeBSD changes into 2.3:

* rev 1.5 - use collate for alpha character ranges * rev 1.3 - use -lgnuregex Submitted by: ru
1999-11-20 23:09:17 +00:00 · 1999-11-20 23:09:17 +00:00 · 7423a9f8e9
commit 7423a9f8e9
parent 95f1a897f9
1 changed files with 268 additions and 171 deletions
--- a/gnu/usr.bin/grep/dfa.c
+++ b/gnu/usr.bin/grep/dfa.c
@ -1,5 +1,5 @@
 /* dfa.c - deterministic extended regexp routines for GNU
-   Copyright (C) 1988 Free Software Foundation, Inc.
+   Copyright (C) 1988, 1998 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@ -13,19 +13,25 @@

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA */

 /* Written June, 1988 by Mike Haertel
   Modified July, 1988 by Arthur David Olson to assist BMG speedups  */

+/* $FreeBSD$ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
 #include <assert.h>
 #include <ctype.h>
 #include <stdio.h>

+#include <sys/types.h>
 #ifdef STDC_HEADERS
 #include <stdlib.h>
 #else
-#include <sys/types.h>
 extern char *calloc(), *malloc(), *realloc();
 extern void free();
 #endif
@ -38,59 +44,140 @@ extern void free();
 #include <strings.h>
 #endif

+#ifndef DEBUG	/* use the same approach as regex.c */
+#undef assert
+#define assert(e)
+#endif /* DEBUG */
+
 #ifndef isgraph
-#define isgraph(C) (isprint((unsigned char)C) && !isspace((unsigned char)C))
+#define isgraph(C) (isprint(C) && !isspace(C))
 #endif

-#define ISALPHA(C) isalpha((unsigned char)C)
-#define ISUPPER(C) isupper((unsigned char)C)
-#define ISLOWER(C) islower((unsigned char)C)
-#define ISDIGIT(C) isdigit((unsigned char)C)
-#define ISXDIGIT(C) isxdigit((unsigned char)C)
-#define ISSPACE(C) isspace((unsigned char)C)
-#define ISPUNCT(C) ispunct((unsigned char)C)
-#define ISALNUM(C) isalnum((unsigned char)C)
-#define ISPRINT(C) isprint((unsigned char)C)
-#define ISGRAPH(C) isgraph((unsigned char)C)
-#define ISCNTRL(C) iscntrl((unsigned char)C)
-
-#include "dfa.h"
-#include <gnuregex.h>
-
-#if __STDC__
-typedef void *ptr_t;
+#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
+#define ISALPHA(C) isalpha(C)
+#define ISUPPER(C) isupper(C)
+#define ISLOWER(C) islower(C)
+#define ISDIGIT(C) isdigit(C)
+#define ISXDIGIT(C) isxdigit(C)
+#define ISSPACE(C) isspace(C)
+#define ISPUNCT(C) ispunct(C)
+#define ISALNUM(C) isalnum(C)
+#define ISPRINT(C) isprint(C)
+#define ISGRAPH(C) isgraph(C)
+#define ISCNTRL(C) iscntrl(C)
 #else
-typedef char *ptr_t;
+#define ISALPHA(C) (isascii(C) && isalpha(C))
+#define ISUPPER(C) (isascii(C) && isupper(C))
+#define ISLOWER(C) (isascii(C) && islower(C))
+#define ISDIGIT(C) (isascii(C) && isdigit(C))
+#define ISXDIGIT(C) (isascii(C) && isxdigit(C))
+#define ISSPACE(C) (isascii(C) && isspace(C))
+#define ISPUNCT(C) (isascii(C) && ispunct(C))
+#define ISALNUM(C) (isascii(C) && isalnum(C))
+#define ISPRINT(C) (isascii(C) && isprint(C))
+#define ISGRAPH(C) (isascii(C) && isgraph(C))
+#define ISCNTRL(C) (isascii(C) && iscntrl(C))
 #endif

-static void	dfamust();
+/* If we (don't) have I18N.  */
+/* glibc defines _ */
+#ifndef _
+# ifdef HAVE_LIBINTL_H
+#  include <libintl.h>
+#  ifndef _
+#   define _(Str) gettext (Str)
+#  endif
+# else
+#  define _(Str) (Str)
+# endif
+#endif

 #ifdef __FreeBSD__
-static int collate_range_cmp (a, b)
-	int a, b;
-{
-	int r;
-	static char s[2][2];
+#include <gnuregex.h>
+#else
+#include "regex.h"
+#endif
+#include "dfa.h"

-	if ((unsigned char)a == (unsigned char)b)
+/* HPUX, define those as macros in sys/param.h */
+#ifdef setbit
+# undef setbit
+#endif
+#ifdef clrbit
+# undef clrbit
+#endif
+
+static void dfamust PARAMS ((struct dfa *dfa));
+
+static ptr_t xcalloc PARAMS ((size_t n, size_t s));
+static ptr_t xmalloc PARAMS ((size_t n));
+static ptr_t xrealloc PARAMS ((ptr_t p, size_t n));
+#ifdef DEBUG
+static void prtok PARAMS ((token t));
+#endif
+static int tstbit PARAMS ((int b, charclass c));
+static void setbit PARAMS ((int b, charclass c));
+static void clrbit PARAMS ((int b, charclass c));
+static void copyset PARAMS ((charclass src, charclass dst));
+static void zeroset PARAMS ((charclass s));
+static void notset PARAMS ((charclass s));
+static int equal PARAMS ((charclass s1, charclass s2));
+static int charclass_index PARAMS ((charclass s));
+static int looking_at PARAMS ((const char *s));
+static token lex PARAMS ((void));
+static void addtok PARAMS ((token t));
+static void atom PARAMS ((void));
+static int nsubtoks PARAMS ((int tindex));
+static void copytoks PARAMS ((int tindex, int ntokens));
+static void closure PARAMS ((void));
+static void branch PARAMS ((void));
+static void regexp PARAMS ((int toplevel));
+static void copy PARAMS ((position_set *src, position_set *dst));
+static void insert PARAMS ((position p, position_set *s));
+static void merge PARAMS ((position_set *s1, position_set *s2, position_set *m));
+static void delete PARAMS ((position p, position_set *s));
+static int state_index PARAMS ((struct dfa *d, position_set *s,
+			  int newline, int letter));
+static void build_state PARAMS ((int s, struct dfa *d));
+static void build_state_zero PARAMS ((struct dfa *d));
+static char *icatalloc PARAMS ((char *old, char *new));
+static char *icpyalloc PARAMS ((char *string));
+static char *istrstr PARAMS ((char *lookin, char *lookfor));
+static void ifree PARAMS ((char *cp));
+static void freelist PARAMS ((char **cpp));
+static char **enlist PARAMS ((char **cpp, char *new, size_t len));
+static char **comsubs PARAMS ((char *left, char *right));
+static char **addlists PARAMS ((char **old, char **new));
+static char **inboth PARAMS ((char **left, char **right));
+
+#ifdef __FreeBSD__
+static int
+collate_range_cmp(c1, c2)
+	int c1, c2;
+{
+	static char s1[2], s2[2];
+	int r;
+
+	if (c1 == c2)
 		return 0;
-	s[0][0] = a;
-	s[1][0] = b;
-	if ((r = strcoll(s[0], s[1])) == 0)
-		r = (unsigned char)a - (unsigned char)b;
+	s1[0] = c1;
+	s2[0] = c2;
+	if ((r = strcoll(s1, s2)) == 0)
+		r = c1 - c2;
+
 	return r;
 }
 #endif

 static ptr_t
 xcalloc(n, s)
-     int n;
+     size_t n;
     size_t s;
 {
  ptr_t r = calloc(n, s);

  if (!r)
-    dfaerror("Memory exhausted");
+    dfaerror(_("Memory exhausted"));
  return r;
 }

@ -102,7 +189,7 @@ xmalloc(n)

  assert(n != 0);
  if (!r)
-    dfaerror("Memory exhausted");
+    dfaerror(_("Memory exhausted"));
  return r;
 }

@ -115,11 +202,11 @@ xrealloc(p, n)

  assert(n != 0);
  if (!r)
-    dfaerror("Memory exhausted");
+    dfaerror(_("Memory exhausted"));
  return r;
 }

-#define CALLOC(p, t, n) ((p) = (t *) xcalloc((n), sizeof (t)))
+#define CALLOC(p, t, n) ((p) = (t *) xcalloc((size_t)(n), sizeof (t)))
 #define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t)))
 #define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t)))

@ -261,7 +348,7 @@ charclass_index(s)
 }

 /* Syntax bits controlling the behavior of the lexical analyzer. */
-static int syntax_bits, syntax_bits_set;
+static reg_syntax_t syntax_bits, syntax_bits_set;

 /* Flag for case-folding letters into sets. */
 static int case_fold;
@ -269,7 +356,7 @@ static int case_fold;
 /* Entry point to set syntax options. */
 void
 dfasyntax(bits, fold)
-     int bits;
+     reg_syntax_t bits;
     int fold;
 {
  syntax_bits_set = 1;
@ -284,7 +371,7 @@ dfasyntax(bits, fold)

 static char *lexstart;		/* Pointer to beginning of input string. */
 static char *lexptr;		/* Pointer to next input character. */
-static lexleft;			/* Number of characters remaining. */
+static int lexleft;		/* Number of characters remaining. */
 static token lasttok;		/* Previous token returned; initially END. */
 static int laststart;		/* True if we're separated from beginning or (, |
 				   only by zero-width characters. */
@ -298,12 +385,16 @@ static int minrep, maxrep;	/* Repeat counts for {m,n}. */
      if (eoferr != 0)	   	      \
 	dfaerror(eoferr);  	      \
      else		   	      \
-	return END;	   	      \
+	return lasttok = END;	      \
    (c) = (unsigned char) *lexptr++;  \
    --lexleft;		   	      \
  }

+#ifdef __STDC__
+#define FUNC(F, P) static int F(int c) { return P(c); }
+#else
 #define FUNC(F, P) static int F(c) int c; { return P(c); }
+#endif

 FUNC(is_alpha, ISALPHA)
 FUNC(is_upper, ISUPPER)
@ -317,32 +408,42 @@ FUNC(is_print, ISPRINT)
 FUNC(is_graph, ISGRAPH)
 FUNC(is_cntrl, ISCNTRL)

+static int is_blank(c)
+int c;
+{
+   return (c == ' ' || c == '\t');
+}
+
 /* The following list maps the names of the Posix named character classes
   to predicate functions that determine whether a given character is in
   the class.  The leading [ has already been eaten by the lexical analyzer. */
 static struct {
-  char *name;
-  int (*pred)();
+  const char *name;
+  int (*pred) PARAMS ((int));
 } prednames[] = {
-  ":alpha:]", is_alpha,
-  ":upper:]", is_upper,
-  ":lower:]", is_lower,
-  ":digit:]", is_digit,
-  ":xdigit:]", is_xdigit,
-  ":space:]", is_space,
-  ":punct:]", is_punct,
-  ":alnum:]", is_alnum,
-  ":print:]", is_print,
-  ":graph:]", is_graph,
-  ":cntrl:]", is_cntrl,
-  0
+  { ":alpha:]", is_alpha },
+  { ":upper:]", is_upper },
+  { ":lower:]", is_lower },
+  { ":digit:]", is_digit },
+  { ":xdigit:]", is_xdigit },
+  { ":space:]", is_space },
+  { ":punct:]", is_punct },
+  { ":alnum:]", is_alnum },
+  { ":print:]", is_print },
+  { ":graph:]", is_graph },
+  { ":cntrl:]", is_cntrl },
+  { ":blank:]", is_blank },
+  { 0 }
 };

+/* Return non-zero if C is a `word-constituent' byte; zero otherwise.  */
+#define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')
+
 static int
 looking_at(s)
-     char *s;
+     const char *s;
 {
-  int len;
+  size_t len;

  len = strlen(s);
  if (lexleft < len)
@ -373,7 +474,7 @@ lex()
 	  if (backslash)
 	    goto normal_char;
 	  if (lexleft == 0)
-	    dfaerror("Unfinished \\ escape");
+	    dfaerror(_("Unfinished \\ escape"));
 	  backslash = 1;
 	  break;

@ -419,23 +520,33 @@ lex()
 	    }
 	  goto normal_char;

+	case '`':
+	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+	    return lasttok = BEGLINE;	/* FIXME: should be beginning of string */
+	  goto normal_char;
+
+	case '\'':
+	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+	    return lasttok = ENDLINE;	/* FIXME: should be end of string */
+	  goto normal_char;
+
 	case '<':
-	  if (backslash)
+	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
 	    return lasttok = BEGWORD;
 	  goto normal_char;

 	case '>':
-	  if (backslash)
+	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
 	    return lasttok = ENDWORD;
 	  goto normal_char;

 	case 'b':
-	  if (backslash)
+	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
 	    return lasttok = LIMWORD;
 	  goto normal_char;

 	case 'B':
-	  if (backslash)
+	  if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
 	    return lasttok = NOTLIMWORD;
 	  goto normal_char;

@ -475,24 +586,24 @@ lex()
 	     {M,} - minimum count, maximum is infinity
 	     {,M} - 0 through M
 	     {M,N} - M through N */
-	  FETCH(c, "unfinished repeat count");
+	  FETCH(c, _("unfinished repeat count"));
 	  if (ISDIGIT(c))
 	    {
 	      minrep = c - '0';
 	      for (;;)
 		{
-		  FETCH(c, "unfinished repeat count");
+		  FETCH(c, _("unfinished repeat count"));
 		  if (!ISDIGIT(c))
 		    break;
 		  minrep = 10 * minrep + c - '0';
 		}
 	    }
 	  else if (c != ',')
-	    dfaerror("malformed repeat count");
+	    dfaerror(_("malformed repeat count"));
 	  if (c == ',')
 	    for (;;)
 	      {
-		FETCH(c, "unfinished repeat count");
+		FETCH(c, _("unfinished repeat count"));
 		if (!ISDIGIT(c))
 		  break;
 		maxrep = 10 * maxrep + c - '0';
@ -502,11 +613,11 @@ lex()
 	  if (!(syntax_bits & RE_NO_BK_BRACES))
 	    {
 	      if (c != '\\')
-		dfaerror("malformed repeat count");
-	      FETCH(c, "unfinished repeat count");
+		dfaerror(_("malformed repeat count"));
+	      FETCH(c, _("unfinished repeat count"));
 	    }
 	  if (c != '}')
-	    dfaerror("malformed repeat count");
+	    dfaerror(_("malformed repeat count"));
 	  laststart = 0;
 	  return lasttok = REPMN;

@ -556,11 +667,11 @@ lex()

 	case 'w':
 	case 'W':
-	  if (!backslash)
+	  if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
 	    goto normal_char;
 	  zeroset(ccl);
 	  for (c2 = 0; c2 < NOTCHAR; ++c2)
-	    if (ISALNUM(c2))
+	    if (IS_WORD_CONSTITUENT(c2))
 	      setbit(c2, ccl);
 	  if (c == 'W')
 	    notset(ccl);
@ -571,10 +682,10 @@ lex()
 	  if (backslash)
 	    goto normal_char;
 	  zeroset(ccl);
-	  FETCH(c, "Unbalanced [");
+	  FETCH(c, _("Unbalanced ["));
 	  if (c == '^')
 	    {
-	      FETCH(c, "Unbalanced [");
+	      FETCH(c, _("Unbalanced ["));
 	      invert = 1;
 	    }
 	  else
@ -591,20 +702,25 @@ lex()
 		for (c1 = 0; prednames[c1].name; ++c1)
 		  if (looking_at(prednames[c1].name))
 		    {
+			int (*pred)() = prednames[c1].pred;
+			if (case_fold
+			    && (pred == is_upper || pred == is_lower))
+				pred = is_alpha;
+
 		      for (c2 = 0; c2 < NOTCHAR; ++c2)
-			if ((*prednames[c1].pred)(c2))
+			if ((*pred)(c2))
 			  setbit(c2, ccl);
 		      lexptr += strlen(prednames[c1].name);
 		      lexleft -= strlen(prednames[c1].name);
-		      FETCH(c1, "Unbalanced [");
+		      FETCH(c1, _("Unbalanced ["));
 		      goto skip;
 		    }
 	      if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
-		FETCH(c, "Unbalanced [");
-	      FETCH(c1, "Unbalanced [");
+		FETCH(c, _("Unbalanced ["));
+	      FETCH(c1, _("Unbalanced ["));
 	      if (c1 == '-')
 		{
-		  FETCH(c2, "Unbalanced [");
+		  FETCH(c2, _("Unbalanced ["));
 		  if (c2 == ']')
 		    {
 		      /* In the case [x-], the - is an ordinary hyphen,
@ -617,31 +733,28 @@ lex()
 		    {
 		      if (c2 == '\\'
 			  && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
-			FETCH(c2, "Unbalanced [");
-		      FETCH(c1, "Unbalanced [");
+			FETCH(c2, _("Unbalanced ["));
+		      FETCH(c1, _("Unbalanced ["));
 		    }
 		}
 	      else
 		c2 = c;
 #ifdef __FreeBSD__
-	      { token c3;
+	      if (collate_range_cmp(c, c2) <= 0)
+	      {
+		token c3;

-		if (collate_range_cmp(c, c2) > 0) {
-		  FETCH(c2, "Invalid range");
-		  goto skip;
-		}
-
-		for (c3 = 0; c3 < NOTCHAR; ++c3)
-		  if (   collate_range_cmp(c, c3) <= 0
-		      && collate_range_cmp(c3, c2) <= 0
-		     ) {
+		for (c3 = 0; c3 < NOTCHAR; ++c3) {
+		  if (collate_range_cmp(c, c3) <= 0 &&
+		      collate_range_cmp(c3, c2) <= 0) {
 		    setbit(c3, ccl);
 		    if (case_fold)
 		      if (ISUPPER(c3))
-			setbit(tolower((unsigned char)c3), ccl);
-		      else if (ISLOWER(c3))
-			setbit(toupper((unsigned char)c3), ccl);
+			setbit(tolower(c3), ccl);
+		      else if (ISLOWER(c))
+			setbit(toupper(c3), ccl);
 		  }
+		}
 	      }
 #else
 	      while (c <= c2)
@ -649,9 +762,9 @@ lex()
 		  setbit(c, ccl);
 		  if (case_fold)
 		    if (ISUPPER(c))
-		      setbit(tolower((unsigned char)c), ccl);
+		      setbit(tolower(c), ccl);
 		    else if (ISLOWER(c))
-		      setbit(toupper((unsigned char)c), ccl);
+		      setbit(toupper(c), ccl);
 		  ++c;
 		}
 #endif
@ -675,10 +788,10 @@ lex()
 	    {
 	      zeroset(ccl);
 	      setbit(c, ccl);
-	      if (isupper((unsigned char)c))
-		setbit(tolower((unsigned char)c), ccl);
+	      if (isupper(c))
+		setbit(tolower(c), ccl);
 	      else
-		setbit(toupper((unsigned char)c), ccl);
+		setbit(toupper(c), ccl);
 	      return lasttok = CSET + charclass_index(ccl);
 	    }
 	  return c;
@ -688,12 +801,13 @@ lex()
  /* The above loop should consume at most a backslash
     and some other character. */
  abort();
+  return END;	/* keeps pedantic compilers happy. */
 }

 /* Recursive descent parser for regular expressions. */

 static token tok;		/* Lookahead token. */
-static depth;			/* Current depth of a hypothetical stack
+static int depth;		/* Current depth of a hypothetical stack
 				   holding deferred productions.  This is
 				   used to determine the depth that will be
 				   required of the real stack later on in
@ -761,12 +875,6 @@ addtok(t)

   The parser builds a parse tree in postfix form in an array of tokens. */

-#if __STDC__
-static void regexp(int);
-#else
-static void regexp();
-#endif
-
 static void
 atom()
 {
@ -782,7 +890,7 @@ atom()
      tok = lex();
      regexp(0);
      if (tok != RPAREN)
-	dfaerror("Unbalanced (");
+	dfaerror(_("Unbalanced ("));
      tok = lex();
    }
  else
@ -792,6 +900,7 @@ atom()
 /* Return the number of tokens in the given subexpression. */
 static int
 nsubtoks(tindex)
+int tindex;
 {
  int ntoks1;

@ -902,7 +1011,7 @@ dfaparse(s, len, d)
  parens = 0;

  if (! syntax_bits_set)
-    dfaerror("No syntax specified");
+    dfaerror(_("No syntax specified"));

  tok = lex();
  depth = d->depth;
@ -910,7 +1019,7 @@ dfaparse(s, len, d)
  regexp(1);

  if (tok != END)
-    dfaerror("Unbalanced )");
+    dfaerror(_("Unbalanced )"));

  addtok(END - d->nregexps);
  addtok(CAT);
@ -949,7 +1058,7 @@ insert(p, s)
  position t1, t2;

  for (i = 0; i < s->nelem && p.index < s->elems[i].index; ++i)
-    ;
+    continue;
  if (i < s->nelem && p.index == s->elems[i].index)
    s->elems[i].constraint |= p.constraint;
  else
@ -1082,7 +1191,9 @@ state_index(d, s, newline, letter)
   that position with the elements of its follow labeled with an appropriate
   constraint.  Repeat exhaustively until no funny positions are left.
   S->elems must be large enough to hold the result. */
-void
+static void epsclosure PARAMS ((position_set *s, struct dfa *d));
+
+static void
 epsclosure(s, d)
     position_set *s;
     struct dfa *d;
@ -1484,7 +1595,7 @@ dfastate(s, d, trans)
  int state_newline;		/* New state on a newline transition. */
  int wants_letter;		/* New state wants to know letter context. */
  int state_letter;		/* New state on a letter transition. */
-  static initialized;		/* Flag for static initialization. */
+  static int initialized;	/* Flag for static initialization. */
  int i, j, k;

  /* Initialize the set of letters, if necessary. */
@ -1492,7 +1603,7 @@ dfastate(s, d, trans)
    {
      initialized = 1;
      for (i = 0; i < NOTCHAR; ++i)
-	if (ISALNUM(i))
+	if (IS_WORD_CONSTITUENT(i))
 	  setbit(i, letters);
      setbit('\n', newline);
    }
@ -1531,7 +1642,7 @@ dfastate(s, d, trans)

 	  /* If there are no characters left, there's no point in going on. */
 	  for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j)
-	    ;
+	    continue;
 	  if (j == CHARCLASS_INTS)
 	    continue;
 	}
@ -1549,7 +1660,7 @@ dfastate(s, d, trans)
 	     matches. */
 	  intersectf = 0;
 	  for (k = 0; k < CHARCLASS_INTS; ++k)
-	    (intersect[k] = matches[k] & labels[j][k]) ? intersectf = 1 : 0;
+	    (intersect[k] = matches[k] & labels[j][k]) ? (intersectf = 1) : 0;
 	  if (! intersectf)
 	    continue;

@ -1560,8 +1671,8 @@ dfastate(s, d, trans)
 	      /* Even an optimizing compiler can't know this for sure. */
 	      int match = matches[k], label = labels[j][k];

-	      (leftovers[k] = ~match & label) ? leftoversf = 1 : 0;
-	      (matches[k] = match & ~label) ? matchesf = 1 : 0;
+	      (leftovers[k] = ~match & label) ? (leftoversf = 1) : 0;
+	      (matches[k] = match & ~label) ? (matchesf = 1) : 0;
 	    }

 	  /* If there were leftovers, create a new group labeled with them. */
@ -1625,12 +1736,8 @@ dfastate(s, d, trans)
      else
 	state_letter = state;
      for (i = 0; i < NOTCHAR; ++i)
-	if (i == '\n')
-	  trans[i] = state_newline;
-	else if (ISALNUM(i))
-	  trans[i] = state_letter;
-	else
-	  trans[i] = state;
+	trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state;
+      trans['\n'] = state_newline;
    }
  else
    for (i = 0; i < NOTCHAR; ++i)
@ -1688,7 +1795,7 @@ dfastate(s, d, trans)

 	      if (c == '\n')
 		trans[c] = state_newline;
-	      else if (ISALNUM(c))
+	      else if (IS_WORD_CONSTITUENT(c))
 		trans[c] = state_letter;
 	      else if (c < NOTCHAR)
 		trans[c] = state;
@ -1822,12 +1929,12 @@ dfaexec(d, begin, end, newline, count, backref)
     int *count;
     int *backref;
 {
-  register s, s1, tmp;		/* Current state. */
+  register int s, s1, tmp;	/* Current state. */
  register unsigned char *p;	/* Current input character. */
-  register **trans, *t;		/* Copy of d->trans so it can be optimized
+  register int **trans, *t;	/* Copy of d->trans so it can be optimized
 				   into a register. */
-  static sbit[NOTCHAR];	/* Table for anding with d->success. */
-  static sbit_init;
+  static int sbit[NOTCHAR];	/* Table for anding with d->success. */
+  static int sbit_init;

  if (! sbit_init)
    {
@ -1835,12 +1942,8 @@ dfaexec(d, begin, end, newline, count, backref)

      sbit_init = 1;
      for (i = 0; i < NOTCHAR; ++i)
-	if (i == '\n')
-	  sbit[i] = 4;
-	else if (ISALNUM(i))
-	  sbit[i] = 2;
-	else
-	  sbit[i] = 1;
+	sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
+      sbit['\n'] = 4;
    }

  if (! d->tralloc)
@ -1853,30 +1956,21 @@ dfaexec(d, begin, end, newline, count, backref)

  for (;;)
    {
-      /* The dreaded inner loop. */
-      if ((t = trans[s]) != 0)
-	do
-	  {
-	    s1 = t[*p++];
-	    if (! (t = trans[s1]))
-	      goto last_was_s;
-	    s = t[*p++];
-	  }
-        while ((t = trans[s]) != 0);
-      goto last_was_s1;
-    last_was_s:
-      tmp = s, s = s1, s1 = tmp;
-    last_was_s1:
+      while ((t = trans[s]) != 0) { /* hand-optimized loop */
+	s1 = t[*p++];
+        if ((t = trans[s1]) == 0) {
+           tmp = s ; s = s1 ; s1 = tmp ; /* swap */
+           break;
+        }
+	s = t[*p++];
+      }

      if (s >= 0 && p <= (unsigned char *) end && d->fails[s])
 	{
 	  if (d->success[s] & sbit[*p])
 	    {
 	      if (backref)
-		if (d->states[s].backref)
-		  *backref = 1;
-		else
-		  *backref = 0;
+		*backref = (d->states[s].backref != 0);
 	      return (char *) p;
 	    }

@ -1940,24 +2034,24 @@ dfacomp(s, len, d, searchflag)
 {
  if (case_fold)	/* dummy folding in service of dfamust() */
    {
-      char *copy;
+      char *lcopy;
      int i;

-      copy = malloc(len);
-      if (!copy)
-	dfaerror("out of memory");
+      lcopy = malloc(len);
+      if (!lcopy)
+	dfaerror(_("out of memory"));

      /* This is a kludge. */
      case_fold = 0;
      for (i = 0; i < len; ++i)
-	if (ISUPPER(s[i]))
-	  copy[i] = tolower((unsigned char)s[i]);
+	if (ISUPPER ((unsigned char) s[i]))
+	  lcopy[i] = tolower ((unsigned char) s[i]);
 	else
-	  copy[i] = s[i];
+	  lcopy[i] = s[i];

      dfainit(d);
-      dfaparse(copy, len, d);
-      free(copy);
+      dfaparse(lcopy, len, d);
+      free(lcopy);
      dfamust(d);
      d->cindex = d->tindex = d->depth = d->nleaves = d->nregexps = 0;
      case_fold = 1;
@ -1995,9 +2089,10 @@ dfafree(d)
      free((ptr_t) d->trans[i]);
    else if (d->fails[i])
      free((ptr_t) d->fails[i]);
-  free((ptr_t) d->realtrans);
-  free((ptr_t) d->fails);
-  free((ptr_t) d->newlines);
+  if (d->realtrans) free((ptr_t) d->realtrans);
+  if (d->fails) free((ptr_t) d->fails);
+  if (d->newlines) free((ptr_t) d->newlines);
+  if (d->success) free((ptr_t) d->success);
  for (dm = d->musts; dm; dm = ndm)
    {
      ndm = dm->next;
@ -2092,7 +2187,7 @@ icatalloc(old, new)
     char *new;
 {
  char *result;
-  int oldsize, newsize;
+  size_t oldsize, newsize;

  newsize = (new == NULL) ? 0 : strlen(new);
  if (old == NULL)
@ -2122,7 +2217,7 @@ istrstr(lookin, lookfor)
     char *lookfor;
 {
  char *cp;
-  int len;
+  size_t len;

  len = strlen(lookfor);
  for (cp = lookin; *cp != '\0'; ++cp)
@ -2158,7 +2253,7 @@ static char **
 enlist(cpp, new, len)
     char **cpp;
     char *new;
-     int len;
+     size_t len;
 {
  int i, j;

@ -2210,7 +2305,7 @@ comsubs(left, right)
  char **cpp;
  char *lcp;
  char *rcp;
-  int i, len;
+  size_t i, len;

  if (left == NULL || right == NULL)
    return NULL;
@ -2225,7 +2320,7 @@ comsubs(left, right)
      while (rcp != NULL)
 	{
 	  for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
-	    ;
+	    continue;
 	  if (i > len)
 	    len = i;
 	  rcp = index(rcp + 1, *lcp);
@ -2285,6 +2380,7 @@ inboth(left, right)
 	    }
 	  both = addlists(both, temp);
 	  freelist(temp);
+	  free(temp);
 	  if (both == NULL)
 	    return NULL;
 	}
@ -2321,8 +2417,9 @@ struct dfa *dfa;
  token t;
  static must must0;
  struct dfamust *dm;
+  static char empty_string[] = "";

-  result = "";
+  result = empty_string;
  exact = 0;
  musts = (must *) malloc((dfa->tindex + 1) * sizeof *musts);
  if (musts == NULL)
@ -2509,7 +2606,7 @@ struct dfa *dfa;
 	      resetmust(mp);
 	      mp->is[0] = mp->left[0] = mp->right[0] = t;
 	      mp->is[1] = mp->left[1] = mp->right[1] = '\0';
-	      mp->in = enlist(mp->in, mp->is, 1);
+	      mp->in = enlist(mp->in, mp->is, (size_t)1);
 	      if (mp->in == NULL)
 		goto done;
 	    }