314 lines
7.0 KiB
C
Raw Normal View History

/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
* Commodore Business Machines.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef lint
static char sccsid[] = "@(#)rxp.c 8.1 (Berkeley) 5/31/93";
#endif /* not lint */
/*
* regular expression parser
*
* external functions and return values are:
* rxp_compile(s)
* TRUE success
* FALSE parse failure; error message will be in char rxperr[]
* metas are:
* {...} optional pattern, equialent to [...|]
* | alternate pattern
* [...] pattern delimiters
*
* rxp_match(s)
* TRUE string s matches compiled pattern
* FALSE match failure or regexp error
*
* rxp_expand()
* char * reverse-engineered regular expression string
* NULL regexp error
*/
#include <stdio.h>
#include <ctype.h>
#include "quiz.h"
/* regexp tokens, arg */
#define LIT (-1) /* literal character, char */
#define SOT (-2) /* start text anchor, - */
#define EOT (-3) /* end text anchor, - */
#define GRP_S (-4) /* start alternate grp, ptr_to_end */
#define GRP_E (-5) /* end group, - */
#define ALT_S (-6) /* alternate starts, ptr_to_next */
#define ALT_E (-7) /* alternate ends, - */
#define END (-8) /* end of regexp, - */
typedef short Rxp_t; /* type for regexp tokens */
static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */
char rxperr[128]; /* parser error message */
static int rxp__compile __P((char *, int));
static char *rxp__expand __P((int));
static int rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *));
int
rxp_compile(s)
register char * s;
{
return (rxp__compile(s, TRUE));
}
static int
rxp__compile(s, first)
register char *s;
int first;
{
static Rxp_t *rp;
static char *sp;
Rxp_t *grp_ptr;
Rxp_t *alt_ptr;
int esc, err;
esc = 0;
if (first) {
rp = rxpbuf;
sp = s;
*rp++ = SOT; /* auto-anchor: pat is really ^pat$ */
*rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */
*rp++ = 0;
}
*rp++ = ALT_S;
alt_ptr = rp;
*rp++ = 0;
for (; *sp; ++sp) {
if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
(void)snprintf(rxperr, sizeof(rxperr),
"regular expression too long %s", s);
return (FALSE);
}
if (*sp == ':' && !esc)
break;
if (esc) {
*rp++ = LIT;
*rp++ = *sp;
esc = 0;
}
else switch (*sp) {
case '\\':
esc = 1;
break;
case '{':
case '[':
*rp++ = GRP_S;
grp_ptr = rp;
*rp++ = 0;
sp++;
if ((err = rxp__compile(s, FALSE)) != TRUE)
return (err);
*rp++ = GRP_E;
*grp_ptr = rp - rxpbuf;
break;
case '}':
case ']':
case '|':
*rp++ = ALT_E;
*alt_ptr = rp - rxpbuf;
if (*sp != ']') {
*rp++ = ALT_S;
alt_ptr = rp;
*rp++ = 0;
}
if (*sp != '|') {
if (*sp != ']') {
*rp++ = ALT_E;
*alt_ptr = rp - rxpbuf;
}
if (first) {
(void)snprintf(rxperr, sizeof(rxperr),
"unmatched alternator in regexp %s",
s);
return (FALSE);
}
return (TRUE);
}
break;
default:
*rp++ = LIT;
*rp++ = *sp;
esc = 0;
break;
}
}
if (!first) {
(void)snprintf(rxperr, sizeof(rxperr),
"unmatched alternator in regexp %s", s);
return (FALSE);
}
*rp++ = ALT_E;
*alt_ptr = rp - rxpbuf;
*rp++ = GRP_E;
*(rxpbuf + 2) = rp - rxpbuf;
*rp++ = EOT;
*rp = END;
return (TRUE);
}
/*
* match string against compiled regular expression
*/
int
rxp_match(s)
register char * s;
{
return (rxp__match(s, TRUE, NULL, NULL, NULL));
}
static int
rxp__match(s, first, j_succ, j_fail, sp_fail)
char *s;
int first;
Rxp_t *j_succ; /* jump here on successful alt match */
Rxp_t *j_fail; /* jump here on failed match */
char *sp_fail; /* reset sp to here on failed match */
{
static Rxp_t *rp;
static char *sp;
register int ch;
Rxp_t *grp_end;
int err;
if (first) {
rp = rxpbuf;
sp = s;
}
while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
switch(*rp) {
case LIT:
rp++;
ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
if (ch != *sp++) {
rp = j_fail;
sp = sp_fail;
return (TRUE);
}
rp++;
break;
case SOT:
if (sp != s)
return (FALSE);
rp++;
break;
case EOT:
if (*sp != 0)
return (FALSE);
rp++;
break;
case GRP_S:
rp++;
grp_end = rxpbuf + *rp++;
break;
case ALT_S:
rp++;
if ((err = rxp__match(sp,
FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
return (err);
break;
case ALT_E:
rp = j_succ;
return (TRUE);
case GRP_E:
default:
return (FALSE);
}
return (*rp != END ? FALSE : TRUE);
}
/*
* Reverse engineer the regular expression, by picking first of all alternates.
*/
char *
rxp_expand()
{
return (rxp__expand(TRUE));
}
static char *
rxp__expand(first)
int first;
{
static char buf[RXP_LINE_SZ/2];
static Rxp_t *rp;
static char *bp;
Rxp_t *grp_ptr;
char *err;
if (first) {
rp = rxpbuf;
bp = buf;
}
while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
switch(*rp) {
case LIT:
rp++;
*bp++ = *rp++;
break;
case GRP_S:
rp++;
grp_ptr = rxpbuf + *rp;
rp++;
if ((err = rxp__expand(FALSE)) == NULL)
return (err);
rp = grp_ptr;
break;
case ALT_E:
return (buf);
case ALT_S:
rp++;
/* FALLTHROUGH */
case SOT:
case EOT:
case GRP_E:
rp++;
break;
default:
return (NULL);
}
if (first) {
if (*rp != END)
return (NULL);
*bp = '\0';
}
return (buf);
}