Trim down libcompat by removing <regexp.h>.
Erwin ran an exp-run with libcompat and <regexp.h> removed. It turns out the regexp library is almost entirely unused. In fact, it looks like it is sometimes used by accident. Because these function names clash with libc's <regex.h>, some applications use both <regex.h> and libcompat, which means they link against the wrong regex library. This commit removes the regexp library and reimplements re_comp() and re_exec() using <regex.h>. It seems the grammar of the regular expressions accepted by these functions is similar to POSIX EREs. After this commit, 1 low-profile port will be broken, but the maintainer already has a patch for it sitting in his mailbox.
This commit is contained in:
parent
c9bb666937
commit
763ed73371
@ -14,6 +14,10 @@
|
||||
# The file is partitioned: OLD_FILES first, then OLD_LIBS and OLD_DIRS last.
|
||||
#
|
||||
|
||||
# 20100314: removal of regexp.h
|
||||
OLD_FILES+=usr/include/regexp.h
|
||||
OLD_FILES+=usr/share/man/man3/regexp.3.gz
|
||||
OLD_FILES+=usr/share/man/man3/regsub.3.gz
|
||||
# 20100303: actual removal of utmp.h
|
||||
OLD_FILES+=usr/include/utmp.h
|
||||
# 20100227: [ia64] removed <machine/sapicreg.h> and <machine/sapicvar.h>
|
||||
|
@ -17,7 +17,7 @@ INCS= a.out.h ar.h assert.h bitstring.h complex.h cpio.h _ctype.h ctype.h \
|
||||
ndbm.h netconfig.h \
|
||||
netdb.h nl_types.h nlist.h nss.h nsswitch.h paths.h \
|
||||
printf.h proc_service.h pthread.h \
|
||||
pthread_np.h pwd.h ranlib.h readpassphrase.h regex.h regexp.h \
|
||||
pthread_np.h pwd.h ranlib.h readpassphrase.h regex.h \
|
||||
res_update.h resolv.h runetype.h search.h semaphore.h setjmp.h \
|
||||
signal.h spawn.h stab.h \
|
||||
stdbool.h stddef.h stdio.h stdlib.h string.h stringlist.h \
|
||||
|
@ -1,70 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 1986 by University of Toronto.
|
||||
* Copyright (c) 1989, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley
|
||||
* by Henry Spencer.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)regexp.h 8.1 (Berkeley) 6/2/93
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _REGEXP_H_
|
||||
#define _REGEXP_H_
|
||||
|
||||
#ifdef __GNUC__
|
||||
#warning "this file includes <regexp.h> which is deprecated, use <regex.h> instead"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Definitions etc. for regexp(3) routines.
|
||||
*
|
||||
* Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
|
||||
* not the System V one.
|
||||
*/
|
||||
#define NSUBEXP 10
|
||||
/*
 * Compiled regular expression, as returned by regcomp().
 *
 * After a successful regexec(), each startp[i]/endp[i] pair delimits one
 * matched substring of the input: startp[i] points at its first character
 * and endp[i] at the first character following it.  Index 0 is the whole
 * match; the other indices are the parenthesized subexpressions, numbered
 * left to right by opening parenthesis.  The remaining members are private
 * to the implementation.
 */
typedef struct regexp {
|
||||
char *startp[NSUBEXP];
|
||||
char *endp[NSUBEXP];
|
||||
char regstart; /* Internal use only. */
|
||||
char reganch; /* Internal use only. */
|
||||
char *regmust; /* Internal use only. */
|
||||
int regmlen; /* Internal use only. */
|
||||
char program[1]; /* Unwarranted chumminess with compiler. */
|
||||
} regexp;
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
|
||||
__BEGIN_DECLS
|
||||
regexp *regcomp(const char *);
|
||||
int regexec(const regexp *, const char *);
|
||||
void regsub(const regexp *, const char *, char *);
|
||||
void regerror(const char *);
|
||||
__END_DECLS
|
||||
|
||||
#endif /* !_REGEXP_H_ */
|
@ -100,15 +100,10 @@ returns \-1 for an internal error.
|
||||
The
|
||||
.Fn re_comp
|
||||
function
|
||||
returns one of the following strings if an error occurs:
|
||||
.Bd -unfilled -offset indent
|
||||
No previous regular expression,
|
||||
Regular expression too long,
|
||||
unmatched \e(,
|
||||
missing ],
|
||||
too many \e(\e) pairs,
|
||||
unmatched \e).
|
||||
.Ed
|
||||
returns
|
||||
.Dq no previous regular expression
|
||||
or one of the strings generated by
|
||||
.Xr regerror 3 .
|
||||
.Sh SEE ALSO
|
||||
.Xr ed 1 ,
|
||||
.Xr egrep 1 ,
|
||||
|
@ -44,49 +44,49 @@ __FBSDID("$FreeBSD$");
|
||||
static char sccsid[] = "@(#)regex.c 5.1 (Berkeley) 3/29/92";
|
||||
#endif /* LIBC_SCCS and not lint */
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <regex.h>
|
||||
#include <stddef.h>
|
||||
#include <regexp.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
static regexp *re_regexp;
|
||||
static int re_goterr;
|
||||
static char *re_errstr;
|
||||
static regex_t re_regexp;
|
||||
static int re_gotexp;
|
||||
static char re_errstr[100];
|
||||
|
||||
char *
|
||||
re_comp(char *s)
|
||||
{
|
||||
if (s == NULL || *s == '\0') {
|
||||
if (re_regexp == NULL)
|
||||
return "no previous regular expression";
|
||||
return (NULL);
|
||||
}
|
||||
if (re_regexp)
|
||||
free(re_regexp);
|
||||
if (re_errstr)
|
||||
free(re_errstr);
|
||||
re_goterr = 0;
|
||||
re_regexp = regcomp(s);
|
||||
return (re_goterr ? re_errstr : NULL);
|
||||
}
|
||||
|
||||
int
|
||||
re_exec(char *s)
|
||||
re_comp(const char *s)
|
||||
{
|
||||
int rc;
|
||||
|
||||
re_goterr = 0;
|
||||
rc = regexec(re_regexp, s);
|
||||
return (re_goterr ? -1 : rc);
|
||||
if (s == NULL || *s == '\0') {
|
||||
if (!re_gotexp)
|
||||
return __DECONST(char *,
|
||||
"no previous regular expression");
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
if (re_gotexp) {
|
||||
regfree(&re_regexp);
|
||||
re_gotexp = 0;
|
||||
}
|
||||
|
||||
rc = regcomp(&re_regexp, s, REG_EXTENDED);
|
||||
if (rc == 0) {
|
||||
re_gotexp = 1;
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
regerror(rc, &re_regexp, re_errstr, sizeof(re_errstr));
|
||||
re_errstr[sizeof(re_errstr) - 1] = '\0';
|
||||
return (re_errstr);
|
||||
}
|
||||
|
||||
void
|
||||
regerror(const char *s)
|
||||
int
|
||||
re_exec(const char *s)
|
||||
{
|
||||
re_goterr = 1;
|
||||
if (re_errstr)
|
||||
free(re_errstr);
|
||||
re_errstr = strdup(s);
|
||||
int rc;
|
||||
|
||||
if (!re_gotexp)
|
||||
return (-1);
|
||||
rc = regexec(&re_regexp, s, 0, NULL, 0);
|
||||
return (rc == 0 ? 1 : 0);
|
||||
}
|
@ -1,19 +1,15 @@
|
||||
# @(#)Makefile 8.1 (Berkeley) 6/4/93
|
||||
# $FreeBSD$
|
||||
|
||||
LIB=compat
|
||||
LIB= compat
|
||||
CFLAGS+=-DLIBC_SCCS -DSYSLIBC_SCCS -I${.CURDIR}/../libc/locale
|
||||
NO_PIC=
|
||||
|
||||
WARNS?= 0
|
||||
|
||||
.PATH: ${.CURDIR}/4.1/${MACHINE_ARCH} ${.CURDIR}/4.1 \
|
||||
${.CURDIR}/4.3/${MACHINE_ARCH} ${.CURDIR}/4.3 \
|
||||
${.CURDIR}/4.4/${MACHINE_ARCH} ${.CURDIR}/4.4 \
|
||||
${.CURDIR}/regexp
|
||||
.PATH: ${.CURDIR}/4.1 ${.CURDIR}/4.3 ${.CURDIR}/4.4
|
||||
|
||||
# compat 4.1 sources
|
||||
# XXX MISSING: tell.c
|
||||
SRCS+= ascftime.c cftime.c ftime.c getpw.c
|
||||
|
||||
MAN+= 4.1/ftime.3 4.1/getpw.3
|
||||
@ -22,27 +18,15 @@ MAN+= 4.1/cftime.3
|
||||
MLINKS+=cftime.3 ascftime.3
|
||||
|
||||
# compat 4.3 sources
|
||||
# XXX MISSING: ecvt.c gcvt.c sibuf.c sobuf.c strout.c
|
||||
SRCS+= cfree.c regex.c rexec.c
|
||||
SRCS+= cfree.c re_comp.c rexec.c
|
||||
|
||||
# XXX MISSING: ecvt.0
|
||||
MAN+= 4.3/cfree.3 4.3/re_comp.3 4.3/rexec.3
|
||||
|
||||
# XXX MISSING: ecvt.3, so can't MLINK
|
||||
#MLINKS+=ecvt.3 fcvt.3 ecvt.3 gcvt.3
|
||||
MLINKS+=re_comp.3 re_exec.3
|
||||
|
||||
# compat 4.4 sources
|
||||
SRCS+= cuserid.c
|
||||
|
||||
MAN+= 4.4/cuserid.3
|
||||
|
||||
# regexp sources
|
||||
SRCS+= regerror.c regexp.c regsub.c
|
||||
|
||||
MAN+= regexp/regexp.3
|
||||
|
||||
# XXX name clash with libc
|
||||
# MLINKS+=regexp.3 regcomp.3 regexp.3 regexec.3 regexp.3 regerror.3
|
||||
MLINKS+=regexp.3 regsub.3
|
||||
|
||||
.include <bsd.lib.mk>
|
||||
|
@ -1,22 +0,0 @@
|
||||
This entire subtree is copyright the University of Toronto.
|
||||
The following copyright notice applies to all files found here. None of
|
||||
these files contain AT&T proprietary source code.
|
||||
_____________________________________________________________________________
|
||||
|
||||
Copyright (c) 1986 by University of Toronto.
|
||||
Written by Henry Spencer. Not derived from licensed software.
|
||||
|
||||
Permission is granted to anyone to use this software for any
|
||||
purpose on any computer system, and to redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The author is not responsible for the consequences of use of
|
||||
this software, no matter how awful, even if they arise
|
||||
from defects in it.
|
||||
|
||||
2. The origin of this software must not be misrepresented, either
|
||||
by explicit claim or by omission.
|
||||
|
||||
3. Altered versions must be plainly marked as such, and must not
|
||||
be misrepresented as being the original software.
|
||||
|
@ -1,84 +0,0 @@
|
||||
This is a nearly-public-domain reimplementation of the V8 regexp(3) package.
|
||||
It gives C programs the ability to use egrep-style regular expressions, and
|
||||
does it in a much cleaner fashion than the analogous routines in SysV.
|
||||
|
||||
Copyright (c) 1986 by University of Toronto.
|
||||
Written by Henry Spencer. Not derived from licensed software.
|
||||
|
||||
Permission is granted to anyone to use this software for any
|
||||
purpose on any computer system, and to redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The author is not responsible for the consequences of use of
|
||||
this software, no matter how awful, even if they arise
|
||||
from defects in it.
|
||||
|
||||
2. The origin of this software must not be misrepresented, either
|
||||
by explicit claim or by omission.
|
||||
|
||||
3. Altered versions must be plainly marked as such, and must not
|
||||
be misrepresented as being the original software.
|
||||
|
||||
Barring a couple of small items in the BUGS list, this implementation is
|
||||
believed 100% compatible with V8. It should even be binary-compatible,
|
||||
sort of, since the only fields in a "struct regexp" that other people have
|
||||
any business touching are declared in exactly the same way at the same
|
||||
location in the struct (the beginning).
|
||||
|
||||
This implementation is *NOT* AT&T/Bell code, and is not derived from licensed
|
||||
software. Even though U of T is a V8 licensee. This software is based on
|
||||
a V8 manual page sent to me by Dennis Ritchie (the manual page enclosed
|
||||
here is a complete rewrite and hence is not covered by AT&T copyright).
|
||||
The software was nearly complete at the time of arrival of our V8 tape.
|
||||
I haven't even looked at V8 yet, although a friend elsewhere at U of T has
|
||||
been kind enough to run a few test programs using the V8 regexp(3) to resolve
|
||||
a few fine points. I admit to some familiarity with regular-expression
|
||||
implementations of the past, but the only one that this code traces any
|
||||
ancestry to is the one published in Kernighan & Plauger (from which this
|
||||
one draws ideas but not code).
|
||||
|
||||
Simplistically: put this stuff into a source directory, copy regexp.h into
|
||||
/usr/include, inspect Makefile for compilation options that need changing
|
||||
to suit your local environment, and then do "make r". This compiles the
|
||||
regexp(3) functions, compiles a test program, and runs a large set of
|
||||
regression tests. If there are no complaints, then put regexp.o, regsub.o,
|
||||
and regerror.o into your C library, and regexp.3 into your manual-pages
|
||||
directory.
|
||||
|
||||
Note that if you don't put regexp.h into /usr/include *before* compiling,
|
||||
you'll have to add "-I." to CFLAGS before compiling.
|
||||
|
||||
The files are:
|
||||
|
||||
Makefile instructions to make everything
|
||||
regexp.3 manual page
|
||||
regexp.h header file, for /usr/include
|
||||
regexp.c source for regcomp() and regexec()
|
||||
regsub.c source for regsub()
|
||||
regerror.c source for default regerror()
|
||||
regmagic.h internal header file
|
||||
try.c source for test program
|
||||
timer.c source for timing program
|
||||
tests test list for try and timer
|
||||
|
||||
This implementation uses nondeterministic automata rather than the
|
||||
deterministic ones found in some other implementations, which makes it
|
||||
simpler, smaller, and faster at compiling regular expressions, but slower
|
||||
at executing them. In theory, anyway. This implementation does employ
|
||||
some special-case optimizations to make the simpler cases (which do make
|
||||
up the bulk of regular expressions actually used) run quickly. In general,
|
||||
if you want blazing speed you're in the wrong place. Replacing the insides
|
||||
of egrep with this stuff is probably a mistake; if you want your own egrep
|
||||
you're going to have to do a lot more work. But if you want to use regular
|
||||
expressions a little bit in something else, you're in luck. Note that many
|
||||
existing text editors use nondeterministic regular-expression implementations,
|
||||
so you're in good company.
|
||||
|
||||
This stuff should be pretty portable, given appropriate option settings.
|
||||
If your chars have less than 8 bits, you're going to have to change the
|
||||
internal representation of the automaton, although knowledge of the details
|
||||
of this is fairly localized. There are no "reserved" char values except for
|
||||
NUL, and no special significance is attached to the top bit of chars.
|
||||
The string(3) functions are used a fair bit, on the grounds that they are
|
||||
probably faster than coding the operations in line. Some attempts at code
|
||||
tuning have been made, but this is invariably a bit machine-specific.
|
@ -1,18 +0,0 @@
|
||||
#include <regexp.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * regerror - default error hook for the regexp(3) routines.
 *
 * Called with a human-readable message `s' whenever regcomp(), regexec()
 * or regsub() detects an error.  When ERRAVAIL is defined the message is
 * forwarded to error(); otherwise this default is deliberately silent and
 * simply returns, leaving error handling to the caller ("let std. egrep
 * handle errors").  Programs that want different behaviour may supply
 * their own regerror().
 */
void
regerror(const char *s)
{
#ifdef ERRAVAIL
	error("regexp: %s", s);
#else
	/* Intentionally a no-op: do not print and do not exit. */
	(void)s;
#endif
}
|
@ -1,319 +0,0 @@
|
||||
.\" Copyright (c) 1991, 1993
|
||||
.\" The Regents of the University of California. All rights reserved.
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
.\" modification, are permitted provided that the following conditions
|
||||
.\" are met:
|
||||
.\" 1. Redistributions of source code must retain the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer.
|
||||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\" 4. Neither the name of the University nor the names of its contributors
|
||||
.\" may be used to endorse or promote products derived from this software
|
||||
.\" without specific prior written permission.
|
||||
.\"
|
||||
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" @(#)regexp.3 8.1 (Berkeley) 6/4/93
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd June 4, 1993
|
||||
.Dt REGEXP 3
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm regcomp ,
|
||||
.Nm regexec ,
|
||||
.Nm regsub ,
|
||||
.Nm regerror
|
||||
.Nd regular expression handlers
|
||||
.Sh LIBRARY
|
||||
.Lb libcompat
|
||||
.Sh SYNOPSIS
|
||||
.In regexp.h
|
||||
.Ft regexp *
|
||||
.Fn regcomp "const char *exp"
|
||||
.Ft int
|
||||
.Fn regexec "const regexp *prog" "const char *string"
|
||||
.Ft void
|
||||
.Fn regsub "const regexp *prog" "const char *source" "char *dest"
|
||||
.Sh DESCRIPTION
|
||||
.Bf Sy
|
||||
This interface is made obsolete by
|
||||
.Xr regex 3 .
|
||||
.Ef
|
||||
.Pp
|
||||
The
|
||||
.Fn regcomp ,
|
||||
.Fn regexec ,
|
||||
.Fn regsub ,
|
||||
and
|
||||
.Fn regerror
|
||||
functions
|
||||
implement
|
||||
.Xr egrep 1 Ns -style
|
||||
regular expressions and supporting facilities.
|
||||
.Pp
|
||||
The
|
||||
.Fn regcomp
|
||||
function
|
||||
compiles a regular expression into a structure of type
|
||||
.Vt regexp ,
|
||||
and returns a pointer to it.
|
||||
The space has been allocated using
|
||||
.Xr malloc 3
|
||||
and may be released by
|
||||
.Xr free 3 .
|
||||
.Pp
|
||||
The
|
||||
.Fn regexec
|
||||
function
|
||||
matches a
|
||||
.Dv NUL Ns -terminated
|
||||
.Fa string
|
||||
against the compiled regular expression
|
||||
in
|
||||
.Fa prog .
|
||||
It returns 1 for success and 0 for failure, and adjusts the contents of
|
||||
.Fa prog Ns 's
|
||||
.Em startp
|
||||
and
|
||||
.Em endp
|
||||
(see below) accordingly.
|
||||
.Pp
|
||||
The members of a
|
||||
.Vt regexp
|
||||
structure include at least the following (not necessarily in order):
|
||||
.Bd -literal -offset indent
|
||||
char *startp[NSUBEXP];
|
||||
char *endp[NSUBEXP];
|
||||
.Ed
|
||||
.Pp
|
||||
where
|
||||
.Dv NSUBEXP
|
||||
is defined (as 10) in the header file.
|
||||
Once a successful
|
||||
.Fn regexec
|
||||
has been done using the
|
||||
.Vt regexp ,
|
||||
each
|
||||
.Em startp Ns - Em endp
|
||||
pair describes one substring
|
||||
within the
|
||||
.Fa string ,
|
||||
with the
|
||||
.Em startp
|
||||
pointing to the first character of the substring and
|
||||
the
|
||||
.Em endp
|
||||
pointing to the first character following the substring.
|
||||
The 0th substring is the substring of
|
||||
.Fa string
|
||||
that matched the whole
|
||||
regular expression.
|
||||
The others are those substrings that matched parenthesized expressions
|
||||
within the regular expression, with parenthesized expressions numbered
|
||||
in left-to-right order of their opening parentheses.
|
||||
.Pp
|
||||
The
|
||||
.Fn regsub
|
||||
function
|
||||
copies
|
||||
.Fa source
|
||||
to
|
||||
.Fa dest ,
|
||||
making substitutions according to the
|
||||
most recent
|
||||
.Fn regexec
|
||||
performed using
|
||||
.Fa prog .
|
||||
Each instance of `&' in
|
||||
.Fa source
|
||||
is replaced by the substring
|
||||
indicated by
|
||||
.Em startp Ns Bq 0
|
||||
and
|
||||
.Em endp Ns Bq 0 .
|
||||
Each instance of
|
||||
.Sq \e Ns Em n ,
|
||||
where
|
||||
.Em n
|
||||
is a digit, is replaced by
|
||||
the substring indicated by
|
||||
.Em startp Ns Bq Em n
|
||||
and
|
||||
.Em endp Ns Bq Em n .
|
||||
To get a literal `&' or
|
||||
.Sq \e Ns Em n
|
||||
into
|
||||
.Fa dest ,
|
||||
prefix it with `\e';
|
||||
to get a literal `\e' preceding `&' or
|
||||
.Sq \e Ns Em n ,
|
||||
prefix it with
|
||||
another `\e'.
|
||||
.Pp
|
||||
The
|
||||
.Fn regerror
|
||||
function
|
||||
is called whenever an error is detected in
|
||||
.Fn regcomp ,
|
||||
.Fn regexec ,
|
||||
or
|
||||
.Fn regsub .
|
||||
The default
|
||||
.Fn regerror
|
||||
writes the string
|
||||
.Fa msg ,
|
||||
with a suitable indicator of origin,
|
||||
on the standard
|
||||
error output
|
||||
and invokes
|
||||
.Xr exit 3 .
|
||||
The
|
||||
.Fn regerror
|
||||
function
|
||||
can be replaced by the user if other actions are desirable.
|
||||
.Sh REGULAR EXPRESSION SYNTAX
|
||||
A regular expression is zero or more
|
||||
.Em branches ,
|
||||
separated by `|'.
|
||||
It matches anything that matches one of the branches.
|
||||
.Pp
|
||||
A branch is zero or more
|
||||
.Em pieces ,
|
||||
concatenated.
|
||||
It matches a match for the first, followed by a match for the second, etc.
|
||||
.Pp
|
||||
A piece is an
|
||||
.Em atom
|
||||
possibly followed by `*', `+', or `?'.
|
||||
An atom followed by `*' matches a sequence of 0 or more matches of the atom.
|
||||
An atom followed by `+' matches a sequence of 1 or more matches of the atom.
|
||||
An atom followed by `?' matches a match of the atom, or the null string.
|
||||
.Pp
|
||||
An atom is a regular expression in parentheses (matching a match for the
|
||||
regular expression), a
|
||||
.Em range
|
||||
(see below), `.'
|
||||
(matching any single character), `^' (matching the null string at the
|
||||
beginning of the input string), `$' (matching the null string at the
|
||||
end of the input string), a `\e' followed by a single character (matching
|
||||
that character), or a single character with no other significance
|
||||
(matching that character).
|
||||
.Pp
|
||||
A
|
||||
.Em range
|
||||
is a sequence of characters enclosed in `[]'.
|
||||
It normally matches any single character from the sequence.
|
||||
If the sequence begins with `^',
|
||||
it matches any single character
|
||||
.Em not
|
||||
from the rest of the sequence.
|
||||
If two characters in the sequence are separated by `\-', this is shorthand
|
||||
for the full list of
|
||||
.Tn ASCII
|
||||
characters between them
|
||||
(e.g.\& `[0-9]' matches any decimal digit).
|
||||
To include a literal `]' in the sequence, make it the first character
|
||||
(following a possible `^').
|
||||
To include a literal `\-', make it the first or last character.
|
||||
.Sh AMBIGUITY
|
||||
If a regular expression could match two different parts of the input string,
|
||||
it will match the one which begins earliest.
|
||||
If both begin in the same place but match different lengths, or match
|
||||
the same length in different ways, life gets messier, as follows.
|
||||
.Pp
|
||||
In general, the possibilities in a list of branches are considered in
|
||||
left-to-right order, the possibilities for `*', `+', and `?' are
|
||||
considered longest-first, nested constructs are considered from the
|
||||
outermost in, and concatenated constructs are considered leftmost-first.
|
||||
The match that will be chosen is the one that uses the earliest
|
||||
possibility in the first choice that has to be made.
|
||||
If there is more than one choice, the next will be made in the same manner
|
||||
(earliest possibility) subject to the decision on the first choice.
|
||||
And so forth.
|
||||
.Pp
|
||||
For example,
|
||||
.Sq Li (ab|a)b*c
|
||||
could match
|
||||
`abc' in one of two ways.
|
||||
The first choice is between `ab' and `a'; since `ab' is earlier, and does
|
||||
lead to a successful overall match, it is chosen.
|
||||
Since the `b' is already spoken for,
|
||||
the `b*' must match its last possibility\(emthe empty string\(emsince
|
||||
it must respect the earlier choice.
|
||||
.Pp
|
||||
In the particular case where no `|'s are present and there is only one
|
||||
`*', `+', or `?', the net effect is that the longest possible
|
||||
match will be chosen.
|
||||
So
|
||||
.Sq Li ab* ,
|
||||
presented with `xabbbby', will match `abbbb'.
|
||||
Note that if
|
||||
.Sq Li ab*
|
||||
is tried against `xabyabbbz', it
|
||||
will match `ab' just after `x', due to the begins-earliest rule.
|
||||
(In effect, the decision on where to start the match is the first choice
|
||||
to be made, hence subsequent choices must respect it even if this leads them
|
||||
to less-preferred alternatives.)
|
||||
.Sh RETURN VALUES
|
||||
The
|
||||
.Fn regcomp
|
||||
function
|
||||
returns
|
||||
.Dv NULL
|
||||
for a failure
|
||||
.Pf ( Fn regerror
|
||||
permitting),
|
||||
where failures are syntax errors, exceeding implementation limits,
|
||||
or applying `+' or `*' to a possibly-null operand.
|
||||
.Sh SEE ALSO
|
||||
.Xr ed 1 ,
|
||||
.Xr egrep 1 ,
|
||||
.Xr ex 1 ,
|
||||
.Xr expr 1 ,
|
||||
.Xr fgrep 1 ,
|
||||
.Xr grep 1 ,
|
||||
.Xr regex 3
|
||||
.Sh HISTORY
|
||||
Both code and manual page for
|
||||
.Fn regcomp ,
|
||||
.Fn regexec ,
|
||||
.Fn regsub ,
|
||||
and
|
||||
.Fn regerror
|
||||
were written at the University of Toronto
|
||||
and appeared in
|
||||
.Bx 4.3 tahoe .
|
||||
They are intended to be compatible with the Bell V8
|
||||
.Xr regexp 3 ,
|
||||
but are not derived from Bell code.
|
||||
.Sh BUGS
|
||||
Empty branches and empty regular expressions are not portable to V8.
|
||||
.Pp
|
||||
The restriction against
|
||||
applying `*' or `+' to a possibly-null operand is an artifact of the
|
||||
simplistic implementation.
|
||||
.Pp
|
||||
Does not support
|
||||
.Xr egrep 1 Ns 's
|
||||
newline-separated branches;
|
||||
neither does the V8
|
||||
.Xr regexp 3 ,
|
||||
though.
|
||||
.Pp
|
||||
Due to emphasis on
|
||||
compactness and simplicity,
|
||||
it is not strikingly fast.
|
||||
It does give special attention to handling simple cases quickly.
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +0,0 @@
|
||||
/*
|
||||
* The first byte of the regexp internal "program" is actually this magic
|
||||
* number; the start node begins in the second byte.
|
||||
*/
|
||||
#define MAGIC 0234
|
@ -1,85 +0,0 @@
|
||||
/*
|
||||
* regsub
|
||||
*
|
||||
* Copyright (c) 1986 by University of Toronto.
|
||||
* Written by Henry Spencer. Not derived from licensed software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any
|
||||
* purpose on any computer system, and to redistribute it freely,
|
||||
* subject to the following restrictions:
|
||||
*
|
||||
* 1. The author is not responsible for the consequences of use of
|
||||
* this software, no matter how awful, even if they arise
|
||||
* from defects in it.
|
||||
*
|
||||
* 2. The origin of this software must not be misrepresented, either
|
||||
* by explicit claim or by omission.
|
||||
*
|
||||
* 3. Altered versions must be plainly marked as such, and must not
|
||||
* be misrepresented as being the original software.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <regexp.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "regmagic.h"
|
||||
|
||||
#ifndef CHARBITS
|
||||
#define UCHARAT(p) ((int)*(unsigned char *)(p))
|
||||
#else
|
||||
#define UCHARAT(p) ((int)*(p)&CHARBITS)
|
||||
#endif
|
||||
|
||||
/*
|
||||
- regsub - perform substitutions after a regexp match
|
||||
*/
|
||||
void
|
||||
regsub(prog, source, dest)
|
||||
const regexp *prog;
|
||||
const char *source;
|
||||
char *dest;
|
||||
{
|
||||
char *src;
|
||||
char *dst;
|
||||
char c;
|
||||
int no;
|
||||
int len;
|
||||
extern char *strncpy();
|
||||
|
||||
if (prog == NULL || source == NULL || dest == NULL) {
|
||||
regerror("NULL parm to regsub");
|
||||
return;
|
||||
}
|
||||
if (UCHARAT(prog->program) != MAGIC) {
|
||||
regerror("damaged regexp fed to regsub");
|
||||
return;
|
||||
}
|
||||
|
||||
src = (char *)source;
|
||||
dst = dest;
|
||||
while ((c = *src++) != '\0') {
|
||||
if (c == '&')
|
||||
no = 0;
|
||||
else if (c == '\\' && '0' <= *src && *src <= '9')
|
||||
no = *src++ - '0';
|
||||
else
|
||||
no = -1;
|
||||
if (no < 0) { /* Ordinary character. */
|
||||
if (c == '\\' && (*src == '\\' || *src == '&'))
|
||||
c = *src++;
|
||||
*dst++ = c;
|
||||
} else if (prog->startp[no] != NULL && prog->endp[no] != NULL) {
|
||||
len = prog->endp[no] - prog->startp[no];
|
||||
(void) strncpy(dst, prog->startp[no], len);
|
||||
dst += len;
|
||||
if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */
|
||||
regerror("damaged match string");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
*dst++ = '\0';
|
||||
}
|
Loading…
Reference in New Issue
Block a user