libregex: implement \` and \' (begin-of-subj, end-of-subj)
These are GNU extensions, generally equivalent to ^ and $, except that they anchor only to the subject as a whole: \` will not match at the beginning of any line after the first in a multi-line subject, and \' will not match at the end of any line before the very last in a multi-line subject.
This commit is contained in:
parent
7518fb346f
commit
ca53e5aedf
@ -109,7 +109,7 @@ static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatc
|
||||
static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
|
||||
static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
|
||||
static const char *walk(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, bool fast);
|
||||
static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
|
||||
static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft, int sflags);
|
||||
#define MAX_RECURSION 100
|
||||
#define BOL (OUT-1)
|
||||
#define EOL (BOL-1)
|
||||
@ -119,6 +119,10 @@ static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_
|
||||
#define EOW (BOL-5)
|
||||
#define BADCHAR (BOL-6)
|
||||
#define NONCHAR(c) ((c) <= OUT)
|
||||
/* sflags */
|
||||
#define SBOS 0x0001 /* at beginning of subject; step() advances past OBOS only when set */
|
||||
#define SEOS 0x0002 /* at end of subject; step() advances past OEOS only when set */
|
||||
|
||||
#ifdef REDEBUG
|
||||
static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
|
||||
#endif
|
||||
@ -457,6 +461,8 @@ dissect(struct match *m,
|
||||
case OEOL:
|
||||
case OBOW:
|
||||
case OEOW:
|
||||
case OBOS:
|
||||
case OEOS:
|
||||
break;
|
||||
case OANY:
|
||||
case OANYOF:
|
||||
@ -657,6 +663,18 @@ backref(struct match *m,
|
||||
if (wc == BADCHAR || !CHIN(cs, wc))
|
||||
return(NULL);
|
||||
break;
|
||||
case OBOS:
|
||||
if (sp == m->beginp && (m->eflags & REG_NOTBOL) == 0)
|
||||
{ /* yes */ }
|
||||
else
|
||||
return(NULL);
|
||||
break;
|
||||
case OEOS:
|
||||
if (sp == m->endp && (m->eflags & REG_NOTEOL) == 0)
|
||||
{ /* yes */ }
|
||||
else
|
||||
return(NULL);
|
||||
break;
|
||||
case OBOL:
|
||||
if ((sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
|
||||
(sp > m->offp && sp < m->endp &&
|
||||
@ -819,15 +837,16 @@ walk(struct match *m, const char *start, const char *stop, sopno startst,
|
||||
wint_t c;
|
||||
wint_t lastc; /* previous c */
|
||||
wint_t flagch;
|
||||
int i;
|
||||
int i, sflags;
|
||||
const char *matchp; /* last p at which a match ended */
|
||||
size_t clen;
|
||||
|
||||
sflags = 0;
|
||||
AT("slow", start, stop, startst, stopst);
|
||||
CLEAR(st);
|
||||
SET1(st, startst);
|
||||
SP("sstart", st, *p);
|
||||
st = step(m->g, startst, stopst, st, NOTHING, st);
|
||||
st = step(m->g, startst, stopst, st, NOTHING, st, sflags);
|
||||
if (fast)
|
||||
ASSIGN(fresh, st);
|
||||
matchp = NULL;
|
||||
@ -844,6 +863,7 @@ walk(struct match *m, const char *start, const char *stop, sopno startst,
|
||||
for (;;) {
|
||||
/* next character */
|
||||
lastc = c;
|
||||
sflags = 0;
|
||||
if (p == m->endp) {
|
||||
c = OUT;
|
||||
clen = 0;
|
||||
@ -866,9 +886,20 @@ walk(struct match *m, const char *start, const char *stop, sopno startst,
|
||||
flagch = (flagch == BOL) ? BOLEOL : EOL;
|
||||
i += m->g->neol;
|
||||
}
|
||||
if (lastc == OUT && (m->eflags & REG_NOTBOL) == 0) {
|
||||
sflags |= SBOS;
|
||||
/* Step one more for BOS. */
|
||||
i++;
|
||||
}
|
||||
if (c == OUT && (m->eflags & REG_NOTEOL) == 0) {
|
||||
sflags |= SEOS;
|
||||
/* Step one more for EOS. */
|
||||
i++;
|
||||
}
|
||||
if (i != 0) {
|
||||
for (; i > 0; i--)
|
||||
st = step(m->g, startst, stopst, st, flagch, st);
|
||||
st = step(m->g, startst, stopst, st, flagch, st,
|
||||
sflags);
|
||||
SP("sboleol", st, c);
|
||||
}
|
||||
|
||||
@ -882,7 +913,7 @@ walk(struct match *m, const char *start, const char *stop, sopno startst,
|
||||
flagch = EOW;
|
||||
}
|
||||
if (flagch == BOW || flagch == EOW) {
|
||||
st = step(m->g, startst, stopst, st, flagch, st);
|
||||
st = step(m->g, startst, stopst, st, flagch, st, sflags);
|
||||
SP("sboweow", st, c);
|
||||
}
|
||||
|
||||
@ -903,9 +934,10 @@ walk(struct match *m, const char *start, const char *stop, sopno startst,
|
||||
else
|
||||
ASSIGN(st, empty);
|
||||
assert(c != OUT);
|
||||
st = step(m->g, startst, stopst, tmp, c, st);
|
||||
st = step(m->g, startst, stopst, tmp, c, st, sflags);
|
||||
SP("saft", st, c);
|
||||
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
|
||||
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st, sflags),
|
||||
st));
|
||||
p += clen;
|
||||
}
|
||||
|
||||
@ -939,7 +971,8 @@ step(struct re_guts *g,
|
||||
sopno stop, /* state after stop state within strip */
|
||||
states bef, /* states reachable before */
|
||||
wint_t ch, /* character or NONCHAR code */
|
||||
states aft) /* states already known reachable after */
|
||||
states aft, /* states already known reachable after */
|
||||
int sflags) /* state flags */
|
||||
{
|
||||
cset *cs;
|
||||
sop s;
|
||||
@ -960,6 +993,14 @@ step(struct re_guts *g,
|
||||
if (ch == OPND(s))
|
||||
FWD(aft, bef, 1);
|
||||
break;
|
||||
case OBOS:
|
||||
if ((ch == BOL || ch == BOLEOL) && (sflags & SBOS) != 0)
|
||||
FWD(aft, bef, 1);
|
||||
break;
|
||||
case OEOS:
|
||||
if ((ch == EOL || ch == BOLEOL) && (sflags & SEOS) != 0)
|
||||
FWD(aft, bef, 1);
|
||||
break;
|
||||
case OBOL:
|
||||
if (ch == BOL || ch == BOLEOL)
|
||||
FWD(aft, bef, 1);
|
||||
|
@ -480,6 +480,12 @@ p_ere_exp(struct parse *p, struct branchc *bc)
|
||||
if (p->gnuext) {
|
||||
handled = 1;
|
||||
switch (wc) {
|
||||
case '`':
|
||||
EMIT(OBOS, 0);
|
||||
break;
|
||||
case '\'':
|
||||
EMIT(OEOS, 0);
|
||||
break;
|
||||
case 'W':
|
||||
case 'w':
|
||||
case 'S':
|
||||
@ -833,6 +839,12 @@ p_simp_re(struct parse *p, struct branchc *bc)
|
||||
if (p->gnuext) {
|
||||
handled = true;
|
||||
switch (c) {
|
||||
case BACKSL|'`':
|
||||
EMIT(OBOS, 0);
|
||||
break;
|
||||
case BACKSL|'\'':
|
||||
EMIT(OEOS, 0);
|
||||
break;
|
||||
case BACKSL|'W':
|
||||
case BACKSL|'w':
|
||||
case BACKSL|'S':
|
||||
@ -1878,6 +1890,8 @@ findmust(struct parse *p, struct re_guts *g)
|
||||
case OEOW:
|
||||
case OBOL:
|
||||
case OEOL:
|
||||
case OBOS:
|
||||
case OEOS:
|
||||
case O_QUEST:
|
||||
case O_CH:
|
||||
case OEND:
|
||||
|
@ -104,6 +104,8 @@ typedef unsigned long sopno;
|
||||
#define O_CH (18L<<OPSHIFT) /* end choice back to OOR1 */
|
||||
#define OBOW (19L<<OPSHIFT) /* begin word - */
|
||||
#define OEOW (20L<<OPSHIFT) /* end word - */
|
||||
#define OBOS (21L<<OPSHIFT) /* begin subj. - */
|
||||
#define OEOS (22L<<OPSHIFT) /* end subj. - */
|
||||
|
||||
/*
|
||||
* Structures for [] character-set representation.
|
||||
|
@ -25,8 +25,12 @@ a\|b\|c b abc a
|
||||
#\B[abc]\B & <abc> b
|
||||
#\B[abc]+ - <abc> bc
|
||||
#\B[abc]\+ b <abc> bc
|
||||
#\`abc\' & abc abc
|
||||
#\`.+\' - abNc abNc
|
||||
#\`.\+\' b abNc abNc
|
||||
#(\`a) - Na
|
||||
#(a\') - aN
|
||||
\`abc & abc abc
|
||||
abc\' & abc abc
|
||||
\`abc\' & abc abc
|
||||
\`.+\' - abNc abNc
|
||||
\`.\+\' b abNc abNc
|
||||
(\`a) - Na
|
||||
(a\`) - aN
|
||||
(a\') - aN
|
||||
(\'a) - Na
|
||||
|
Loading…
Reference in New Issue
Block a user