regex(3): Handle invalid {} constructs consistently and adjust tests
Currently, regex(3) exhibits the following incorrect behavior, as demonstrated with sed:

- echo "a{1,2,3}b" | sed -r "s/{/_/" (1)
- echo "a{1,2,3}b" | sed "s/\}/_/" (2)
- echo "a{1,2,3}b" | sed -r "s/{}/_/" (3)

Cases (1) and (3) should produce errors but actually succeed, and case (2) produces an error when it should match the literal '}'.

The correct behavior was decided by comparing against the behavior of the equivalent BRE (for cases (1) and (3)) or ERE (for case (2)) and by consulting POSIX, along with some reasonable evaluation.

Tests were also adjusted/added accordingly.

PR: 166861
Reviewed by: emaste, ngie, pfg
Approved by: emaste (mentor)
MFC after: never
Differential Revision: https://reviews.freebsd.org/D10315
This commit is contained in:
parent
c20505134f
commit
39d016e70f
@ -1,9 +1,24 @@
|
||||
# the dreaded bounded repetitions
|
||||
{ & { {
|
||||
{abc & {abc {abc
|
||||
# Begin FreeBSD
|
||||
{ C BADRPT
|
||||
{ b { {
|
||||
\{ - { {
|
||||
\{ bC BADRPT
|
||||
{} C BADRPT
|
||||
{} b {} {}
|
||||
\{\} - {} {}
|
||||
\{\} bC BADRPT
|
||||
} & } }
|
||||
\} & } }
|
||||
{abc b {abc {abc
|
||||
{abc C BADRPT
|
||||
# End FreeBSD
|
||||
{1 C BADRPT
|
||||
{1} C BADRPT
|
||||
a{b & a{b a{b
|
||||
# Begin FreeBSD
|
||||
a{b b a{b a{b
|
||||
a{b C BADRPT
|
||||
# End FreeBSD
|
||||
a{1}b - ab ab
|
||||
a\{1\}b b ab ab
|
||||
a{1,}b - ab ab
|
||||
@ -16,9 +31,15 @@ a{1a C EBRACE
|
||||
a\{1a bC EBRACE
|
||||
a{1a} C BADBR
|
||||
a\{1a\} bC BADBR
|
||||
a{,2} - a{,2} a{,2}
|
||||
# Begin FreeBSD
|
||||
a{,2} b a{,2} a{,2}
|
||||
a{,2} C BADBR
|
||||
# End FreeBSD
|
||||
a\{,2\} bC BADBR
|
||||
a{,} - a{,} a{,}
|
||||
# Begin FreeBSD
|
||||
a{,} b a{,} a{,}
|
||||
a{,} C BADBR
|
||||
# End FreeBSD
|
||||
a\{,\} bC BADBR
|
||||
a{1,x} C BADBR
|
||||
a\{1,x\} bC BADBR
|
||||
|
@ -15,7 +15,10 @@ a?{1} C BADRPT
|
||||
a{1}* C BADRPT
|
||||
a{1}+ C BADRPT
|
||||
a{1}? C BADRPT
|
||||
a*{b} - a{b} a{b}
|
||||
# Begin FreeBSD
|
||||
a*{b} b a{b} a{b}
|
||||
a*{b} C BADRPT
|
||||
# End FreeBSD
|
||||
a\{1\}\{1\} bC BADRPT
|
||||
a*\{1\} bC BADRPT
|
||||
a\{1\}* bC BADRPT
|
||||
|
@ -412,6 +412,7 @@ p_ere_exp(struct parse *p, struct branchc *bc)
|
||||
case '*':
|
||||
case '+':
|
||||
case '?':
|
||||
case '{':
|
||||
SETERROR(REG_BADRPT);
|
||||
break;
|
||||
case '.':
|
||||
@ -438,9 +439,6 @@ p_ere_exp(struct parse *p, struct branchc *bc)
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '{': /* okay as ordinary except if digit follows */
|
||||
(void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
|
||||
/* FALLTHROUGH */
|
||||
default:
|
||||
if (p->error != 0)
|
||||
return (false);
|
||||
@ -454,9 +452,11 @@ p_ere_exp(struct parse *p, struct branchc *bc)
|
||||
return (false);
|
||||
c = PEEK();
|
||||
/* we call { a repetition if followed by a digit */
|
||||
if (!( c == '*' || c == '+' || c == '?' ||
|
||||
(c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
|
||||
if (!( c == '*' || c == '+' || c == '?' || c == '{'))
|
||||
return (false); /* no repetition, we're done */
|
||||
else if (c == '{')
|
||||
(void)REQUIRE(MORE2() && \
|
||||
(isdigit((uch)PEEK2()) || PEEK2() == ','), REG_BADRPT);
|
||||
NEXT();
|
||||
|
||||
(void)REQUIRE(!wascaret, REG_BADRPT);
|
||||
@ -757,7 +757,6 @@ p_simp_re(struct parse *p, struct branchc *bc)
|
||||
(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
|
||||
break;
|
||||
case BACKSL|')': /* should not get here -- must be user */
|
||||
case BACKSL|'}':
|
||||
SETERROR(REG_EPAREN);
|
||||
break;
|
||||
case BACKSL|'1':
|
||||
|
Loading…
Reference in New Issue
Block a user