regex(3): Handle invalid {} constructs consistently and adjust tests

Currently, regex(3) exhibits the following incorrect behavior, demonstrated
here with sed:

 - echo "a{1,2,3}b" | sed -r "s/{/_/"     (1)
 - echo "a{1,2,3}b" | sed "s/\}/_/"       (2)
 - echo "a{1,2,3}b" | sed -r "s/{}/_/"    (3)

Cases (1) and (3) should fail with an error but currently succeed, while (2)
fails with an error when it should match the literal '}'. The correct
behavior was determined by comparing each case against its equivalent BRE
(cases 1 and 3) or ERE (case 2) form and consulting POSIX, along with some
reasonable judgment.

Tests were also adjusted/added accordingly.

PR:		166861
Reviewed by:	emaste, ngie, pfg
Approved by:	emaste (mentor)
MFC after:	never
Differential Revision:	https://reviews.freebsd.org/D10315
This commit is contained in:
kevans 2017-08-08 04:10:46 +00:00
parent c20505134f
commit 39d016e70f
3 changed files with 35 additions and 12 deletions

View File

@ -1,9 +1,24 @@
# the dreaded bounded repetitions
{ & { {
{abc & {abc {abc
# Begin FreeBSD
{ C BADRPT
{ b { {
\{ - { {
\{ bC BADRPT
{} C BADRPT
{} b {} {}
\{\} - {} {}
\{\} bC BADRPT
} & } }
\} & } }
{abc b {abc {abc
{abc C BADRPT
# End FreeBSD
{1 C BADRPT
{1} C BADRPT
a{b & a{b a{b
# Begin FreeBSD
a{b b a{b a{b
a{b C BADRPT
# End FreeBSD
a{1}b - ab ab
a\{1\}b b ab ab
a{1,}b - ab ab
@ -16,9 +31,15 @@ a{1a C EBRACE
a\{1a bC EBRACE
a{1a} C BADBR
a\{1a\} bC BADBR
a{,2} - a{,2} a{,2}
# Begin FreeBSD
a{,2} b a{,2} a{,2}
a{,2} C BADBR
# End FreeBSD
a\{,2\} bC BADBR
a{,} - a{,} a{,}
# Begin FreeBSD
a{,} b a{,} a{,}
a{,} C BADBR
# End FreeBSD
a\{,\} bC BADBR
a{1,x} C BADBR
a\{1,x\} bC BADBR

View File

@ -15,7 +15,10 @@ a?{1} C BADRPT
a{1}* C BADRPT
a{1}+ C BADRPT
a{1}? C BADRPT
a*{b} - a{b} a{b}
# Begin FreeBSD
a*{b} b a{b} a{b}
a*{b} C BADRPT
# End FreeBSD
a\{1\}\{1\} bC BADRPT
a*\{1\} bC BADRPT
a\{1\}* bC BADRPT

View File

@ -412,6 +412,7 @@ p_ere_exp(struct parse *p, struct branchc *bc)
case '*':
case '+':
case '?':
case '{':
SETERROR(REG_BADRPT);
break;
case '.':
@ -438,9 +439,6 @@ p_ere_exp(struct parse *p, struct branchc *bc)
break;
}
break;
case '{': /* okay as ordinary except if digit follows */
(void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
/* FALLTHROUGH */
default:
if (p->error != 0)
return (false);
@ -454,9 +452,11 @@ p_ere_exp(struct parse *p, struct branchc *bc)
return (false);
c = PEEK();
/* we call { a repetition if followed by a digit */
if (!( c == '*' || c == '+' || c == '?' ||
(c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
if (!( c == '*' || c == '+' || c == '?' || c == '{'))
return (false); /* no repetition, we're done */
else if (c == '{')
(void)REQUIRE(MORE2() && \
(isdigit((uch)PEEK2()) || PEEK2() == ','), REG_BADRPT);
NEXT();
(void)REQUIRE(!wascaret, REG_BADRPT);
@ -757,7 +757,6 @@ p_simp_re(struct parse *p, struct branchc *bc)
(void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
break;
case BACKSL|')': /* should not get here -- must be user */
case BACKSL|'}':
SETERROR(REG_EPAREN);
break;
case BACKSL|'1':