sed: process \r, \n, and \t

This is both reasonable and a common GNUism that a lot of ported software
expects.

Universally process \r, \n, and \t into carriage return, newline, and tab
respectively. Newline still doesn't work in contexts where it cannot match
(e.g. in a BRE), but we process it anyway rather than passing \n (an escaped
ordinary character, whose behavior is undefined) through to the underlying
regex engine.

Adding a --posix flag to disable these was considered, but sed.1 already
declares this version of sed a superset of the POSIX specification, and this
behavior is most likely what is expected when one attempts to use one of
these escape sequences in pattern space.

This differs from pre-r197362 behavior in that we now honor the three
arguably most common escape sequences used with sed(1) and we do so outside
of character classes, too.

Other escape sequences, like \s and \S, will come later when GNU extensions
are added to libregex. sed will likely link against libregex by default,
since the GNU extensions tend to be fairly unintrusive.

PR:		229925
Reviewed by:	bapt, emaste, pfg
Differential Revision:	https://reviews.freebsd.org/D22750
This commit is contained in:
Kyle Evans 2019-12-10 19:16:00 +00:00
parent 0d42317659
commit 6e816d8711
3 changed files with 69 additions and 8 deletions

View File

@ -395,10 +395,21 @@ compile_delimited(char *p, char *d, int is_tr)
continue;
} else if (*p == '\\' && p[1] == '[') {
*d++ = *p++;
} else if (*p == '\\' && p[1] == c)
} else if (*p == '\\' && p[1] == c) {
p++;
else if (*p == '\\' && p[1] == 'n') {
*d++ = '\n';
} else if (*p == '\\' &&
(p[1] == 'n' || p[1] == 'r' || p[1] == 't')) {
switch (p[1]) {
case 'n':
*d++ = '\n';
break;
case 'r':
*d++ = '\r';
break;
case 't':
*d++ = '\t';
break;
}
p += 2;
continue;
} else if (*p == '\\' && p[1] == '\\') {
@ -428,13 +439,29 @@ compile_ccl(char **sp, char *t)
*t++ = *s++;
if (*s == ']')
*t++ = *s++;
for (; *s && (*t = *s) != ']'; s++, t++)
for (; *s && (*t = *s) != ']'; s++, t++) {
if (*s == '[' && ((d = *(s+1)) == '.' || d == ':' || d == '=')) {
*++t = *++s, t++, s++;
for (c = *s; (*t = *s) != ']' || c != d; s++, t++)
if ((c = *s) == '\0')
return NULL;
} else if (*s == '\\') {
switch (s[1]) {
case 'n':
*t = '\n';
s++;
break;
case 'r':
*t = '\r';
s++;
break;
case 't':
*t = '\t';
s++;
break;
}
}
}
return (*s == ']') ? *sp = ++s, ++t : NULL;
}
@ -521,8 +548,23 @@ compile_subst(char *p, struct s_subst *s)
linenum, fname, *p);
if (s->maxbref < ref)
s->maxbref = ref;
} else if (*p == '&' || *p == '\\')
*sp++ = '\\';
} else {
switch (*p) {
case '&':
case '\\':
*sp++ = '\\';
break;
case 'n':
*p = '\n';
break;
case 'r':
*p = '\r';
break;
case 't':
*p = '\t';
break;
}
}
} else if (*p == c) {
if (*++p == '\0' && more) {
if (cu_fgets(lbuf, sizeof(lbuf), &more))

View File

@ -1,2 +1 @@
1
2
1X2

View File

@ -69,9 +69,29 @@ inplace_command_q_body()
atf_check -s not-exit:0 stat -q '.!'*
}
# Declare the escape_subst test case; its head hook only sets the
# human-readable description.
atf_test_case escape_subst
escape_subst_head() {
	atf_set "descr" "Verify functional escaping of \\n, \\r, and \\t"
}
# Body of the escape_subst test case.
#
# Builds three small fixture files and runs sed over them to check that \t
# and \r are honored in an address, in both halves of s///, and inside a
# bracket expression, while an escaped backslash followed by 't' stays literal.
escape_subst_body()
{
	# a: "a", "t\t" (a literal backslash followed by 't'), <TAB>b,
	#    <TAB><TAB>c<CR>
	printf "a\nt\\\t\n\tb\n\t\tc\r\n" > a
	# b: the same content as a with every carriage return deleted
	tr -d '\r' < a > b
	# c: a single line containing a tab, a space and a carriage return
	printf "a\tb c\rx\n" > c

	# \t in an address selects lines holding a real tab; the line with the
	# literal backslash-t text survives.
	atf_check -o 'inline:a\nt\\t\n' sed '/\t/d' a
	# Every tab is replaced by exactly one space, so the one-tab line gains
	# one leading space and the two-tab line gains two.
	atf_check -o 'inline:a\nt\\t\n b\n  c\r\n' sed 's/\t/ /g' a
	# \t in the replacement emits real tabs: each tab is doubled.
	atf_check -o 'inline:a\nt\\t\n\t\tb\n\t\t\t\tc\r\n' sed 's/\t/\t\t/g' a
	# \\t matches only the literal backslash-t text; real tabs are untouched.
	atf_check -o 'inline:a\nt\n\tb\n\t\tc\r\n' sed 's/\\t//g' a
	# \r matches a carriage return, so the output equals fixture b.
	atf_check -o file:b sed 's/\r//' a
	# Inside a bracket expression \r and \t are also honored.
	atf_check -o 'inline:abcx\n' sed 's/[ \r\t]//g' c
}
# Add the test cases listed below to the test program, in this order.
atf_init_test_cases() {
	for _tc in inplace_command_q inplace_hardlink_src inplace_symlink_src \
	    escape_subst; do
		atf_add_test_case "${_tc}"
	done
	# Do not leave the loop variable behind in the script's global scope.
	unset _tc
}