/*
 * Report whether c is a hexadecimal digit (0-9, a-f, A-F).
 *
 * The argument is converted to unsigned char before it reaches <ctype.h>:
 * plain char may be signed, and passing a negative value to a ctype
 * function is undefined behavior.
 */
static bool
hexdigit(char c)
{
	return (isxdigit((unsigned char)c) != 0);
}

/*
 * Decode the one or two hexadecimal digits at 'in' into a single byte.
 *
 *   in   digits to decode; in[1] is examined only when len > 1
 *   out  receives the decoded byte on success; left untouched on failure
 *   len  number of digits to consume; any value above 1 means two
 *
 * Returns 0 on success, or ERANGE when a consumed character is not a hex
 * digit.  The digits are decoded by hand rather than with strtol(3) so
 * that leading whitespace, a sign, or an empty string can never be
 * accepted as a valid number.
 */
static int
hex2char(const char *in, char *out, int len)
{
	unsigned int ord;
	int i, ndigits, uc;

	ndigits = len > 1 ? 2 : 1;
	ord = 0;
	for (i = 0; i < ndigits; i++) {
		/* A NUL in the second slot just means a one-digit value. */
		if (i > 0 && in[i] == '\0')
			break;
		if (!hexdigit(in[i]))
			return (ERANGE);
		uc = (unsigned char)in[i];
		if (isdigit(uc))
			ord = ord * 16 + (unsigned int)(uc - '0');
		else
			ord = ord * 16 + (unsigned int)(tolower(uc) - 'a' + 10);
	}
	*out = (char)ord;
	return (0);
}

/*
 * Try to decode the digits of a hex escape (the text following "\x").
 *
 *   in   first character after the 'x'
 *   out  receives the decoded byte on success
 *   len  receives the number of digits consumed (1 or 2) on success
 *
 * Returns true when at least one hex digit was present and decoded.  On
 * false, neither *out nor *len is modified.  A second digit is consumed
 * only if it is itself a hex digit; anything after that is left for the
 * caller to treat literally.
 */
static bool
dohex(const char *in, char *out, int *len)
{
	int tmplen;

	if (!hexdigit(in[0]))
		return (false);
	/* in[0] is not NUL here, so looking at in[1] stays in bounds. */
	tmplen = hexdigit(in[1]) ? 2 : 1;
	if (hex2char(in, out, tmplen) != 0)
		return (false);
	*len = tmplen;
	return (true);
}
Newline and delimiter escapes are processed; other @@ -377,6 +423,7 @@ semicolon: EATSPACE(); static char * compile_delimited(char *p, char *d, int is_tr) { + int hexlen; char c; c = *p++; @@ -412,6 +459,12 @@ compile_delimited(char *p, char *d, int is_tr) } p += 2; continue; + } else if (*p == '\\' && p[1] == 'x') { + if (dohex(&p[2], d, &hexlen)) { + ++d; + p += hexlen + 2; + continue; + } } else if (*p == '\\' && p[1] == '\\') { if (is_tr) p++; @@ -431,7 +484,7 @@ compile_delimited(char *p, char *d, int is_tr) static char * compile_ccl(char **sp, char *t) { - int c, d; + int c, d, hexlen; char *s = *sp; *t++ = *s++; @@ -459,6 +512,10 @@ compile_ccl(char **sp, char *t) *t = '\t'; s++; break; + case 'x': + if (dohex(&s[2], t, &hexlen)) + s += hexlen + 1; + break; } } } @@ -499,7 +556,7 @@ static char * compile_subst(char *p, struct s_subst *s) { static char lbuf[_POSIX2_LINE_MAX + 1]; - int asize, size; + int asize, hexlen, size; u_char ref; char c, *text, *op, *sp; int more = 1, sawesc = 0; @@ -563,6 +620,21 @@ compile_subst(char *p, struct s_subst *s) case 't': *p = '\t'; break; + case 'x': +#define ADVANCE_N(s, n) \ + do { \ + char *adv = (s); \ + while (*(adv + (n) - 1) != '\0') { \ + *adv = *(adv + (n)); \ + ++adv; \ + } \ + *adv = '\0'; \ + } while (0); + if (dohex(&p[1], p, &hexlen)) { + ADVANCE_N(p + 1, + hexlen); + } + break; } } } else if (*p == c) { diff --git a/usr.bin/sed/tests/sed2_test.sh b/usr.bin/sed/tests/sed2_test.sh index 48e1c2b216f6..c7f4b29a8f88 100755 --- a/usr.bin/sed/tests/sed2_test.sh +++ b/usr.bin/sed/tests/sed2_test.sh @@ -88,10 +88,39 @@ escape_subst_body() atf_check -o 'inline:abcx\n' sed 's/[ \r\t]//g' c } +atf_test_case hex_subst +hex_subst_head() +{ + atf_set "descr" "Verify proper conversion of hex escapes" +} +hex_subst_body() +{ + printf "test='foo'" > a + printf "test='27foo'" > b + printf "\rn" > c + printf "xx" > d + + atf_check -o 'inline:test="foo"' sed 's/\x27/"/g' a + atf_check -o "inline:'test'='foo'" sed 
's/test/\x27test\x27/g' a + + # Make sure we take trailing digits literally. + atf_check -o "inline:test=\"foo'" sed 's/\x2727/"/g' b + + # Single digit \x should work as well. + atf_check -o "inline:xn" sed 's/\xd/x/' c + + # Invalid digit should cause us to ignore the sequence. This test + # invokes UB, escapes of an ordinary character. A future change will + # make regex(3) no longer tolerate this and we'll need to adjust what + # we're doing, but for now this will suffice. + atf_check -o "inline:" sed 's/\xx//' d +} + atf_init_test_cases() { atf_add_test_case inplace_command_q atf_add_test_case inplace_hardlink_src atf_add_test_case inplace_symlink_src atf_add_test_case escape_subst + atf_add_test_case hex_subst }