sh: Add $'quoting' (C-style escape sequences).
A string between $' and ' may contain backslash escape sequences similar to the ones in a C string constant (except that a single-quote must be escaped and a double-quote need not be). Details are in the sh(1) man page. This construct is useful for including unprintable characters, tabs and newlines in strings; while this can be done with a command substitution containing a printf command, that needs ugly workarounds if the result is to end with a newline, as command substitution removes all trailing newlines. The construct may also be useful in the future to describe unprintable characters without needing to write those characters themselves in 'set -x', 'export -p' and the like. The implementation attempts to comply with the proposal for the next issue of the POSIX specification. Because this construct is not in POSIX.1-2008, using it in scripts intended to be portable is unwise. Matching the minimal locale support in the rest of sh, the \u and \U sequences are currently not useful. Exp-run done by: pav (with some other sh(1) changes)
This commit is contained in:
parent
7ec44d66a6
commit
5a49f52603
@ -64,6 +64,7 @@ struct synclass synclass[] = {
|
||||
{ "CWORD", "character is nothing special" },
|
||||
{ "CNL", "newline character" },
|
||||
{ "CBACK", "a backslash character" },
|
||||
{ "CSBACK", "a backslash character in single quotes" },
|
||||
{ "CSQUOTE", "single quote" },
|
||||
{ "CDQUOTE", "double quote" },
|
||||
{ "CENDQUOTE", "a terminating quote" },
|
||||
@ -224,6 +225,7 @@ main(int argc __unused, char **argv __unused)
|
||||
init();
|
||||
fputs("\n/* syntax table used when in single quotes */\n", cfile);
|
||||
add("\n", "CNL");
|
||||
add("\\", "CSBACK");
|
||||
add("'", "CENDQUOTE");
|
||||
/* ':/' for tilde expansion, '-' for [a\-x] pattern ranges */
|
||||
add("!*?[=~:/-", "CCTL");
|
||||
|
145
bin/sh/parser.c
145
bin/sh/parser.c
@ -1126,6 +1126,127 @@ parsebackq(char *out, struct nodelist **pbqlist,
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Called to parse a backslash escape sequence inside $'...'.
|
||||
* The backslash has already been read.
|
||||
*/
|
||||
static char *
|
||||
readcstyleesc(char *out)
|
||||
{
|
||||
int c, v, i, n;
|
||||
|
||||
c = pgetc();
|
||||
switch (c) {
|
||||
case '\0':
|
||||
synerror("Unterminated quoted string");
|
||||
case '\n':
|
||||
plinno++;
|
||||
if (doprompt)
|
||||
setprompt(2);
|
||||
else
|
||||
setprompt(0);
|
||||
return out;
|
||||
case '\\':
|
||||
case '\'':
|
||||
case '"':
|
||||
v = c;
|
||||
break;
|
||||
case 'a': v = '\a'; break;
|
||||
case 'b': v = '\b'; break;
|
||||
case 'e': v = '\033'; break;
|
||||
case 'f': v = '\f'; break;
|
||||
case 'n': v = '\n'; break;
|
||||
case 'r': v = '\r'; break;
|
||||
case 't': v = '\t'; break;
|
||||
case 'v': v = '\v'; break;
|
||||
case 'x':
|
||||
v = 0;
|
||||
for (;;) {
|
||||
c = pgetc();
|
||||
if (c >= '0' && c <= '9')
|
||||
v = (v << 4) + c - '0';
|
||||
else if (c >= 'A' && c <= 'F')
|
||||
v = (v << 4) + c - 'A' + 10;
|
||||
else if (c >= 'a' && c <= 'f')
|
||||
v = (v << 4) + c - 'a' + 10;
|
||||
else
|
||||
break;
|
||||
}
|
||||
pungetc();
|
||||
break;
|
||||
case '0': case '1': case '2': case '3':
|
||||
case '4': case '5': case '6': case '7':
|
||||
v = c - '0';
|
||||
c = pgetc();
|
||||
if (c >= '0' && c <= '7') {
|
||||
v <<= 3;
|
||||
v += c - '0';
|
||||
c = pgetc();
|
||||
if (c >= '0' && c <= '7') {
|
||||
v <<= 3;
|
||||
v += c - '0';
|
||||
} else
|
||||
pungetc();
|
||||
} else
|
||||
pungetc();
|
||||
break;
|
||||
case 'c':
|
||||
c = pgetc();
|
||||
if (c < 0x3f || c > 0x7a || c == 0x60)
|
||||
synerror("Bad escape sequence");
|
||||
if (c == '\\' && pgetc() != '\\')
|
||||
synerror("Bad escape sequence");
|
||||
if (c == '?')
|
||||
v = 127;
|
||||
else
|
||||
v = c & 0x1f;
|
||||
break;
|
||||
case 'u':
|
||||
case 'U':
|
||||
n = c == 'U' ? 8 : 4;
|
||||
v = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
c = pgetc();
|
||||
if (c >= '0' && c <= '9')
|
||||
v = (v << 4) + c - '0';
|
||||
else if (c >= 'A' && c <= 'F')
|
||||
v = (v << 4) + c - 'A' + 10;
|
||||
else if (c >= 'a' && c <= 'f')
|
||||
v = (v << 4) + c - 'a' + 10;
|
||||
else
|
||||
synerror("Bad escape sequence");
|
||||
}
|
||||
if (v == 0 || (v >= 0xd800 && v <= 0xdfff))
|
||||
synerror("Bad escape sequence");
|
||||
/* We really need iconv here. */
|
||||
if (v > 127)
|
||||
v = '?';
|
||||
break;
|
||||
default:
|
||||
synerror("Bad escape sequence");
|
||||
}
|
||||
v = (char)v;
|
||||
/*
|
||||
* We can't handle NUL bytes.
|
||||
* POSIX says we should skip till the closing quote.
|
||||
*/
|
||||
if (v == '\0') {
|
||||
while ((c = pgetc()) != '\'') {
|
||||
if (c == '\\')
|
||||
c = pgetc();
|
||||
if (c == PEOF)
|
||||
synerror("Unterminated quoted string");
|
||||
}
|
||||
pungetc();
|
||||
return out;
|
||||
}
|
||||
if (SQSYNTAX[v] == CCTL)
|
||||
USTPUTC(CTLESC, out);
|
||||
USTPUTC(v, out);
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* If eofmark is NULL, read a word or a redirection symbol. If eofmark
|
||||
* is not NULL, read a here document. In the latter case, eofmark is the
|
||||
@ -1158,6 +1279,7 @@ readtoken1(int firstc, char const *initialsyntax, char *eofmark, int striptabs)
|
||||
struct tokenstate state_static[MAXNEST_static];
|
||||
int maxnest = MAXNEST_static;
|
||||
struct tokenstate *state = state_static;
|
||||
int sqiscstyle = 0;
|
||||
|
||||
startlinno = plinno;
|
||||
quotef = 0;
|
||||
@ -1188,6 +1310,12 @@ readtoken1(int firstc, char const *initialsyntax, char *eofmark, int striptabs)
|
||||
setprompt(0);
|
||||
c = pgetc();
|
||||
goto loop; /* continue outer loop */
|
||||
case CSBACK:
|
||||
if (sqiscstyle) {
|
||||
out = readcstyleesc(out);
|
||||
break;
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
case CWORD:
|
||||
USTPUTC(c, out);
|
||||
break;
|
||||
@ -1232,6 +1360,7 @@ readtoken1(int firstc, char const *initialsyntax, char *eofmark, int striptabs)
|
||||
case CSQUOTE:
|
||||
USTPUTC(CTLQUOTEMARK, out);
|
||||
state[level].syntax = SQSYNTAX;
|
||||
sqiscstyle = 0;
|
||||
break;
|
||||
case CDQUOTE:
|
||||
USTPUTC(CTLQUOTEMARK, out);
|
||||
@ -1450,11 +1579,7 @@ parsesub: {
|
||||
int c1;
|
||||
|
||||
c = pgetc();
|
||||
if (c != '(' && c != '{' && (is_eof(c) || !is_name(c)) &&
|
||||
!is_special(c)) {
|
||||
USTPUTC('$', out);
|
||||
pungetc();
|
||||
} else if (c == '(') { /* $(command) or $((arith)) */
|
||||
if (c == '(') { /* $(command) or $((arith)) */
|
||||
if (pgetc() == '(') {
|
||||
PARSEARITH();
|
||||
} else {
|
||||
@ -1465,7 +1590,7 @@ parsesub: {
|
||||
state[level].syntax == DQSYNTAX ||
|
||||
state[level].syntax == ARISYNTAX);
|
||||
}
|
||||
} else {
|
||||
} else if (c == '{' || is_name(c) || is_special(c)) {
|
||||
USTPUTC(CTLVAR, out);
|
||||
typeloc = out - stackblock();
|
||||
USTPUTC(VSNORMAL, out);
|
||||
@ -1612,6 +1737,14 @@ parsesub: {
|
||||
newvarnest++;
|
||||
}
|
||||
}
|
||||
} else if (c == '\'' && state[level].syntax == BASESYNTAX) {
|
||||
/* $'cstylequotes' */
|
||||
USTPUTC(CTLQUOTEMARK, out);
|
||||
state[level].syntax = SQSYNTAX;
|
||||
sqiscstyle = 1;
|
||||
} else {
|
||||
USTPUTC('$', out);
|
||||
pungetc();
|
||||
}
|
||||
goto parsesub_return;
|
||||
}
|
||||
|
73
bin/sh/sh.1
73
bin/sh/sh.1
@ -32,7 +32,7 @@
|
||||
.\" from: @(#)sh.1 8.6 (Berkeley) 5/4/95
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd March 20, 2011
|
||||
.Dd May 5, 2011
|
||||
.Dt SH 1
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -396,13 +396,82 @@ Quoting is used to remove the special meaning of certain characters
|
||||
or words to the shell, such as operators, whitespace, keywords,
|
||||
or alias names.
|
||||
.Pp
|
||||
There are three types of quoting: matched single quotes,
|
||||
There are four types of quoting: matched single quotes,
|
||||
dollar-single quotes,
|
||||
matched double quotes, and backslash.
|
||||
.Bl -tag -width indent
|
||||
.It Single Quotes
|
||||
Enclosing characters in single quotes preserves the literal
|
||||
meaning of all the characters (except single quotes, making
|
||||
it impossible to put single-quotes in a single-quoted string).
|
||||
.It Dollar-Single Quotes
|
||||
Enclosing characters between
|
||||
.Li $'
|
||||
and
|
||||
.Li '
|
||||
preserves the literal meaning of all characters
|
||||
except backslashes and single quotes.
|
||||
A backslash introduces a C-style escape sequence:
|
||||
.Bl -tag -width xUnnnnnnnn
|
||||
.It \ea
|
||||
Alert (ring the terminal bell)
|
||||
.It \eb
|
||||
Backspace
|
||||
.It \ec Ns Ar c
|
||||
The control character denoted by
|
||||
.Li ^ Ns Ar c
|
||||
in
|
||||
.Xr stty 1 .
|
||||
If
|
||||
.Ar c
|
||||
is a backslash, it must be doubled.
|
||||
.It \ee
|
||||
The ESC character
|
||||
.Tn ( ASCII
|
||||
0x1b)
|
||||
.It \ef
|
||||
Formfeed
|
||||
.It \en
|
||||
Newline
|
||||
.It \er
|
||||
Carriage return
|
||||
.It \et
|
||||
Horizontal tab
|
||||
.It \ev
|
||||
Vertical tab
|
||||
.It \e\e
|
||||
Literal backslash
|
||||
.It \e\&'
|
||||
Literal single-quote
|
||||
.It \e\&"
|
||||
Literal double-quote
|
||||
.It \e Ns Ar nnn
|
||||
The byte whose octal value is
|
||||
.Ar nnn
|
||||
(one to three digits)
|
||||
.It \ex Ns Ar nn
|
||||
The byte whose hexadecimal value is
|
||||
.Ar nn
|
||||
(one or more digits, only the last two of which are used)
|
||||
.It \eu Ns Ar nnnn
|
||||
The Unicode code point
|
||||
.Ar nnnn
|
||||
(four hexadecimal digits)
|
||||
.It \eU Ns Ar nnnnnnnn
|
||||
The Unicode code point
|
||||
.Ar nnnnnnnn
|
||||
(eight hexadecimal digits)
|
||||
.El
|
||||
.Pp
|
||||
The sequences for Unicode code points currently only provide useful results
|
||||
for values below 128.
|
||||
They reject code point 0 and UTF-16 surrogates.
|
||||
.Pp
|
||||
If an escape sequence would produce a byte with value 0,
|
||||
that byte and the rest of the string until the matching single-quote
|
||||
are ignored.
|
||||
.Pp
|
||||
Any other string starting with a backslash is an error.
|
||||
.It Double Quotes
|
||||
Enclosing characters within double quotes preserves the literal
|
||||
meaning of all characters except dollar sign
|
||||
|
12
tools/regression/bin/sh/parser/dollar-quote1.0
Normal file
12
tools/regression/bin/sh/parser/dollar-quote1.0
Normal file
@ -0,0 +1,12 @@
|
||||
# $FreeBSD$
|
||||
|
||||
set -e
|
||||
|
||||
[ $'hi' = hi ]
|
||||
[ $'hi
|
||||
there' = 'hi
|
||||
there' ]
|
||||
[ $'\"\'\\\a\b\f\t\v' = "\"'\\$(printf "\a\b\f\t\v")" ]
|
||||
[ $'hi\nthere' = 'hi
|
||||
there' ]
|
||||
[ $'a\rb' = "$(printf "a\rb")" ]
|
5
tools/regression/bin/sh/parser/dollar-quote2.0
Normal file
5
tools/regression/bin/sh/parser/dollar-quote2.0
Normal file
@ -0,0 +1,5 @@
|
||||
# $FreeBSD$
|
||||
|
||||
# This depends on the ASCII character set.
|
||||
|
||||
[ $'\e' = "$(printf "\033")" ]
|
22
tools/regression/bin/sh/parser/dollar-quote3.0
Normal file
22
tools/regression/bin/sh/parser/dollar-quote3.0
Normal file
@ -0,0 +1,22 @@
|
||||
# $FreeBSD$
|
||||
|
||||
unset LC_ALL
|
||||
LC_CTYPE=en_US.ISO8859-1
|
||||
export LC_CTYPE
|
||||
|
||||
e=
|
||||
for i in 0 1 2 3; do
|
||||
for j in 0 1 2 3 4 5 6 7; do
|
||||
for k in 0 1 2 3 4 5 6 7; do
|
||||
case $i$j$k in
|
||||
000) continue ;;
|
||||
esac
|
||||
e="$e\\$i$j$k"
|
||||
done
|
||||
done
|
||||
done
|
||||
ee=`printf "$e"`
|
||||
[ "${#ee}" = 255 ] || echo length bad
|
||||
|
||||
# Start a new shell so the locale change is picked up.
|
||||
[ "$(${SH} -c "printf %s \$'$e'")" = "$ee" ]
|
19
tools/regression/bin/sh/parser/dollar-quote4.0
Normal file
19
tools/regression/bin/sh/parser/dollar-quote4.0
Normal file
@ -0,0 +1,19 @@
|
||||
# $FreeBSD$
|
||||
|
||||
unset LC_ALL
|
||||
LC_CTYPE=en_US.ISO8859-1
|
||||
export LC_CTYPE
|
||||
|
||||
e=
|
||||
for i in 0 1 2 3 4 5 6 7 8 9 a b c d e f; do
|
||||
for j in 0 1 2 3 4 5 6 7 8 9 a b c d e f; do
|
||||
case $i$j in
|
||||
00) continue ;;
|
||||
esac
|
||||
e="$e\x$i$j"
|
||||
done
|
||||
done
|
||||
|
||||
# Start a new shell so the locale change is picked up.
|
||||
ee="$(${SH} -c "printf %s \$'$e'")"
|
||||
[ "${#ee}" = 255 ] || echo length bad
|
12
tools/regression/bin/sh/parser/dollar-quote5.0
Normal file
12
tools/regression/bin/sh/parser/dollar-quote5.0
Normal file
@ -0,0 +1,12 @@
|
||||
# $FreeBSD$
|
||||
|
||||
# This depends on the ASCII character set.
|
||||
|
||||
set -e
|
||||
|
||||
[ $'\ca\cb\cc\cd\ce\cf\cg\ch\ci\cj\ck\cl\cm\cn\co\cp\cq\cr\cs\ct\cu\cv\cw\cx\cy\cz' = $'\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032' ]
|
||||
[ $'\cA\cB\cC\cD\cE\cF\cG\cH\cI\cJ\cK\cL\cM\cN\cO\cP\cQ\cR\cS\cT\cU\cV\cW\cX\cY\cZ' = $'\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032' ]
|
||||
[ $'\c[' = $'\033' ]
|
||||
[ $'\c]' = $'\035' ]
|
||||
[ $'\c^' = $'\036' ]
|
||||
[ $'\c_' = $'\037' ]
|
5
tools/regression/bin/sh/parser/dollar-quote6.0
Normal file
5
tools/regression/bin/sh/parser/dollar-quote6.0
Normal file
@ -0,0 +1,5 @@
|
||||
# $FreeBSD$
|
||||
|
||||
# This depends on the ASCII character set.
|
||||
|
||||
[ $'\c\\' = $'\034' ]
|
6
tools/regression/bin/sh/parser/dollar-quote7.0
Normal file
6
tools/regression/bin/sh/parser/dollar-quote7.0
Normal file
@ -0,0 +1,6 @@
|
||||
# $FreeBSD$
|
||||
|
||||
set -e
|
||||
|
||||
[ $'\u0024\u0040\u0060' = '$@`' ]
|
||||
[ $'\U00000024\U00000040\U00000060' = '$@`' ]
|
11
tools/regression/bin/sh/parser/dollar-quote8.0
Normal file
11
tools/regression/bin/sh/parser/dollar-quote8.0
Normal file
@ -0,0 +1,11 @@
|
||||
# $FreeBSD$
|
||||
|
||||
[ $'hello\0' = hello ]
|
||||
[ $'hello\0world' = hello ]
|
||||
[ $'hello\0'$'world' = helloworld ]
|
||||
[ $'hello\000' = hello ]
|
||||
[ $'hello\000world' = hello ]
|
||||
[ $'hello\000'$'world' = helloworld ]
|
||||
[ $'hello\x00' = hello ]
|
||||
[ $'hello\x00world' = hello ]
|
||||
[ $'hello\x00'$'world' = helloworld ]
|
8
tools/regression/bin/sh/parser/dollar-quote9.0
Normal file
8
tools/regression/bin/sh/parser/dollar-quote9.0
Normal file
@ -0,0 +1,8 @@
|
||||
# $FreeBSD$
|
||||
|
||||
# POSIX and C99 say D800-DFFF are undefined in a universal character name.
|
||||
# We reject this but many other shells expand to something that looks like
|
||||
# CESU-8.
|
||||
|
||||
v=$( (eval ": \$'\uD800'") 2>&1 >/dev/null)
|
||||
[ $? -ne 0 ] && [ -n "$v" ]
|
Loading…
Reference in New Issue
Block a user