diff --git a/bin/sh/builtins.def b/bin/sh/builtins.def index 1cbeea913a92..8807347f4f19 100644 --- a/bin/sh/builtins.def +++ b/bin/sh/builtins.def @@ -65,6 +65,7 @@ exportcmd -s export -s readonly #exprcmd expr falsecmd false fgcmd -j fg +freebsd_wordexpcmd freebsd_wordexp getoptscmd getopts hashcmd hash histcmd -h fc diff --git a/bin/sh/expand.c b/bin/sh/expand.c index 7411f1cc97f5..88d80ea4bd59 100644 --- a/bin/sh/expand.c +++ b/bin/sh/expand.c @@ -1656,3 +1656,57 @@ wordexpcmd(int argc, char **argv) outbin(argv[i], strlen(argv[i]) + 1, out1); return (0); } + +/* + * Do most of the work for wordexp(3), new version. + */ + +int +freebsd_wordexpcmd(int argc __unused, char **argv __unused) +{ + struct arglist arglist; + union node *args, *n; + struct strlist *sp; + size_t count, len; + int ch; + int protected = 0; + int fd = -1; + + while ((ch = nextopt("f:p")) != '\0') { + switch (ch) { + case 'f': + fd = number(shoptarg); + break; + case 'p': + protected = 1; + break; + } + } + if (*argptr != NULL) + error("wrong number of arguments"); + if (fd < 0) + error("missing fd"); + INTOFF; + setinputfd(fd, 1); + INTON; + args = parsewordexp(); + popfile(); /* will also close fd */ + if (protected) + for (n = args; n != NULL; n = n->narg.next) { + if (n->narg.backquote != NULL) { + outcslow('C', out1); + error("command substitution disabled"); + } + } + outcslow(' ', out1); + arglist.lastp = &arglist.list; + for (n = args; n != NULL; n = n->narg.next) + expandarg(n, &arglist, EXP_FULL | EXP_TILDE); + *arglist.lastp = NULL; + for (sp = arglist.list, count = len = 0; sp; sp = sp->next) + count++, len += strlen(sp->text); + out1fmt("%016zx %016zx", count, len); + for (sp = arglist.list; sp; sp = sp->next) + outbin(sp->text, strlen(sp->text) + 1, out1); + return (0); +} diff --git a/bin/sh/parser.c b/bin/sh/parser.c index 302d1797c194..53d7923f8755 100644 --- a/bin/sh/parser.c +++ b/bin/sh/parser.c @@ -231,6 +231,39 @@ parsecmd(int interact) } +/* + * Read and parse words for wordexp. + * Returns a list of NARG nodes; NULL if there are no words. + */ +union node * +parsewordexp(void) +{ + union node *n, *first = NULL, **pnext; + int t; + + /* This assumes the parser is not re-entered, + * which could happen if we add command substitution on PS1/PS2. + */ + parser_temp_free_all(); + heredoclist = NULL; + + tokpushback = 0; + checkkwd = 0; + doprompt = 0; + setprompt(0); + needprompt = 0; + pnext = &first; + while ((t = readtoken()) != TEOF) { + if (t != TWORD) + synexpect(TWORD); + n = makename(); + *pnext = n; + pnext = &n->narg.next; + } + return first; +} + + static union node * list(int nlflag) { diff --git a/bin/sh/parser.h b/bin/sh/parser.h index 598259439124..0c3cd88601ab 100644 --- a/bin/sh/parser.h +++ b/bin/sh/parser.h @@ -76,6 +76,7 @@ extern const char *const parsekwd[]; union node *parsecmd(int); +union node *parsewordexp(void); void forcealias(void); void fixredir(union node *, const char *, int); int goodname(const char *); diff --git a/lib/libc/gen/wordexp.3 b/lib/libc/gen/wordexp.3 index 2fc20ef81f2b..dd4605ffa6fd 100644 --- a/lib/libc/gen/wordexp.3 +++ b/lib/libc/gen/wordexp.3 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd August 18, 2015 +.Dd September 30, 2015 .Dt WORDEXP 3 .Os .Sh NAME @@ -108,8 +108,9 @@ function frees the memory allocated by .Sh IMPLEMENTATION NOTES The .Fn wordexp -function is implemented by executing -.Xr sh 1 . +function is implemented using the undocumented +.Ic freebsd_wordexp +shell built-in command. .Sh RETURN VALUES The .Fn wordexp @@ -191,18 +192,19 @@ and functions conform to .St -p1003.1-2001 . .Sh BUGS -Do not pass untrusted user data to -.Fn wordexp , -regardless of whether the -.Dv WRDE_NOCMD -flag is set. -The -.Fn wordexp -function attempts to detect input that would cause commands to be -executed before passing it to the shell -but it does not use the same parser so it may be fooled. -.Pp The current .Fn wordexp implementation does not recognize multibyte characters other than UTF-8, since the shell (which it invokes to perform expansions) does not. +.Sh SECURITY CONSIDERATIONS +Pathname generation may create output that is exponentially larger than the +input size. +.Pp +Although this implementation detects command substitution reliably for +.Dv WRDE_NOCMD , +the attack surface remains fairly large. +Also, some other implementations +(such as older versions of this one) +may execute command substitutions even if +.Dv WRDE_NOCMD +is set. diff --git a/lib/libc/gen/wordexp.c b/lib/libc/gen/wordexp.c index 859ca50ea6aa..3791e1e05b19 100644 --- a/lib/libc/gen/wordexp.c +++ b/lib/libc/gen/wordexp.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -43,7 +44,7 @@ __FBSDID("$FreeBSD$"); static int we_askshell(const char *, wordexp_t *, int); -static int we_check(const char *, int); +static int we_check(const char *); /* * wordexp -- @@ -65,7 +66,7 @@ wordexp(const char * __restrict words, wordexp_t * __restrict we, int flags) we->we_strings = NULL; we->we_nbytes = 0; } - if ((error = we_check(words, flags)) != 0) { + if ((error = we_check(words)) != 0) { wordfree(we); return (error); } @@ -94,17 +95,37 @@ we_read_fully(int fd, char *buffer, size_t len) return done; } +static bool +we_write_fully(int fd, const char *buffer, size_t len) +{ + size_t done; + ssize_t nwritten; + + done = 0; + do { + nwritten = _write(fd, buffer + done, len - done); + if (nwritten == -1 && errno == EINTR) + continue; + if (nwritten <= 0) + return (false); + done += nwritten; + } while (done != len); + return (true); +} + /* * we_askshell -- - * Use the `wordexp' /bin/sh builtin function to do most of the work - * in expanding the word string. This function is complicated by + * Use the `freebsd_wordexp' /bin/sh builtin function to do most of the + * work in expanding the word string. This function is complicated by * memory management. */ static int we_askshell(const char *words, wordexp_t *we, int flags) { - int pdes[2]; /* Pipe to child */ - char buf[18]; /* Buffer for byte and word count */ + int pdesw[2]; /* Pipe for writing words */ + int pdes[2]; /* Pipe for reading output */ + char wfdstr[sizeof(int) * 3 + 1]; + char buf[35]; /* Buffer for byte and word count */ long nwords, nbytes; /* Number of words, bytes from child */ long i; /* Handy integer */ size_t sofs; /* Offset into we->we_strings */ @@ -119,18 +140,25 @@ we_askshell(const char *words, wordexp_t *we, int flags) char **nwv; /* Temporary for realloc() */ sigset_t newsigblock, oldsigblock; const char *ifs; - char save; serrno = errno; ifs = getenv("IFS"); - if (pipe2(pdes, O_CLOEXEC) < 0) + if (pipe2(pdesw, O_CLOEXEC) < 0) return (WRDE_NOSPACE); /* XXX */ + snprintf(wfdstr, sizeof(wfdstr), "%d", pdesw[0]); + if (pipe2(pdes, O_CLOEXEC) < 0) { + _close(pdesw[0]); + _close(pdesw[1]); + return (WRDE_NOSPACE); /* XXX */ + } (void)sigemptyset(&newsigblock); (void)sigaddset(&newsigblock, SIGCHLD); (void)__libc_sigprocmask(SIG_BLOCK, &newsigblock, &oldsigblock); if ((pid = fork()) < 0) { serrno = errno; + _close(pdesw[0]); + _close(pdesw[1]); _close(pdes[0]); _close(pdes[1]); (void)__libc_sigprocmask(SIG_SETMASK, &oldsigblock, NULL); @@ -146,43 +174,54 @@ we_askshell(const char *words, wordexp_t *we, int flags) _dup2(pdes[1], STDOUT_FILENO) : _fcntl(pdes[1], F_SETFD, 0)) < 0) _exit(1); + if (_fcntl(pdesw[0], F_SETFD, 0) < 0) + _exit(1); execl(_PATH_BSHELL, "sh", flags & WRDE_UNDEF ? "-u" : "+u", - "-c", "IFS=$1;eval \"$2\";eval \"echo;set -- $3\";" - "IFS=;a=\"$*\";printf '%08x' \"$#\" \"${#a}\";" - "printf '%s\\0' \"$@\"", + "-c", "IFS=$1;eval \"$2\";" + "freebsd_wordexp -f \"$3\" ${4:+\"$4\"}", "", ifs != NULL ? ifs : " \t\n", - flags & WRDE_SHOWERR ? "" : "exec 2>/dev/null", words, + flags & WRDE_SHOWERR ? "" : "exec 2>/dev/null", + wfdstr, + flags & WRDE_NOCMD ? "-p" : "", (char *)NULL); _exit(1); } /* - * We are the parent; read the output of the shell wordexp function, - * which is a byte indicating that the words were parsed successfully, - * a 32-bit hexadecimal word count, a 32-bit hexadecimal byte count - * (not including terminating null bytes), followed by the expanded - * words separated by nulls. + * We are the parent; write the words. */ _close(pdes[1]); - switch (we_read_fully(pdes[0], buf, 17)) { + _close(pdesw[0]); + if (!we_write_fully(pdesw[1], words, strlen(words))) { + _close(pdesw[1]); + error = WRDE_SYNTAX; + goto cleanup; + } + _close(pdesw[1]); + /* + * Read the output of the shell wordexp function, + * which is a byte indicating that the words were parsed successfully, + * a 64-bit hexadecimal word count, a dummy byte, a 64-bit hexadecimal + * byte count (not including terminating null bytes), followed by the + * expanded words separated by nulls. + */ + switch (we_read_fully(pdes[0], buf, 34)) { case 1: - error = WRDE_BADVAL; + error = buf[0] == 'C' ? WRDE_CMDSUB : WRDE_BADVAL; serrno = errno; goto cleanup; - case 17: + case 34: break; default: error = WRDE_SYNTAX; serrno = errno; goto cleanup; } - save = buf[9]; - buf[9] = '\0'; - nwords = strtol(buf + 1, NULL, 16); - buf[9] = save; buf[17] = '\0'; - nbytes = strtol(buf + 9, NULL, 16) + nwords; + nwords = strtol(buf + 1, NULL, 16); + buf[34] = '\0'; + nbytes = strtol(buf + 18, NULL, 16) + nwords; /* * Allocate or reallocate (when flags & WRDE_APPEND) the word vector @@ -255,83 +294,96 @@ we_askshell(const char *words, wordexp_t *we, int flags) * we_check -- * Check that the string contains none of the following unquoted * special characters: |&;<>(){} - * or command substitutions when WRDE_NOCMD is set in flags. + * This mainly serves for {} which are normally legal in sh. + * It deliberately does not attempt to model full sh syntax. */ static int -we_check(const char *words, int flags) +we_check(const char *words) { char c; - int dquote, level, quote, squote; + /* Saw \ or $, possibly not special: */ + bool quote = false, dollar = false; + /* Saw ', ", ${, ` or $(, possibly not special: */ + bool have_sq = false, have_dq = false, have_par_begin = false; + bool have_cmd = false; + /* Definitely saw a ', ", ${, ` or $(, need a closing character: */ + bool need_sq = false, need_dq = false, need_par_end = false; + bool need_cmd_old = false, need_cmd_new = false; - quote = squote = dquote = 0; while ((c = *words++) != '\0') { switch (c) { case '\\': - if (squote == 0) - quote ^= 1; + quote = !quote; + continue; + case '$': + if (quote) + quote = false; + else + dollar = !dollar; continue; case '\'': - if (quote + dquote == 0) - squote ^= 1; + if (!quote && !have_sq && !have_dq) + need_sq = true; + else + need_sq = false; + have_sq = true; break; case '"': - if (quote + squote == 0) - dquote ^= 1; + if (!quote && !have_sq && !have_dq) + need_dq = true; + else + need_dq = false; + have_dq = true; break; case '`': - if (quote + squote == 0 && flags & WRDE_NOCMD) - return (WRDE_CMDSUB); - while ((c = *words++) != '\0' && c != '`') - if (c == '\\' && (c = *words++) == '\0') - break; - if (c == '\0') - return (WRDE_SYNTAX); + if (!quote && !have_sq && !have_cmd) + need_cmd_old = true; + else + need_cmd_old = false; + have_cmd = true; break; - case '|': case '&': case ';': case '<': case '>': - case '{': case '}': case '(': case ')': case '\n': - if (quote + squote + dquote == 0) + case '{': + if (!quote && !dollar && !have_sq && !have_dq && + !have_cmd) return (WRDE_BADCHAR); + if (dollar) { + if (!quote && !have_sq) + need_par_end = true; + have_par_begin = true; + } break; - case '$': - if ((c = *words++) == '\0') - break; - else if (quote + squote == 0 && c == '(') { - if (flags & WRDE_NOCMD && *words != '(') - return (WRDE_CMDSUB); - level = 1; - while ((c = *words++) != '\0') { - if (c == '\\') { - if ((c = *words++) == '\0') - break; - } else if (c == '(') - level++; - else if (c == ')' && --level == 0) - break; - } - if (c == '\0' || level != 0) - return (WRDE_SYNTAX); - } else if (quote + squote == 0 && c == '{') { - level = 1; - while ((c = *words++) != '\0') { - if (c == '\\') { - if ((c = *words++) == '\0') - break; - } else if (c == '{') - level++; - else if (c == '}' && --level == 0) - break; - } - if (c == '\0' || level != 0) - return (WRDE_SYNTAX); - } else - --words; + case '}': + if (!quote && !have_sq && !have_dq && !have_par_begin && + !have_cmd) + return (WRDE_BADCHAR); + need_par_end = false; + break; + case '(': + if (!quote && !dollar && !have_sq && !have_dq && + !have_cmd) + return (WRDE_BADCHAR); + if (dollar) { + if (!quote && !have_sq) + need_cmd_new = true; + have_cmd = true; + } + break; + case ')': + if (!quote && !have_sq && !have_dq && !have_cmd) + return (WRDE_BADCHAR); + need_cmd_new = false; + break; + case '|': case '&': case ';': case '<': case '>': case '\n': + if (!quote && !have_sq && !have_dq && !have_cmd) + return (WRDE_BADCHAR); break; default: break; } - quote = 0; + quote = dollar = false; } - if (quote + squote + dquote != 0) + if (quote || dollar || need_sq || need_dq || need_par_end || + need_cmd_old || need_cmd_new) return (WRDE_SYNTAX); return (0);