Implement CloudABI's exec() call.

Summary:
In a runtime that is purely based on capability-based security, there is
a strong emphasis on how programs start their execution. We need to make
sure that we execute a new program with an exact set of file
descriptors, ensuring that credentials are not leaked into the process
accidentally.

Providing the right file descriptors is just half the problem. There
also needs to be a framework in place that gives meaning to these file
descriptors. How does a CloudABI mail server know which of the file
descriptors corresponds to the socket that receives incoming emails?
Furthermore, how will this mail server acquire its configuration
parameters, as it cannot open a configuration file from a global path on
disk?

CloudABI solves this problem by replacing traditional string command
line arguments by a tree-like data structure consisting of scalars,
sequences and mappings (similar to YAML/JSON). In this structure, file
descriptors are treated as first-class citizens. When calling exec(),
file descriptors are passed on to the new executable if and only if they
are referenced from this tree structure. See the cloudabi-run(1) man
page for more details and examples (sysutils/cloudabi-utils).

Fortunately, the kernel does not need to care about this tree structure
at all. The C library is responsible for serializing and deserializing,
but also for extracting the list of referenced file descriptors. The
system call only receives a copy of the serialized data and a layout of
what the new file descriptor table should look like:

    int proc_exec(int execfd, const void *data, size_t datalen, const int *fds,
              size_t fdslen);

This change introduces a set of fd*_remapped() functions:

- fdcopy_remapped() pulls a copy of a file descriptor table, remapping
  all of the file descriptors according to the provided mapping table.
- fdinstall_remapped() replaces the file descriptor table of the process
  by the copy created by fdcopy_remapped().
- fdescfree_remapped() frees the table in case we aborted before
  fdinstall_remapped().

We then add a function exec_copyin_data_fds() that builds on top of these
functions. It copies in the data and constructs a new remapped file
descriptor table. This is used by cloudabi_sys_proc_exec().

Test Plan:
cloudabi-run(1) is capable of spawning processes successfully, providing
it data and file descriptors. procstat -f seems to confirm all is good.
Regular FreeBSD processes also work properly.

Reviewers: kib, mjg

Reviewed By: mjg

Subscribers: imp

Differential Revision: https://reviews.freebsd.org/D3079
This commit is contained in:
Ed Schouten 2015-07-16 07:05:42 +00:00
parent 8947c2986d
commit 457f7e23b1
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=285622
5 changed files with 202 additions and 9 deletions

View File

@ -27,10 +27,12 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/imgact.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <compat/cloudabi/cloudabi_proto.h>
@ -38,9 +40,16 @@ int
cloudabi_sys_proc_exec(struct thread *td,
struct cloudabi_sys_proc_exec_args *uap)
{
struct image_args args;
int error;
/* Not implemented. */
return (ENOSYS);
error = exec_copyin_data_fds(td, &args, uap->data, uap->datalen,
uap->fds, uap->fdslen);
if (error == 0) {
args.fd = uap->fd;
error = kern_execve(td, &args, NULL);
}
return (error);
}
int

View File

@ -1921,6 +1921,14 @@ fdunshare(struct thread *td)
p->p_fd = tmp;
}
void
fdinstall_remapped(struct thread *td, struct filedesc *fdp)
{
fdescfree(td);
td->td_proc->p_fd = fdp;
}
/*
* Copy a filedesc structure. A NULL pointer in returns a NULL reference,
* this is to ease callers, not catch errors.
@ -1959,6 +1967,65 @@ fdcopy(struct filedesc *fdp)
return (newfdp);
}
/*
 * Build a copy of a filedesc structure in which the descriptors have been
 * renumbered through a translation table: slot i of the copy refers to
 * the file that is open at fds[i] in the original.
 *
 * The close-on-exec flag plays no role here; every descriptor named by
 * the table is copied.
 *
 * Returns 0 and stores the copy in *ret on success.  On failure *ret is
 * left untouched, the partially built copy is released, and one of the
 * following is returned:
 *   E2BIG  - the table has more entries than the original has slots,
 *   EBADF  - an entry is out of range or names an unused descriptor,
 *   EINVAL - an entry names a descriptor that cannot be passed.
 */
int
fdcopy_remapped(struct filedesc *fdp, const int *fds, size_t nfds,
    struct filedesc **ret)
{
	struct filedesc *copy;
	struct filedescent *dst, *src;
	int error, oldfd, slot;

	MPASS(fdp != NULL);

	/*
	 * Both exits below drop a shared lock on fdp, so fdinit(fdp, true)
	 * presumably returns with that lock held.
	 */
	copy = fdinit(fdp, true);
	if (nfds > fdp->fd_lastfile + 1) {
		/* The copy may not have more slots than the original. */
		error = E2BIG;
		goto bad;
	}

	/* Slots 0 .. nfds - 1 are about to be filled. */
	copy->fd_freefile = nfds;
	for (slot = 0; slot < nfds; ++slot) {
		oldfd = fds[slot];
		if (oldfd < 0 || oldfd > fdp->fd_lastfile) {
			/* Entry does not fit in the original table. */
			error = EBADF;
			goto bad;
		}
		src = &fdp->fd_ofiles[oldfd];
		if (src->fde_file == NULL) {
			/* Entry names a descriptor that is not open. */
			error = EBADF;
			goto bad;
		}
		if ((src->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0) {
			/* Entry names a file that must not be passed on. */
			error = EINVAL;
			goto bad;
		}

		/* Duplicate the entry and take a reference on the file. */
		dst = &copy->fd_ofiles[slot];
		*dst = *src;
		filecaps_copy(&src->fde_caps, &dst->fde_caps);
		fhold(dst->fde_file);
		fdused_init(copy, slot);
		copy->fd_lastfile = slot;
	}
	copy->fd_cmask = fdp->fd_cmask;
	FILEDESC_SUNLOCK(fdp);
	*ret = copy;
	return (0);

bad:
	FILEDESC_SUNLOCK(fdp);
	fdescfree_remapped(copy);
	return (error);
}
/*
* Clear POSIX style locks. This is only used when fdp loses a reference (i.e.
* one of processes using it exits) and the table used to be shared.
@ -2114,6 +2181,42 @@ fdescfree(struct thread *td)
fddrop(fdp);
}
void
fdescfree_remapped(struct filedesc *fdp)
{
struct filedesc0 *fdp0;
struct filedescent *fde;
struct file *fp;
struct freetable *ft, *tft;
int i;
for (i = 0; i <= fdp->fd_lastfile; i++) {
fde = &fdp->fd_ofiles[i];
fp = fde->fde_file;
if (fp != NULL) {
fdefree_last(fde);
(void) closef(fp, NULL);
}
}
if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
free(fdp->fd_map, M_FILEDESC);
if (fdp->fd_nfiles > NDFILE)
free(fdp->fd_files, M_FILEDESC);
fdp0 = (struct filedesc0 *)fdp;
SLIST_FOREACH_SAFE(ft, &fdp0->fd_free, ft_next, tft)
free(ft->ft_table, M_FILEDESC);
if (fdp->fd_cdir != NULL)
vrele(fdp->fd_cdir);
if (fdp->fd_rdir != NULL)
vrele(fdp->fd_rdir);
if (fdp->fd_jdir != NULL)
vrele(fdp->fd_jdir);
fddrop(fdp);
}
/*
* For setugid programs, we don't want people to use that setugidness
* to generate error messages which write to a file which otherwise would

View File

@ -580,13 +580,20 @@ do_execve(td, args, mac_p)
else
suword(--stack_base, imgp->args->argc);
/*
* For security and other reasons, the file descriptor table cannot
* be shared after an exec.
*/
fdunshare(td);
/* close files on exec */
fdcloseexec(td);
if (args->fdp != NULL) {
/* Install a brand new file descriptor table. */
fdinstall_remapped(td, args->fdp);
args->fdp = NULL;
} else {
/*
* Keep on using the existing file descriptor table. For
* security and other reasons, the file descriptor table
* cannot be shared after an exec.
*/
fdunshare(td);
/* close files on exec */
fdcloseexec(td);
}
/*
* Malloc things before we need locks.
@ -1197,6 +1204,71 @@ exec_copyin_args(struct image_args *args, char *fname,
return (error);
}
/*
 * Set up image arguments for an exec that is driven by a single opaque
 * data buffer and an explicit file descriptor layout instead of argument
 * and environment string vectors.
 *
 * The data buffer (datalen bytes, fewer than ARG_MAX) is copied in as
 * the argument area and null terminated; argc is the number of null
 * bytes it then contains.  No environment is set up.  The fds array
 * (fdslen entries) is copied in and used to create a remapped copy of
 * the calling process' file descriptor table, stored in args->fdp.
 *
 * Returns 0 on success.  On failure an errno value is returned and
 * anything that was allocated for args has been released again.
 */
int
exec_copyin_data_fds(struct thread *td, struct image_args *args,
    const void *data, size_t datalen, const int *fds, size_t fdslen)
{
	struct filedesc *curfdp;
	const char *cp;
	int *fdmap;
	int error;

	memset(args, '\0', sizeof(*args));
	curfdp = td->td_proc->p_fd;

	/* Reject oversized input before allocating anything. */
	if (datalen >= ARG_MAX)
		return (E2BIG);
	if (fdslen > curfdp->fd_lastfile + 1)
		return (E2BIG);

	error = exec_alloc_args(args);
	if (error != 0)
		return (error);
	args->begin_argv = args->buf;
	args->stringspace = ARG_MAX;

	if (datalen == 0) {
		/* Nothing to copy; the argument area stays empty. */
		args->endp = args->begin_argv;
	} else {
		/*
		 * Pull the buffer into the kernel as one string and
		 * terminate it with a null byte.
		 */
		error = copyin(data, args->begin_argv, datalen);
		if (error != 0)
			goto err_exit;
		args->begin_argv[datalen] = '\0';
		args->endp = args->begin_argv + datalen + 1;
		args->stringspace -= datalen + 1;

		/* Count arguments the traditional way: one per null byte. */
		for (cp = args->begin_argv; cp < args->endp; ++cp) {
			if (*cp == '\0')
				++args->argc;
		}
	}

	/* The environment is empty and starts where the arguments end. */
	args->begin_envv = args->endp;

	/* Fetch the descriptor layout and build the new table from it. */
	fdmap = malloc(fdslen * sizeof(int), M_TEMP, M_WAITOK);
	error = copyin(fds, fdmap, fdslen * sizeof(int));
	if (error == 0)
		error = fdcopy_remapped(curfdp, fdmap, fdslen, &args->fdp);
	free(fdmap, M_TEMP);
	if (error != 0)
		goto err_exit;
	return (0);

err_exit:
	exec_free_args(args);
	return (error);
}
/*
* Allocate temporary demand-paged, zero-filled memory for the file name,
* argument, and environment strings. Returns zero if the allocation succeeds
@ -1223,6 +1295,8 @@ exec_free_args(struct image_args *args)
free(args->fname_buf, M_TEMP);
args->fname_buf = NULL;
}
if (args->fdp != NULL)
fdescfree_remapped(args->fdp);
}
/*

View File

@ -170,8 +170,12 @@ void fdclose(struct thread *td, struct file *fp, int idx);
void fdcloseexec(struct thread *td);
void fdsetugidsafety(struct thread *td);
struct filedesc *fdcopy(struct filedesc *fdp);
/*
 * Copy fdp into a new table in which slot i refers to the file open at
 * fds[i] in fdp.  Returns 0 and stores the copy in *newfdp, or returns
 * E2BIG, EBADF or EINVAL without touching *newfdp.
 */
int fdcopy_remapped(struct filedesc *fdp, const int *fds, size_t nfds,
struct filedesc **newfdp);
/*
 * Give up the current table through fdescfree() and make fdp the file
 * descriptor table of td's process.
 */
void fdinstall_remapped(struct thread *td, struct filedesc *fdp);
void fdunshare(struct thread *td);
void fdescfree(struct thread *td);
/*
 * Close all files held in fdp and release the table; for a table that is
 * referenced directly rather than through a thread.
 */
void fdescfree_remapped(struct filedesc *fdp);
struct filedesc *fdinit(struct filedesc *fdp, bool prepfiles);
struct filedesc *fdshare(struct filedesc *fdp);
struct filedesc_to_leader *

View File

@ -49,6 +49,7 @@ struct image_args {
int argc; /* count of argument strings */
int envc; /* count of environment strings */
int fd; /* file descriptor of the executable */
struct filedesc *fdp; /* new file descriptor table */
};
struct image_params {
@ -99,6 +100,8 @@ void exec_setregs(struct thread *, struct image_params *, u_long);
int exec_shell_imgact(struct image_params *);
int exec_copyin_args(struct image_args *, char *, enum uio_seg,
char **, char **);
/*
 * Fill in image_args from an opaque data buffer (pointer, length) and an
 * array of file descriptor numbers (pointer, count) that describes the
 * file descriptor table of the new image.  Returns 0 or an errno value.
 */
int exec_copyin_data_fds(struct thread *, struct image_args *, const void *,
size_t, const int *, size_t);
int pre_execve(struct thread *td, struct vmspace **oldvmspace);
void post_execve(struct thread *td, int error, struct vmspace *oldvmspace);
#endif