Allow Capsicum capabilities to delegate constrained access to file system subtrees to sandboxed processes.

- Use of absolute paths and '..' is limited in capability mode.
- Use of absolute paths and '..' is limited when looking up relative
  to a capability.
- When a name lookup is performed, identify what operation is to be
  performed (such as CAP_MKDIR) as well as check for CAP_LOOKUP.

With these constraints, openat() and friends are now safe in capability
mode, and can then be used by code such as the capability-mode runtime
linker.

Approved by: re (bz), mentor (rwatson)
Sponsored by: Google Inc
This commit is contained in:
Jonathan Anderson 2011-08-13 09:21:16 +00:00
parent 15975b7bc2
commit 69d377fe1b
6 changed files with 157 additions and 52 deletions

View File

@ -2335,6 +2335,16 @@ _fget(struct thread *td, int fd, struct file **fpp, int flags,
}
#ifdef CAPABILITIES
/*
* If this is a capability, what rights does it have?
*/
if (haverightsp != NULL) {
if (fp->f_type == DTYPE_CAPABILITY)
*haverightsp = cap_rights(fp);
else
*haverightsp = CAP_MASK_VALID;
}
/*
* If a capability has been requested, return the capability directly.
* Otherwise, check capability rights, extract the underlying object,

View File

@ -220,7 +220,7 @@ cap_new(struct thread *td, struct cap_new_args *uap)
{
int error, capfd;
int fd = uap->fd;
struct file *fp, *fcapp;
struct file *fp;
cap_rights_t rights = uap->rights;
AUDIT_ARG_FD(fd);
@ -229,7 +229,7 @@ cap_new(struct thread *td, struct cap_new_args *uap)
if (error)
return (error);
AUDIT_ARG_FILE(td->td_proc, fp);
error = kern_capwrap(td, fp, rights, &fcapp, &capfd);
error = kern_capwrap(td, fp, rights, &capfd);
if (error)
return (error);
@ -267,10 +267,10 @@ cap_getrights(struct thread *td, struct cap_getrights_args *uap)
*/
int
kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
struct file **fcappp, int *capfdp)
int *capfdp)
{
struct capability *cp, *cp_old;
struct file *fp_object;
struct file *fp_object, *fcapp;
int error;
if ((rights | CAP_MASK_VALID) != CAP_MASK_VALID)
@ -290,7 +290,7 @@ kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
/*
* Allocate a new file descriptor to hang the capability off of.
*/
error = falloc(td, fcappp, capfdp, fp->f_flag);
error = falloc(td, &fcapp, capfdp, fp->f_flag);
if (error)
return (error);
@ -309,18 +309,18 @@ kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
cp = uma_zalloc(capability_zone, M_WAITOK | M_ZERO);
cp->cap_rights = rights;
cp->cap_object = fp_object;
cp->cap_file = *fcappp;
cp->cap_file = fcapp;
if (fp->f_flag & DFLAG_PASSABLE)
finit(*fcappp, fp->f_flag, DTYPE_CAPABILITY, cp,
finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp,
&capability_ops);
else
finit(*fcappp, fp->f_flag, DTYPE_CAPABILITY, cp,
finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp,
&capability_ops_unpassable);
/*
* Release our private reference (the proc filedesc still has one).
*/
fdrop(*fcappp, td);
fdrop(fcapp, td);
return (0);
}

View File

@ -180,6 +180,18 @@ namei(struct nameidata *ndp)
if (!error && *cnp->cn_pnbuf == '\0')
error = ENOENT;
#ifdef CAPABILITY_MODE
/*
* In capability mode, lookups must be "strictly relative" (i.e.
* not an absolute path, and not containing '..' components) to
* a real file descriptor, not the pseudo-descriptor AT_FDCWD.
*/
if (IN_CAPABILITY_MODE(td)) {
ndp->ni_strictrelative = 1;
if (ndp->ni_dirfd == AT_FDCWD)
error = ECAPMODE;
}
#endif
if (error) {
uma_zfree(namei_zone, cnp->cn_pnbuf);
#ifdef DIAGNOSTIC
@ -214,12 +226,20 @@ namei(struct nameidata *ndp)
AUDIT_ARG_ATFD1(ndp->ni_dirfd);
if (cnp->cn_flags & AUDITVNODE2)
AUDIT_ARG_ATFD2(ndp->ni_dirfd);
#ifdef CAPABILITY_MODE
KASSERT(!IN_CAPABILITY_MODE(td),
("%s: reached %s:%d in capability mode",
__func__, __FILE__, __LINE__));
error = fgetvp_rights(td, ndp->ni_dirfd,
ndp->ni_rightsneeded | CAP_LOOKUP,
&(ndp->ni_baserights), &dp);
#ifdef CAPABILITIES
/*
* Lookups relative to a capability must also be
* strictly relative.
*
* Note that a capability with rights CAP_MASK_VALID
* is treated exactly like a regular file descriptor.
*/
if (ndp->ni_baserights != CAP_MASK_VALID)
ndp->ni_strictrelative = 1;
#endif
error = fgetvp(td, ndp->ni_dirfd, 0, &dp);
}
if (error != 0 || dp != NULL) {
FILEDESC_SUNLOCK(fdp);
@ -261,6 +281,8 @@ namei(struct nameidata *ndp)
if (*(cnp->cn_nameptr) == '/') {
vrele(dp);
VFS_UNLOCK_GIANT(vfslocked);
if (ndp->ni_strictrelative != 0)
return (ENOTCAPABLE);
while (*(cnp->cn_nameptr) == '/') {
cnp->cn_nameptr++;
ndp->ni_pathlen--;
@ -604,7 +626,10 @@ dirloop:
}
/*
* Handle "..": four special cases.
* Handle "..": five special cases.
* 0. If doing a capability lookup, return ENOTCAPABLE (this is a
* fairly conservative design choice, but it's the only one that we
* are satisfied guarantees the property we're looking for).
* 1. Return an error if this is the last component of
* the name and the operation is DELETE or RENAME.
* 2. If at root directory (e.g. after chroot)
@ -618,6 +643,10 @@ dirloop:
* the jail or chroot, don't let them out.
*/
if (cnp->cn_flags & ISDOTDOT) {
if (ndp->ni_strictrelative != 0) {
error = ENOTCAPABLE;
goto bad;
}
if ((cnp->cn_flags & ISLASTCN) != 0 &&
(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
error = EINVAL;

View File

@ -993,6 +993,41 @@ change_root(vp, td)
return (0);
}
/*
 * Translate open(2) flags into the set of capability rights that the
 * requested open needs on the base descriptor.
 *
 * O_EXEC is tested on its own, before the access mode is examined: it is a
 * separate flag bit that the O_ACCMODE mask does not cover (see
 * <sys/fcntl.h>), so a "case O_EXEC" label inside the access-mode switch
 * could never match and an exec-only open would be treated as O_RDONLY,
 * asking for CAP_READ instead of CAP_FEXECVE.
 *
 * Returns the accumulated rights mask; CAP_LOOKUP is not included here and
 * is expected to be added by the caller.
 */
static __inline cap_rights_t
flags_to_rights(int flags)
{
	cap_rights_t rights = 0;

	if (flags & O_EXEC) {
		rights |= CAP_FEXECVE;
	} else {
		switch (flags & O_ACCMODE) {
		case O_RDONLY:
			rights |= CAP_READ;
			break;

		case O_RDWR:
			rights |= CAP_READ;
			/* FALLTHROUGH */
		case O_WRONLY:
			rights |= CAP_WRITE;
			break;
		}
	}

	if (flags & O_CREAT)
		rights |= CAP_CREATE;

	if (flags & O_TRUNC)
		rights |= CAP_FTRUNCATE;

	/* Either flavour of open-time lock needs the right to flock(). */
	if ((flags & O_EXLOCK) || (flags & O_SHLOCK))
		rights |= CAP_FLOCK;

	return (rights);
}
/*
* Check permissions, allocate an open file structure, and call the device
* open routine if any.
@ -1055,10 +1090,12 @@ kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
struct flock lf;
struct nameidata nd;
int vfslocked;
cap_rights_t rights_needed = CAP_LOOKUP;
AUDIT_ARG_FFLAGS(flags);
AUDIT_ARG_MODE(mode);
/* XXX: audit dirfd */
rights_needed |= flags_to_rights(flags);
/*
* Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
* may be specified.
@ -1082,8 +1119,8 @@ kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
/* Set the flags early so the finit in devfs can pick them up. */
fp->f_flag = flags & FMASK;
cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
NDINIT_AT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, fd,
td);
NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg,
path, fd, rights_needed, td);
td->td_dupfd = -1; /* XXX check for fdopen */
error = vn_open(&nd, &flags, cmode, fp);
if (error) {
@ -1092,18 +1129,20 @@ kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
* wonderous happened deep below and we just pass it up
* pretending we know what we do.
*/
if (error == ENXIO && fp->f_ops != &badfileops) {
fdrop(fp, td);
td->td_retval[0] = indx;
return (0);
}
if (error == ENXIO && fp->f_ops != &badfileops)
goto success;
/*
* handle special fdopen() case. bleh. dupfdopen() is
* responsible for dropping the old contents of ofiles[indx]
* if it succeeds.
*
* Don't do this for relative (capability) lookups; we don't
* understand exactly what would happen, and we don't think
* that it ever should.
*/
if ((error == ENODEV || error == ENXIO) &&
if ((nd.ni_strictrelative == 0) &&
(error == ENODEV || error == ENXIO) &&
(td->td_dupfd >= 0)) {
/* XXX from fdopen */
if ((error = finstall(td, fp, &indx, flags)) != 0)
@ -1172,9 +1211,22 @@ success:
/*
* If we haven't already installed the FD (for dupfdopen), do so now.
*/
if (indx == -1)
if ((error = finstall(td, fp, &indx, flags)) != 0)
goto bad_unlocked;
if (indx == -1) {
#ifdef CAPABILITIES
if (nd.ni_strictrelative == 1) {
/*
* We are doing a strict relative lookup; wrap the
* result in a capability.
*/
if ((error = kern_capwrap(td, fp, nd.ni_baserights,
&indx)) != 0)
goto bad_unlocked;
} else
#endif
if ((error = finstall(td, fp, &indx, flags)) != 0)
goto bad_unlocked;
}
/*
* Release our private reference, leaving the one associated with
@ -1301,8 +1353,9 @@ kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
return (error);
restart:
bwillwrite();
NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
pathseg, path, fd, td);
NDINIT_ATRIGHTS(&nd, CREATE,
LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, pathseg, path, fd,
CAP_MKFIFO, td);
if ((error = namei(&nd)) != 0)
return (error);
vfslocked = NDHASGIANT(&nd);
@ -2153,8 +2206,8 @@ kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
} else
cred = tmpcred = td->td_ucred;
AUDIT_ARG_VALUE(mode);
NDINIT_AT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
AUDITVNODE1, pathseg, path, fd, td);
NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td);
if ((error = namei(&nd)) != 0)
goto out1;
vfslocked = NDHASGIANT(&nd);
@ -2363,9 +2416,9 @@ kern_statat_vnhook(struct thread *td, int flag, int fd, char *path,
if (flag & ~AT_SYMLINK_NOFOLLOW)
return (EINVAL);
NDINIT_AT(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg,
path, fd, td);
path, fd, CAP_FSTAT, td);
if ((error = namei(&nd)) != 0)
return (error);
@ -2920,8 +2973,8 @@ kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
AUDIT_ARG_MODE(mode);
follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, path,
fd, td);
NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg,
path, fd, CAP_FCHMOD, td);
if ((error = namei(&nd)) != 0)
return (error);
vfslocked = NDHASGIANT(&nd);
@ -3063,8 +3116,8 @@ kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
AUDIT_ARG_OWNER(uid, gid);
follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, path,
fd, td);
NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg,
path, fd, CAP_FCHOWN, td);
if ((error = namei(&nd)) != 0)
return (error);
@ -3279,8 +3332,8 @@ kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
if ((error = getutimes(tptr, tptrseg, ts)) != 0)
return (error);
NDINIT_AT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path,
fd, td);
NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg,
path, fd, CAP_FUTIMES, td);
if ((error = namei(&nd)) != 0)
return (error);
@ -3610,11 +3663,11 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
bwillwrite();
#ifdef MAC
NDINIT_AT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
AUDITVNODE1, pathseg, old, oldfd, td);
NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
MPSAFE | AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
#else
NDINIT_AT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
AUDITVNODE1, pathseg, old, oldfd, td);
NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
#endif
if ((error = namei(&fromnd)) != 0)
@ -3637,8 +3690,9 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
vrele(fvp);
goto out1;
}
NDINIT_AT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
MPSAFE | AUDITVNODE2, pathseg, new, newfd, td);
NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
SAVESTART | MPSAFE | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE,
td);
if (fromnd.ni_vp->v_type == VDIR)
tond.ni_cnd.cn_flags |= WILLBEDIR;
if ((error = namei(&tond)) != 0) {
@ -3764,8 +3818,8 @@ kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
AUDIT_ARG_MODE(mode);
restart:
bwillwrite();
NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
segflg, path, fd, td);
NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE |
AUDITVNODE1, segflg, path, fd, CAP_MKDIR, td);
nd.ni_cnd.cn_flags |= WILLBEDIR;
if ((error = namei(&nd)) != 0)
return (error);
@ -3853,8 +3907,8 @@ kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
restart:
bwillwrite();
NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
pathseg, path, fd, td);
NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE |
AUDITVNODE1, pathseg, path, fd, CAP_RMDIR, td);
if ((error = namei(&nd)) != 0)
return (error);
vfslocked = NDHASGIANT(&nd);

View File

@ -142,7 +142,7 @@
* Create a capability to wrap a file object.
*/
int kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
struct file **cap, int *capfd);
int *capfd);
/*
* Unwrap a capability if its rights mask is a superset of 'rights'.

View File

@ -63,6 +63,7 @@ struct nameidata {
*/
const char *ni_dirp; /* pathname pointer */
enum uio_seg ni_segflg; /* location of pathname */
cap_rights_t ni_rightsneeded; /* rights required to look up vnode */
/*
* Arguments to lookup.
*/
@ -70,6 +71,11 @@ struct nameidata {
struct vnode *ni_rootdir; /* logical root directory */
struct vnode *ni_topdir; /* logical top directory */
int ni_dirfd; /* starting directory for *at functions */
int ni_strictrelative; /* relative lookup only; no '..' */
/*
* Results: returned from namei
*/
cap_rights_t ni_baserights; /* rights the *at base has (or -1) */
/*
* Results: returned from/manipulated by lookup
*/
@ -151,11 +157,13 @@ struct nameidata {
* Initialization of a nameidata structure.
*/
#define NDINIT(ndp, op, flags, segflg, namep, td) \
NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, td)
NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, 0, td)
#define NDINIT_AT(ndp, op, flags, segflg, namep, dirfd, td) \
NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, td)
NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, 0, td)
#define NDINIT_ATRIGHTS(ndp, op, flags, segflg, namep, dirfd, rights, td) \
NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, rights, td)
#define NDINIT_ATVP(ndp, op, flags, segflg, namep, vp, td) \
NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, td)
NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, 0, td)
static __inline void
NDINIT_ALL(struct nameidata *ndp,
@ -164,6 +172,7 @@ NDINIT_ALL(struct nameidata *ndp,
const char *namep,
int dirfd,
struct vnode *startdir,
cap_rights_t rights,
struct thread *td)
{
ndp->ni_cnd.cn_nameiop = op;
@ -172,6 +181,9 @@ NDINIT_ALL(struct nameidata *ndp,
ndp->ni_dirp = namep;
ndp->ni_dirfd = dirfd;
ndp->ni_startdir = startdir;
ndp->ni_strictrelative = 0;
ndp->ni_rightsneeded = rights;
ndp->ni_baserights = 0;
ndp->ni_cnd.cn_thread = td;
}