open(2): Implement O_PATH

Reviewed by:	markj
Tested by:	pho
Discussed with:	walker.aj325_gmail.com, wulf
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential revision:	https://reviews.freebsd.org/D29323
This commit is contained in:
Konstantin Belousov 2021-03-18 12:41:47 +02:00
parent 509124b626
commit 8d9ed174f3
9 changed files with 170 additions and 38 deletions

View File

@ -28,7 +28,7 @@
.\" @(#)open.2 8.2 (Berkeley) 11/16/93
.\" $FreeBSD$
.\"
.Dd February 23, 2021
.Dd March 18, 2021
.Dt OPEN 2
.Os
.Sh NAME
@ -168,6 +168,7 @@ O_DIRECTORY error if file is not a directory
O_CLOEXEC set FD_CLOEXEC upon open
O_VERIFY verify the contents of the file
O_RESOLVE_BENEATH path resolution must not cross the fd directory
O_PATH record only the target path in the opened descriptor
.Ed
.Pp
Opening a file with
@ -316,6 +317,44 @@ The primary use for this descriptor will be as the lookup descriptor for the
.Fn *at
family of functions.
.Pp
.Dv O_PATH
returns a file descriptor that can be used as a directory file descriptor for
.Xr openat 2
and other system calls taking a file descriptor argument, such as
.Xr fstatat 2 .
The other functionality of the returned file descriptor is limited to
the descriptor-level operations.
It can be used for
.Bl -tag -width SCM_RIGHTS -offset indent -compact
.It Xr fcntl 2
but advisory locking is not allowed
.It Xr dup 2
.It Xr close 2
.It Xr fstat 2
.It Xr fexecve 2
requires that
.Dv O_EXEC
was also specified at open time
.It Dv SCM_RIGHTS
can be passed over a
.Xr unix 4
socket using a
.Dv SCM_RIGHTS
message
.El
Operations like
.Xr read 2 ,
.Xr ftruncate 2 ,
and any others that operate on the file rather than on the file descriptor
(except
.Xr fstat 2 ) ,
are not allowed.
See also the description of the
.Dv AT_EMPTY_PATH
flag for
.Xr fstatat 2
and related syscalls.
.Pp
If successful,
.Fn open
returns a non-negative integer, termed a file descriptor.

View File

@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/selinfo.h>
#include <sys/poll.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
@ -546,6 +547,11 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
error = fget_fcntl(td, fd, &cap_fcntl_rights, F_SETFL, &fp);
if (error != 0)
break;
if (fp->f_ops == &path_fileops) {
fdrop(fp, td);
error = EBADF;
break;
}
do {
tmp = flg = fp->f_flag;
tmp &= ~FCNTLFLAGS;
@ -610,7 +616,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp);
if (error != 0)
break;
if (fp->f_type != DTYPE_VNODE) {
if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
error = EBADF;
fdrop(fp, td);
break;
@ -715,7 +721,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp);
if (error != 0)
break;
if (fp->f_type != DTYPE_VNODE) {
if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
error = EBADF;
fdrop(fp, td);
break;
@ -771,7 +777,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
error = fget_unlocked(fdp, fd, &cap_no_rights, &fp);
if (error != 0)
break;
if (fp->f_type != DTYPE_VNODE) {
if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
fdrop(fp, td);
error = EBADF;
break;
@ -3544,7 +3550,7 @@ sys_flock(struct thread *td, struct flock_args *uap)
error = fget(td, uap->fd, &cap_flock_rights, &fp);
if (error != 0)
return (error);
if (fp->f_type != DTYPE_VNODE) {
if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
fdrop(fp, td);
return (EOPNOTSUPP);
}
@ -4960,6 +4966,38 @@ struct fileops badfileops = {
.fo_fill_kinfo = badfo_fill_kinfo,
};
/*
 * Poll method installed in path_fileops.  The requested events and the
 * remaining arguments are not examined: the result is always POLLNVAL.
 */
static int
path_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{
	int revents;

	revents = POLLNVAL;
	return (revents);
}
/*
 * Close method installed in path_fileops.  The file must be of type
 * DTYPE_VNODE.  Its operations vector is switched to badfileops and
 * vrele() is called on the vnode the file refers to.  Always returns 0.
 */
static int
path_close(struct file *fp, struct thread *td)
{
	struct vnode *vp;

	MPASS(fp->f_type == DTYPE_VNODE);

	/* Pick up the vnode first, then detach the file from path_fileops. */
	vp = fp->f_vnode;
	fp->f_ops = &badfileops;
	vrele(vp);

	return (0);
}
/*
 * File operations vector for path-only files (see path_poll() and
 * path_close()).  Only stat, kinfo reporting, poll and close have
 * dedicated or vnode-level handlers; every other listed operation is
 * routed to the corresponding badfo_* handler.
 */
struct fileops path_fileops = {
	/* Operations with a real implementation. */
	.fo_stat = vn_statfile,
	.fo_fill_kinfo = vn_fill_kinfo,
	.fo_poll = path_poll,
	.fo_close = path_close,

	/* Operations handed to the badfo_* handlers. */
	.fo_read = badfo_readwrite,
	.fo_write = badfo_readwrite,
	.fo_truncate = badfo_truncate,
	.fo_ioctl = badfo_ioctl,
	.fo_kqfilter = badfo_kqfilter,
	.fo_chmod = badfo_chmod,
	.fo_chown = badfo_chown,
	.fo_sendfile = badfo_sendfile,

	.fo_flags = DFLAG_PASSABLE,
};
int
invfo_rdwr(struct file *fp, struct uio *uio, struct ucred *active_cred,
int flags, struct thread *td)

View File

@ -1619,6 +1619,11 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
goto err3;
}
if (fp != NULL && fp->f_ops == &path_fileops) {
error = EBADF;
goto err3;
}
job->fd_file = fp;
mtx_lock(&aio_job_mtx);

View File

@ -360,8 +360,10 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp)
if (cnp->cn_flags & AUDITVNODE2)
AUDIT_ARG_ATFD2(ndp->ni_dirfd);
/*
* Effectively inlined fgetvp_rights, because we need to
* inspect the file as well as grabbing the vnode.
* Effectively inlined fgetvp_rights, because
* we need to inspect the file as well as
* grabbing the vnode. O_PATH descriptors are deliberately
* not rejected here: accepting them as lookup anchors is
* what implements the O_PATH semantic.
*/
error = fget_cap(td, ndp->ni_dirfd, &rights,
&dfp, &ndp->ni_filecaps);

View File

@ -375,7 +375,7 @@ kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
int error;
AUDIT_ARG_FD(fd);
error = getvnode(td, fd, &cap_fstatfs_rights, &fp);
error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp);
if (error != 0)
return (error);
vp = fp->f_vnode;
@ -891,7 +891,7 @@ sys_fchdir(struct thread *td, struct fchdir_args *uap)
int error;
AUDIT_ARG_FD(uap->fd);
error = getvnode(td, uap->fd, &cap_fchdir_rights,
error = getvnode_path(td, uap->fd, &cap_fchdir_rights,
&fp);
if (error != 0)
return (error);
@ -1023,9 +1023,10 @@ change_dir(struct vnode *vp, struct thread *td)
static __inline void
flags_to_rights(int flags, cap_rights_t *rightsp)
{
if (flags & O_EXEC) {
cap_rights_set_one(rightsp, CAP_FEXECVE);
if (flags & O_PATH)
return;
} else {
switch ((flags & O_ACCMODE)) {
case O_RDONLY:
@ -1112,11 +1113,15 @@ kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
AUDIT_ARG_MODE(mode);
cap_rights_init_one(&rights, CAP_LOOKUP);
flags_to_rights(flags, &rights);
/*
* Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
* may be specified.
* may be specified. On the other hand, for O_PATH any mode
* except O_EXEC is ignored.
*/
if (flags & O_EXEC) {
if ((flags & O_PATH) != 0) {
flags &= ~(O_CREAT | O_ACCMODE);
} else if ((flags & O_EXEC) != 0) {
if (flags & O_ACCMODE)
return (EINVAL);
} else if ((flags & O_ACCMODE) == O_ACCMODE) {
@ -1145,8 +1150,10 @@ kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
* wonderous happened deep below and we just pass it up
* pretending we know what we do.
*/
if (error == ENXIO && fp->f_ops != &badfileops)
if (error == ENXIO && fp->f_ops != &badfileops) {
MPASS((flags & O_PATH) == 0);
goto success;
}
/*
* Handle special fdopen() case. bleh.
@ -1176,14 +1183,16 @@ kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
* files that switched type in the cdevsw fdopen() method.
*/
fp->f_vnode = vp;
/*
* If the file wasn't claimed by devfs bind it to the normal
* vnode operations here.
*/
if (fp->f_ops == &badfileops) {
KASSERT(vp->v_type != VFIFO,
KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
("Unexpected fifo fp %p vp %p", fp, vp));
finit_vnode(fp, flags, NULL, &vnops);
finit_vnode(fp, flags, NULL, (flags & O_PATH) != 0 ?
&path_fileops : &vnops);
}
VOP_UNLOCK(vp);
@ -1882,7 +1891,7 @@ kern_funlinkat(struct thread *td, int dfd, const char *path, int fd,
fp = NULL;
if (fd != FD_NONE) {
error = getvnode(td, fd, &cap_no_rights, &fp);
error = getvnode_path(td, fd, &cap_no_rights, &fp);
if (error != 0)
return (error);
}
@ -4255,12 +4264,13 @@ sys_revoke(struct thread *td, struct revoke_args *uap)
}
/*
* Convert a user file descriptor to a kernel file entry and check that, if it
* is a capability, the correct rights are present. A reference on the file
* entry is held upon returning.
* This variant of getvnode() allows O_PATH files. Caller should
* ensure that returned file and vnode are only used for compatible
* semantics.
*/
int
getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp,
struct file **fpp)
{
struct file *fp;
int error;
@ -4285,10 +4295,35 @@ getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
fdrop(fp, td);
return (EINVAL);
}
*fpp = fp;
return (0);
}
/*
 * Translate a user file descriptor into a kernel file entry, checking
 * that the required rights are present if it is a capability.  On
 * success a reference on the file entry is held for the caller.
 * Descriptors whose operations vector is path_fileops are refused with
 * EBADF after their reference has been dropped.
 */
int
getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
{
	struct file *fp;
	int error;

	error = getvnode_path(td, fd, rightsp, fpp);
	if (error != 0)
		return (error);

	/*
	 * Most getvnode() callers do not go through the fo_ methods, so
	 * O_PATH file descriptors have to be filtered out here.
	 */
	fp = *fpp;
	if (fp->f_ops != &path_fileops)
		return (0);

	fdrop(fp, td);
	return (EBADF);
}
/*
* Get an (NFS) file handle.
*/

View File

@ -391,25 +391,30 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
return (EOPNOTSUPP);
if (vp->v_type != VDIR && fmode & O_DIRECTORY)
return (ENOTDIR);
accmode = 0;
if (fmode & (FWRITE | O_TRUNC)) {
if ((fmode & O_PATH) == 0) {
if ((fmode & (FWRITE | O_TRUNC)) != 0) {
if (vp->v_type == VDIR)
return (EISDIR);
accmode |= VWRITE;
}
if (fmode & FREAD)
if ((fmode & FREAD) != 0)
accmode |= VREAD;
if (fmode & FEXEC)
accmode |= VEXEC;
if ((fmode & O_APPEND) && (fmode & FWRITE))
accmode |= VAPPEND;
#ifdef MAC
if (fmode & O_CREAT)
if ((fmode & O_CREAT) != 0)
accmode |= VCREAT;
if (fmode & O_VERIFY)
#endif
}
if ((fmode & FEXEC) != 0)
accmode |= VEXEC;
#ifdef MAC
if ((fmode & O_VERIFY) != 0)
accmode |= VVERIFY;
error = mac_vnode_check_open(cred, vp, accmode);
if (error)
if (error != 0)
return (error);
accmode &= ~(VCREAT | VVERIFY);
@ -419,6 +424,9 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
if (error != 0)
return (error);
}
if ((fmode & O_PATH) != 0)
return (0);
if (vp->v_type == VFIFO && VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
vn_lock(vp, LK_UPGRADE | LK_RETRY);
error = VOP_OPEN(vp, fmode, cred, td, fp);

View File

@ -135,7 +135,7 @@ typedef __pid_t pid_t;
#if __BSD_VISIBLE
#define O_VERIFY 0x00200000 /* open only after verification */
/* #define O_UNUSED1 0x00400000 */ /* Was O_BENEATH */
#define O_PATH 0x00400000 /* fd is only a path */
#define O_RESOLVE_BENEATH 0x00800000 /* Do not allow name resolution to walk
out of cwd */
#endif
@ -156,10 +156,12 @@ typedef __pid_t pid_t;
/* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */
#define FFLAGS(oflags) ((oflags) & O_EXEC ? (oflags) : (oflags) + 1)
#define OFLAGS(fflags) ((fflags) & O_EXEC ? (fflags) : (fflags) - 1)
#define OFLAGS(fflags) \
(((fflags) & (O_EXEC | O_PATH)) != 0 ? (fflags) : (fflags) - 1)
/* bits to save after open */
#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|O_DIRECT|FEXEC)
#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK| \
O_DIRECT|FEXEC|O_PATH)
/* bits settable by fcntl(F_SETFL, ...) */
#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|FRDAHEAD|O_DIRECT)

View File

@ -239,6 +239,7 @@ struct xfile {
extern struct fileops vnops;
extern struct fileops badfileops;
extern struct fileops path_fileops;
extern struct fileops socketops;
extern int maxfiles; /* kernel limit on number of open files */
extern int maxfilesperproc; /* per process limit on number of open files */

View File

@ -265,6 +265,8 @@ struct filedesc_to_leader *
struct filedesc *fdp, struct proc *leader);
int getvnode(struct thread *td, int fd, cap_rights_t *rightsp,
struct file **fpp);
int getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp,
struct file **fpp);
void mountcheckdirs(struct vnode *olddp, struct vnode *newdp);
int fget_cap_locked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,