From 8d9ed174f3afba5f114742447e622fc1173d4774 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Thu, 18 Mar 2021 12:41:47 +0200 Subject: [PATCH] open(2): Implement O_PATH Reviewed by: markj Tested by: pho Discussed with: walker.aj325_gmail.com, wulf Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D29323 --- lib/libc/sys/open.2 | 41 ++++++++++++++++++++++++++- sys/kern/kern_descrip.c | 46 ++++++++++++++++++++++++++++--- sys/kern/vfs_aio.c | 5 ++++ sys/kern/vfs_lookup.c | 6 ++-- sys/kern/vfs_syscalls.c | 61 ++++++++++++++++++++++++++++++++--------- sys/kern/vfs_vnops.c | 38 +++++++++++++++---------- sys/sys/fcntl.h | 8 ++++-- sys/sys/file.h | 1 + sys/sys/filedesc.h | 2 ++ 9 files changed, 170 insertions(+), 38 deletions(-) diff --git a/lib/libc/sys/open.2 b/lib/libc/sys/open.2 index e24c823d039a..f9c54bfc7581 100644 --- a/lib/libc/sys/open.2 +++ b/lib/libc/sys/open.2 @@ -28,7 +28,7 @@ .\" @(#)open.2 8.2 (Berkeley) 11/16/93 .\" $FreeBSD$ .\" -.Dd February 23, 2021 +.Dd March 18, 2021 .Dt OPEN 2 .Os .Sh NAME @@ -168,6 +168,7 @@ O_DIRECTORY error if file is not a directory O_CLOEXEC set FD_CLOEXEC upon open O_VERIFY verify the contents of the file O_RESOLVE_BENEATH path resolution must not cross the fd directory +O_PATH record only the target path in the opened descriptor .Ed .Pp Opening a file with @@ -316,6 +317,44 @@ The primary use for this descriptor will be as the lookup descriptor for the .Fn *at family of functions. .Pp +.Dv O_PATH +returns a file descriptor that can be used as a directory file descriptor for +.Xr openat 2 +and other system calls taking a file descriptor argument, like +.Xr fstatat 2 +and others. +The other functionality of the returned file descriptor is limited to +the descriptor-level operations. +It can be used for +.Bl -tag -width SCM_RIGHTS -offset indent -compact +.It Xr fcntl 2 +but advisory locking is not allowed +.It Xr dup 2 +.It Xr close 2 +.It Xr fstat 2 +.It Xr fexecve 2 +requires that +.Dv O_EXEC +was also specified at open time +.It Dv SCM_RIGHTS +can be passed over a +.Xr unix 4 +socket using a +.Dv SCM_RIGHTS +message +.El +But operations like +.Xr read 2 , +.Xr ftruncate 2 , +and any other that operate on file and not on file descriptor (except +.Xr fstat 2 ), +are not allowed. +See also the description of +.Dv AT_EMPTY_PATH +flag for +.Xr fstatat 2 +and related syscalls. +.Pp If successful, .Fn open returns a non-negative integer, termed a file descriptor. diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 7a43fbb2eb80..81af58fbddd1 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -546,6 +547,11 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) error = fget_fcntl(td, fd, &cap_fcntl_rights, F_SETFL, &fp); if (error != 0) break; + if (fp->f_ops == &path_fileops) { + fdrop(fp, td); + error = EBADF; + break; + } do { tmp = flg = fp->f_flag; tmp &= ~FCNTLFLAGS; @@ -610,7 +616,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp); if (error != 0) break; - if (fp->f_type != DTYPE_VNODE) { + if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) { error = EBADF; fdrop(fp, td); break; @@ -715,7 +721,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) error = fget_unlocked(fdp, fd, &cap_flock_rights, &fp); if (error != 0) break; - if (fp->f_type != DTYPE_VNODE) { + if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) { error = EBADF; fdrop(fp, td); break; @@ -771,7 +777,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) error = fget_unlocked(fdp, fd, &cap_no_rights, &fp); if (error != 0) break; - if (fp->f_type != DTYPE_VNODE) { + if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) { fdrop(fp, td); error = EBADF; break; @@ -3544,7 +3550,7 @@ sys_flock(struct thread *td, struct flock_args *uap) error = fget(td, uap->fd, &cap_flock_rights, &fp); if (error != 0) return (error); - if (fp->f_type != DTYPE_VNODE) { + if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) { fdrop(fp, td); return (EOPNOTSUPP); } @@ -4960,6 +4966,38 @@ struct fileops badfileops = { .fo_fill_kinfo = badfo_fill_kinfo, }; +static int +path_poll(struct file *fp, int events, struct ucred *active_cred, + struct thread *td) +{ + return (POLLNVAL); +} + +static int +path_close(struct file *fp, struct thread *td) +{ + MPASS(fp->f_type == DTYPE_VNODE); + fp->f_ops = &badfileops; + vrele(fp->f_vnode); + return (0); +} + +struct fileops path_fileops = { + .fo_read = badfo_readwrite, + .fo_write = badfo_readwrite, + .fo_truncate = badfo_truncate, + .fo_ioctl = badfo_ioctl, + .fo_poll = path_poll, + .fo_kqfilter = badfo_kqfilter, + .fo_stat = vn_statfile, + .fo_close = path_close, + .fo_chmod = badfo_chmod, + .fo_chown = badfo_chown, + .fo_sendfile = badfo_sendfile, + .fo_fill_kinfo = vn_fill_kinfo, + .fo_flags = DFLAG_PASSABLE, +}; + int invfo_rdwr(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 9b45a06c5f9f..640e82b6f0ff 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -1619,6 +1619,11 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj, goto err3; } + if (fp != NULL && fp->f_ops == &path_fileops) { + error = EBADF; + goto err3; + } + job->fd_file = fp; mtx_lock(&aio_job_mtx); diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index f4ec3cea9fff..f979676f4c7d 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -360,8 +360,10 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp) if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG_ATFD2(ndp->ni_dirfd); /* - * Effectively inlined fgetvp_rights, because we need to - * inspect the file as well as grabbing the vnode. + * Effectively inlined fgetvp_rights, because + * we need to inspect the file as well as + * grabbing the vnode. No check for O_PATH, + * files to implement its semantic. */ error = fget_cap(td, ndp->ni_dirfd, &rights, &dfp, &ndp->ni_filecaps); diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 45f155ebff3d..5a1efcdec467 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -375,7 +375,7 @@ kern_fstatfs(struct thread *td, int fd, struct statfs *buf) int error; AUDIT_ARG_FD(fd); - error = getvnode(td, fd, &cap_fstatfs_rights, &fp); + error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); if (error != 0) return (error); vp = fp->f_vnode; @@ -891,7 +891,7 @@ sys_fchdir(struct thread *td, struct fchdir_args *uap) int error; AUDIT_ARG_FD(uap->fd); - error = getvnode(td, uap->fd, &cap_fchdir_rights, + error = getvnode_path(td, uap->fd, &cap_fchdir_rights, &fp); if (error != 0) return (error); @@ -1023,9 +1023,10 @@ change_dir(struct vnode *vp, struct thread *td) static __inline void flags_to_rights(int flags, cap_rights_t *rightsp) { - if (flags & O_EXEC) { cap_rights_set_one(rightsp, CAP_FEXECVE); + if (flags & O_PATH) + return; } else { switch ((flags & O_ACCMODE)) { case O_RDONLY: @@ -1112,11 +1113,15 @@ kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, AUDIT_ARG_MODE(mode); cap_rights_init_one(&rights, CAP_LOOKUP); flags_to_rights(flags, &rights); + /* * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags - * may be specified. + * may be specified. On the other hand, for O_PATH any mode + * except O_EXEC is ignored. */ - if (flags & O_EXEC) { + if ((flags & O_PATH) != 0) { + flags &= ~(O_CREAT | O_ACCMODE); + } else if ((flags & O_EXEC) != 0) { if (flags & O_ACCMODE) return (EINVAL); } else if ((flags & O_ACCMODE) == O_ACCMODE) { @@ -1145,8 +1150,10 @@ kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, * wonderous happened deep below and we just pass it up * pretending we know what we do. */ - if (error == ENXIO && fp->f_ops != &badfileops) + if (error == ENXIO && fp->f_ops != &badfileops) { + MPASS((flags & O_PATH) == 0); goto success; + } /* * Handle special fdopen() case. bleh. @@ -1176,14 +1183,16 @@ kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, * files that switched type in the cdevsw fdopen() method. */ fp->f_vnode = vp; + /* * If the file wasn't claimed by devfs bind it to the normal * vnode operations here. */ if (fp->f_ops == &badfileops) { - KASSERT(vp->v_type != VFIFO, + KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0, ("Unexpected fifo fp %p vp %p", fp, vp)); - finit_vnode(fp, flags, NULL, &vnops); + finit_vnode(fp, flags, NULL, (flags & O_PATH) != 0 ? + &path_fileops : &vnops); } VOP_UNLOCK(vp); @@ -1882,7 +1891,7 @@ kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, fp = NULL; if (fd != FD_NONE) { - error = getvnode(td, fd, &cap_no_rights, &fp); + error = getvnode_path(td, fd, &cap_no_rights, &fp); if (error != 0) return (error); } @@ -4255,12 +4264,13 @@ sys_revoke(struct thread *td, struct revoke_args *uap) } /* - * Convert a user file descriptor to a kernel file entry and check that, if it - * is a capability, the correct rights are present. A reference on the file - * entry is held upon returning. + * This variant of getvnode() allows O_PATH files. Caller should + * ensure that returned file and vnode are only used for compatible + * semantics. */ int -getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) +getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, + struct file **fpp) { struct file *fp; int error; @@ -4285,10 +4295,35 @@ getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) fdrop(fp, td); return (EINVAL); } + *fpp = fp; return (0); } +/* + * Convert a user file descriptor to a kernel file entry and check + * that, if it is a capability, the correct rights are present. + * A reference on the file entry is held upon returning. + */ +int +getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) +{ + int error; + + error = getvnode_path(td, fd, rightsp, fpp); + + /* + * Filter out O_PATH file descriptors, most getvnode() callers + * do not call fo_ methods. + */ + if (error == 0 && (*fpp)->f_ops == &path_fileops) { + fdrop(*fpp, td); + error = EBADF; + } + + return (error); +} + /* * Get an (NFS) file handle. */ diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 98f37d26ea8c..6339295b0556 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -391,25 +391,30 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred, return (EOPNOTSUPP); if (vp->v_type != VDIR && fmode & O_DIRECTORY) return (ENOTDIR); + accmode = 0; - if (fmode & (FWRITE | O_TRUNC)) { - if (vp->v_type == VDIR) - return (EISDIR); - accmode |= VWRITE; - } - if (fmode & FREAD) - accmode |= VREAD; - if (fmode & FEXEC) - accmode |= VEXEC; - if ((fmode & O_APPEND) && (fmode & FWRITE)) - accmode |= VAPPEND; + if ((fmode & O_PATH) == 0) { + if ((fmode & (FWRITE | O_TRUNC)) != 0) { + if (vp->v_type == VDIR) + return (EISDIR); + accmode |= VWRITE; + } + if ((fmode & FREAD) != 0) + accmode |= VREAD; + if ((fmode & O_APPEND) && (fmode & FWRITE)) + accmode |= VAPPEND; #ifdef MAC - if (fmode & O_CREAT) - accmode |= VCREAT; - if (fmode & O_VERIFY) + if ((fmode & O_CREAT) != 0) + accmode |= VCREAT; +#endif + } + if ((fmode & FEXEC) != 0) + accmode |= VEXEC; +#ifdef MAC + if ((fmode & O_VERIFY) != 0) accmode |= VVERIFY; error = mac_vnode_check_open(cred, vp, accmode); - if (error) + if (error != 0) return (error); accmode &= ~(VCREAT | VVERIFY); @@ -419,6 +424,9 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred, if (error != 0) return (error); } + if ((fmode & O_PATH) != 0) + return (0); + if (vp->v_type == VFIFO && VOP_ISLOCKED(vp) != LK_EXCLUSIVE) vn_lock(vp, LK_UPGRADE | LK_RETRY); error = VOP_OPEN(vp, fmode, cred, td, fp); diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index 0fa4e7758c9d..c328abaa02af 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -135,7 +135,7 @@ typedef __pid_t pid_t; #if __BSD_VISIBLE #define O_VERIFY 0x00200000 /* open only after verification */ -/* #define O_UNUSED1 0x00400000 */ /* Was O_BENEATH */ +#define O_PATH 0x00400000 /* fd is only a path */ #define O_RESOLVE_BENEATH 0x00800000 /* Do not allow name resolution to walk out of cwd */ #endif @@ -156,10 +156,12 @@ typedef __pid_t pid_t; /* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */ #define FFLAGS(oflags) ((oflags) & O_EXEC ? (oflags) : (oflags) + 1) -#define OFLAGS(fflags) ((fflags) & O_EXEC ? (fflags) : (fflags) - 1) +#define OFLAGS(fflags) \ + (((fflags) & (O_EXEC | O_PATH)) != 0 ? (fflags) : (fflags) - 1) /* bits to save after open */ -#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|O_DIRECT|FEXEC) +#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK| \ + O_DIRECT|FEXEC|O_PATH) /* bits settable by fcntl(F_SETFL, ...) */ #define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|FRDAHEAD|O_DIRECT) diff --git a/sys/sys/file.h b/sys/sys/file.h index c4fc70f517a4..9237ee5ceb9d 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -239,6 +239,7 @@ struct xfile { extern struct fileops vnops; extern struct fileops badfileops; +extern struct fileops path_fileops; extern struct fileops socketops; extern int maxfiles; /* kernel limit on number of open files */ extern int maxfilesperproc; /* per process limit on number of open files */ diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 8c5aa258ed28..7f18d8a2286c 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -265,6 +265,8 @@ struct filedesc_to_leader * struct filedesc *fdp, struct proc *leader); int getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp); +int getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, + struct file **fpp); void mountcheckdirs(struct vnode *olddp, struct vnode *newdp); int fget_cap_locked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp,