From d3cc535474a0df422a6928615dcb3afe46af253f Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Fri, 17 Jan 2020 14:42:25 +0000 Subject: [PATCH] vfs: provide F_ISUNIONSTACK as a kludge for libc Prior to introduction of this op libc's readdir would call fstatfs(2), in effect unnecessarily copying kilobytes of data just to check fs name and a mount flag. Reviewed by: kib (previous version) Differential Revision: https://reviews.freebsd.org/D23162 --- lib/libc/gen/opendir.c | 25 +++++++++++++++----- lib/libc/sys/fcntl.2 | 7 +++++- sys/fs/unionfs/union_vfsops.c | 2 +- sys/kern/kern_descrip.c | 44 +++++++++++++++++++++++++++++++++++ sys/sys/fcntl.h | 1 + sys/sys/mount.h | 1 + 6 files changed, 72 insertions(+), 8 deletions(-) diff --git a/lib/libc/gen/opendir.c b/lib/libc/gen/opendir.c index a6e7859288a0..bb42dfdf49e2 100644 --- a/lib/libc/gen/opendir.c +++ b/lib/libc/gen/opendir.c @@ -273,6 +273,24 @@ _filldir(DIR *dirp, bool use_current_pos) return (true); } +static bool +is_unionstack(int fd) +{ + struct statfs sfb; + int unionstack; + + unionstack = _fcntl(fd, F_ISUNIONSTACK, 0); + if (unionstack != -1) + return (unionstack); + + /* + * Temporary compat for kernels which don't provide F_ISUNIONSTACK. + */ + if (_fstatfs(fd, &sfb) < 0) + return (true); + return (strcmp(sfb.f_fstypename, "unionfs") == 0 || + (sfb.f_flags & MNT_UNION)); +} /* * Common routine for opendir(3), __opendir2(3) and fdopendir(3). 
@@ -312,12 +330,7 @@ __opendir_common(int fd, int flags, bool use_current_pos) */ unionstack = false; if (flags & DTF_NODUP) { - struct statfs sfb; - - if (_fstatfs(fd, &sfb) == 0) { - unionstack = strcmp(sfb.f_fstypename, "unionfs") == 0 || - (sfb.f_flags & MNT_UNION); - } + unionstack = is_unionstack(fd); } if (unionstack) { diff --git a/lib/libc/sys/fcntl.2 b/lib/libc/sys/fcntl.2 index 189b505f4332..9793024f48ed 100644 --- a/lib/libc/sys/fcntl.2 +++ b/lib/libc/sys/fcntl.2 @@ -28,7 +28,7 @@ .\" @(#)fcntl.2 8.2 (Berkeley) 1/12/94 .\" $FreeBSD$ .\" -.Dd September 4, 2019 +.Dd January 17, 2020 .Dt FCNTL 2 .Os .Sh NAME @@ -185,6 +185,11 @@ Add seals to the file as described below, if the underlying filesystem supports seals. .It Dv F_GET_SEALS Get seals associated with the file, if the underlying filesystem supports seals. +.It Dv F_ISUNIONSTACK +Check if the vnode is part of a union stack (either the "union" flag from +.Xr mount 2 +or unionfs). +This is a hack not intended to be used outside of libc. 
.El .Pp The flags for the diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c index 862089520423..d002fb5ef98a 100644 --- a/sys/fs/unionfs/union_vfsops.c +++ b/sys/fs/unionfs/union_vfsops.c @@ -296,7 +296,7 @@ unionfs_domount(struct mount *mp) if ((ump->um_lowervp->v_mount->mnt_flag & MNT_LOCAL) && (ump->um_uppervp->v_mount->mnt_flag & MNT_LOCAL)) mp->mnt_flag |= MNT_LOCAL; - mp->mnt_kern_flag |= MNTK_NOMSYNC; + mp->mnt_kern_flag |= MNTK_NOMSYNC | MNTK_UNIONFS; MNT_IUNLOCK(mp); /* diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 732df3fb1a87..52d58c8ca186 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -489,6 +489,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) struct filedescent *fde; struct proc *p; struct vnode *vp; + struct mount *mp; int error, flg, seals, tmp; uint64_t bsize; off_t foffset; @@ -816,6 +817,49 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) fdrop(fp, td); break; + case F_ISUNIONSTACK: + /* + * Check if the vnode is part of a union stack (either the + * "union" flag from mount(2) or unionfs). + * + * Prior to introduction of this op libc's readdir would call + * fstatfs(2), in effect unnecessarily copying kilobytes of + * data just to check fs name and a mount flag. + * + * Fixing the code to handle everything in the kernel instead + * is a non-trivial endeavor and has low priority, thus this + * horrible kludge facilitates the current behavior in a much + * cheaper manner until someone(tm) sorts this out. + */ + error = fget_unlocked(fdp, fd, &cap_no_rights, &fp, NULL); + if (error != 0) + break; + if (fp->f_type != DTYPE_VNODE) { + fdrop(fp, td); + error = EBADF; + break; + } + vp = fp->f_vnode; + /* + * Since we don't prevent dooming the vnode even non-null mp + * found can become immediately stale. 
This is tolerable since + * mount points are type-stable (providing safe memory access) + * and any vfs op on this vnode going forward will return an + * error (meaning return value in this case is meaningless). + */ + mp = (struct mount *)atomic_load_ptr(&vp->v_mount); + if (__predict_false(mp == NULL)) { + fdrop(fp, td); + error = EBADF; + break; + } + td->td_retval[0] = 0; + if (mp->mnt_kern_flag & MNTK_UNIONFS || + mp->mnt_flag & MNT_UNION) + td->td_retval[0] = 1; + fdrop(fp, td); + break; + default: error = EINVAL; break; diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index 5359a317c4e5..55ba4220afef 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -250,6 +250,7 @@ typedef __pid_t pid_t; #define F_DUP2FD_CLOEXEC 18 /* Like F_DUP2FD, but FD_CLOEXEC is set */ #define F_ADD_SEALS 19 #define F_GET_SEALS 20 +#define F_ISUNIONSTACK 21 /* Kludge for libc, don't use it. */ /* Seals (F_ADD_SEALS, F_GET_SEALS). */ #define F_SEAL_SEAL 0x0001 /* Prevent adding sealings */ diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 0bf8f2e28c74..6c23cb25c352 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -414,6 +414,7 @@ void __mnt_vnode_markerfree_lazy(struct vnode **mvp, struct mount *mp); #define MNTK_USES_BCACHE 0x00004000 /* FS uses the buffer cache. */ #define MNTK_TEXT_REFS 0x00008000 /* Keep use ref for text */ #define MNTK_VMSETSIZE_BUG 0x00010000 +#define MNTK_UNIONFS 0x00020000 /* A hack for F_ISUNIONSTACK */ #define MNTK_NOASYNC 0x00800000 /* disable async */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */