fd: use smr for managing struct pwd

This has a side effect of eliminating filedesc slock/sunlock during path
lookup, which in turn removes contention vs concurrent modifications to the fd
table.

Reviewed by:	markj, kib
Differential Revision:	https://reviews.freebsd.org/D23889
This commit is contained in:
Mateusz Guzik 2020-03-08 00:23:36 +00:00
parent a6924f5db3
commit d2222aa0e9
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=358734
5 changed files with 103 additions and 30 deletions

View File

@ -460,6 +460,7 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap
struct file file;
struct filedesc filed;
struct pwd pwd;
unsigned long pwd_addr;
struct vm_map_entry vmentry;
struct vm_object object;
struct vmspace vmspace;
@ -488,10 +489,10 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap
return (NULL);
}
haspwd = false;
if (filed.fd_pwd != NULL) {
if (!kvm_read_all(kd, (unsigned long)filed.fd_pwd, &pwd,
sizeof(pwd))) {
warnx("can't read fd_pwd at %p", (void *)filed.fd_pwd);
pwd_addr = (unsigned long)(FILEDESC_KVM_LOAD_PWD(&filed));
if (pwd_addr != 0) {
if (!kvm_read_all(kd, pwd_addr, &pwd, sizeof(pwd))) {
warnx("can't read fd_pwd at %p", (void *)pwd_addr);
return (NULL);
}
haspwd = true;

View File

@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sbuf.h>
#include <sys/signalvar.h>
#include <sys/kdb.h>
#include <sys/smr.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
@ -101,6 +102,8 @@ MALLOC_DECLARE(M_FADVISE);
static __read_mostly uma_zone_t file_zone;
static __read_mostly uma_zone_t filedesc0_zone;
static __read_mostly uma_zone_t pwd_zone;
static __read_mostly smr_t pwd_smr;
static int closefp(struct filedesc *fdp, int fd, struct file *fp,
struct thread *td, int holdleaders);
@ -1985,6 +1988,7 @@ fdinit(struct filedesc *fdp, bool prepfiles)
{
struct filedesc0 *newfdp0;
struct filedesc *newfdp;
struct pwd *newpwd;
newfdp0 = uma_zalloc(filedesc0_zone, M_WAITOK | M_ZERO);
newfdp = &newfdp0->fd_fd;
@ -2000,7 +2004,8 @@ fdinit(struct filedesc *fdp, bool prepfiles)
newfdp->fd_files->fdt_nfiles = NDFILE;
if (fdp == NULL) {
newfdp->fd_pwd = pwd_alloc();
newpwd = pwd_alloc();
smr_serialized_store(&newfdp->fd_pwd, newpwd, true);
return (newfdp);
}
@ -2008,7 +2013,8 @@ fdinit(struct filedesc *fdp, bool prepfiles)
fdgrowtable(newfdp, fdp->fd_lastfile + 1);
FILEDESC_SLOCK(fdp);
newfdp->fd_pwd = pwd_hold_filedesc(fdp);
newpwd = pwd_hold_filedesc(fdp);
smr_serialized_store(&newfdp->fd_pwd, newpwd, true);
if (!prepfiles) {
FILEDESC_SUNLOCK(fdp);
@ -2328,7 +2334,7 @@ fdescfree(struct thread *td)
return;
FILEDESC_XLOCK(fdp);
pwd = fdp->fd_pwd;
pwd = FILEDESC_XLOCKED_LOAD_PWD(fdp);
pwd_set(fdp, NULL);
FILEDESC_XUNLOCK(fdp);
@ -2341,7 +2347,7 @@ void
fdescfree_remapped(struct filedesc *fdp)
{
pwd_drop(fdp->fd_pwd);
pwd_drop(smr_serialized_load(&fdp->fd_pwd, true));
fdescfree_fds(curthread, fdp, 0);
}
@ -3277,7 +3283,7 @@ pwd_hold_filedesc(struct filedesc *fdp)
struct pwd *pwd;
FILEDESC_LOCK_ASSERT(fdp);
pwd = fdp->fd_pwd;
pwd = FILEDESC_LOCKED_LOAD_PWD(fdp);
if (pwd != NULL)
refcount_acquire(&pwd->pwd_refcount);
return (pwd);
@ -3291,11 +3297,14 @@ pwd_hold(struct thread *td)
fdp = td->td_proc->p_fd;
FILEDESC_SLOCK(fdp);
pwd = fdp->fd_pwd;
MPASS(pwd != NULL);
refcount_acquire(&pwd->pwd_refcount);
FILEDESC_SUNLOCK(fdp);
smr_enter(pwd_smr);
for (;;) {
pwd = smr_entered_load(&fdp->fd_pwd, pwd_smr);
MPASS(pwd != NULL);
if (refcount_acquire_if_not_zero(&pwd->pwd_refcount))
break;
}
smr_exit(pwd_smr);
return (pwd);
}
@ -3304,7 +3313,8 @@ pwd_alloc(void)
{
struct pwd *pwd;
pwd = malloc(sizeof(*pwd), M_PWD, M_WAITOK | M_ZERO);
pwd = uma_zalloc_smr(pwd_zone, M_WAITOK);
bzero(pwd, sizeof(*pwd));
refcount_init(&pwd->pwd_refcount, 1);
return (pwd);
}
@ -3322,7 +3332,7 @@ pwd_drop(struct pwd *pwd)
vrele(pwd->pwd_rdir);
if (pwd->pwd_jdir != NULL)
vrele(pwd->pwd_jdir);
free(pwd, M_PWD);
uma_zfree_smr(pwd_zone, pwd);
}
/*
@ -3340,7 +3350,7 @@ pwd_chroot(struct thread *td, struct vnode *vp)
fdp = td->td_proc->p_fd;
newpwd = pwd_alloc();
FILEDESC_XLOCK(fdp);
oldpwd = fdp->fd_pwd;
oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp);
if (chroot_allow_open_directories == 0 ||
(chroot_allow_open_directories == 1 &&
oldpwd->pwd_rdir != rootvnode)) {
@ -3376,7 +3386,7 @@ pwd_chdir(struct thread *td, struct vnode *vp)
newpwd = pwd_alloc();
fdp = td->td_proc->p_fd;
FILEDESC_XLOCK(fdp);
oldpwd = fdp->fd_pwd;
oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp);
newpwd->pwd_cdir = vp;
pwd_fill(oldpwd, newpwd);
pwd_set(fdp, newpwd);
@ -3392,7 +3402,7 @@ pwd_ensure_dirs(void)
fdp = curproc->p_fd;
FILEDESC_XLOCK(fdp);
oldpwd = fdp->fd_pwd;
oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp);
if (oldpwd->pwd_cdir != NULL && oldpwd->pwd_rdir != NULL) {
FILEDESC_XUNLOCK(fdp);
return;
@ -3401,7 +3411,7 @@ pwd_ensure_dirs(void)
newpwd = pwd_alloc();
FILEDESC_XLOCK(fdp);
oldpwd = fdp->fd_pwd;
oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp);
pwd_fill(oldpwd, newpwd);
if (newpwd->pwd_cdir == NULL) {
vrefact(rootvnode);
@ -3441,7 +3451,7 @@ mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
if (fdp == NULL)
continue;
FILEDESC_XLOCK(fdp);
oldpwd = fdp->fd_pwd;
oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp);
if (oldpwd == NULL ||
(oldpwd->pwd_cdir != olddp &&
oldpwd->pwd_rdir != olddp &&
@ -4074,6 +4084,7 @@ int
kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
{
struct filedesc *fdp;
struct pwd *pwd;
struct export_fd_buf *efbuf;
struct vnode *cdir;
int error;
@ -4091,7 +4102,8 @@ kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
efbuf->remainder = maxlen;
FILEDESC_SLOCK(fdp);
cdir = fdp->fd_pwd->pwd_cdir;
pwd = FILEDESC_LOCKED_LOAD_PWD(fdp);
cdir = pwd->pwd_cdir;
if (cdir == NULL) {
error = EINVAL;
} else {
@ -4279,6 +4291,9 @@ filelistinit(void *dummy)
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
filedesc0_zone = uma_zcreate("filedesc0", sizeof(struct filedesc0),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR);
pwd_smr = uma_zone_get_smr(pwd_zone);
mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
}
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);

View File

@ -2063,6 +2063,22 @@ linker_hwpmc_list_objects(void)
}
#endif
/* check if root file system is not mounted */
static bool
linker_root_mounted(void)
{
struct pwd *pwd;
bool ret;
if (rootvnode == NULL)
return (false);
pwd = pwd_hold(curthread);
ret = pwd->pwd_rdir != NULL;
pwd_drop(pwd);
return (ret);
}
/*
* Find a file which contains given module and load it, if "parent" is not
* NULL, register a reference to it.
@ -2084,15 +2100,13 @@ linker_load_module(const char *kldname, const char *modname,
*/
KASSERT(verinfo == NULL, ("linker_load_module: verinfo"
" is not NULL"));
/* check if root file system is not mounted */
if (rootvnode == NULL || curproc->p_fd->fd_pwd->pwd_rdir == NULL)
if (!linker_root_mounted())
return (ENXIO);
pathname = linker_search_kld(kldname);
} else {
if (modlist_lookup2(modname, verinfo) != NULL)
return (EEXIST);
/* check if root file system is not mounted */
if (rootvnode == NULL || curproc->p_fd->fd_pwd->pwd_rdir == NULL)
if (!linker_root_mounted())
return (ENXIO);
if (kldname != NULL)
pathname = strdup(kldname, M_LINKER);

View File

@ -42,6 +42,8 @@
#include <sys/priority.h>
#include <sys/seqc.h>
#include <sys/sx.h>
#include <sys/_smr.h>
#include <sys/smr_types.h>
#include <machine/_limits.h>
@ -76,16 +78,23 @@ struct fdescenttbl {
*/
#define NDSLOTTYPE u_long
/*
* This struct is copy-on-write and allocated from an SMR zone.
* All fields are constant after initialization apart from the reference count.
*
* Check pwd_* routines for usage.
*/
struct pwd {
volatile u_int pwd_refcount;
struct vnode *pwd_cdir; /* current directory */
struct vnode *pwd_rdir; /* root directory */
struct vnode *pwd_jdir; /* jail root directory */
};
typedef SMR_POINTER(struct pwd *) smrpwd_t;
struct filedesc {
struct fdescenttbl *fd_files; /* open files table */
struct pwd *fd_pwd; /* directories */
smrpwd_t fd_pwd; /* directories */
NDSLOTTYPE *fd_map; /* bitmap of free fds */
int fd_lastfile; /* high-water mark of fd_ofiles */
int fd_freefile; /* approx. next free file */
@ -141,6 +150,38 @@ struct filedesc_to_leader {
SX_NOTRECURSED)
#define FILEDESC_UNLOCK_ASSERT(fdp) sx_assert(&(fdp)->fd_sx, SX_UNLOCKED)
#define FILEDESC_LOCKED_LOAD_PWD(fdp) ({ \
struct filedesc *_fdp = (fdp); \
struct pwd *_pwd; \
_pwd = smr_serialized_load(&(_fdp)->fd_pwd, \
(FILEDESC_LOCK_ASSERT(_fdp), true)); \
_pwd; \
})
#define FILEDESC_XLOCKED_LOAD_PWD(fdp) ({ \
struct filedesc *_fdp = (fdp); \
struct pwd *_pwd; \
_pwd = smr_serialized_load(&(_fdp)->fd_pwd, \
(FILEDESC_XLOCK_ASSERT(_fdp), true)); \
_pwd; \
})
#else
/*
* Accessor for libkvm et al.
*/
#define FILEDESC_KVM_LOAD_PWD(fdp) ({ \
struct filedesc *_fdp = (fdp); \
struct pwd *_pwd; \
_pwd = smr_kvm_load(&(_fdp)->fd_pwd); \
_pwd; \
})
#endif
#ifdef _KERNEL
/* Operation types for kern_dup(). */
enum {
FDDUP_NORMAL, /* dup() behavior. */
@ -265,8 +306,8 @@ static inline void
pwd_set(struct filedesc *fdp, struct pwd *newpwd)
{
FILEDESC_XLOCK_ASSERT(fdp);
fdp->fd_pwd = newpwd;
smr_serialized_store(&fdp->fd_pwd, newpwd,
(FILEDESC_XLOCK_ASSERT(fdp), true));
}
#endif /* _KERNEL */

View File

@ -3590,6 +3590,7 @@ buffered_write(fp, uio, active_cred, flags, td)
int flags;
struct thread *td;
{
struct pwd *pwd;
struct vnode *devvp, *vp;
struct inode *ip;
struct buf *bp;
@ -3610,7 +3611,8 @@ buffered_write(fp, uio, active_cred, flags, td)
return (EINVAL);
fdp = td->td_proc->p_fd;
FILEDESC_SLOCK(fdp);
vp = fdp->fd_pwd->pwd_cdir;
pwd = FILEDESC_LOCKED_LOAD_PWD(fdp);
vp = pwd->pwd_cdir;
vref(vp);
FILEDESC_SUNLOCK(fdp);
vn_lock(vp, LK_SHARED | LK_RETRY);