From 8d03b99b9dafe92896f405c79f846667637c0194 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sun, 1 Mar 2020 21:53:46 +0000 Subject: [PATCH] fd: move vnodes out of filedesc into a dedicated structure The new structure is copy-on-write. With the assumption that path lookups are significantly more frequent than chdirs and chrooting this is a win. This provides stable root and jail root vnodes without the need to reference them on lookup, which in turn means less work on globally shared structures. Note this also happens to fix a bug where jail vnode was never referenced, meaning subsequent access on lookup could run into use-after-free. Reviewed by: kib Differential Revision: https://reviews.freebsd.org/D23884 --- lib/libprocstat/libprocstat.c | 51 +++-- sys/compat/linprocfs/linprocfs.c | 28 +-- sys/kern/kern_descrip.c | 309 ++++++++++++++++++---------- sys/kern/kern_linker.c | 4 +- sys/kern/vfs_cache.c | 45 ++-- sys/kern/vfs_lookup.c | 37 ++-- sys/kern/vfs_mountroot.c | 16 +- sys/security/audit/audit_bsm_klib.c | 27 ++- sys/sys/filedesc.h | 22 +- sys/ufs/ffs/ffs_alloc.c | 10 +- 10 files changed, 320 insertions(+), 229 deletions(-) diff --git a/lib/libprocstat/libprocstat.c b/lib/libprocstat/libprocstat.c index 26bc71d31ede..de3b674be7c8 100644 --- a/lib/libprocstat/libprocstat.c +++ b/lib/libprocstat/libprocstat.c @@ -459,6 +459,7 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap { struct file file; struct filedesc filed; + struct pwd pwd; struct vm_map_entry vmentry; struct vm_object object; struct vmspace vmspace; @@ -473,6 +474,7 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap int i, fflags; int prot, type; unsigned int nfiles; + bool haspwd; assert(procstat); kd = procstat->kd; @@ -485,6 +487,15 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap warnx("can't read filedesc at %p", (void *)kp->ki_fd); return (NULL); } + haspwd = false; + if (filed.fd_pwd != NULL) { + if (!kvm_read_all(kd, (unsigned long)filed.fd_pwd, &pwd, + sizeof(pwd))) { + warnx("can't read fd_pwd at %p", (void *)filed.fd_pwd); + return (NULL); + } + haspwd = true; + } /* * Allocate list head. @@ -495,25 +506,27 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap STAILQ_INIT(head); /* root directory vnode, if one. */ - if (filed.fd_rdir) { - entry = filestat_new_entry(filed.fd_rdir, PS_FST_TYPE_VNODE, -1, - PS_FST_FFLAG_READ, PS_FST_UFLAG_RDIR, 0, 0, NULL, NULL); - if (entry != NULL) - STAILQ_INSERT_TAIL(head, entry, next); - } - /* current working directory vnode. */ - if (filed.fd_cdir) { - entry = filestat_new_entry(filed.fd_cdir, PS_FST_TYPE_VNODE, -1, - PS_FST_FFLAG_READ, PS_FST_UFLAG_CDIR, 0, 0, NULL, NULL); - if (entry != NULL) - STAILQ_INSERT_TAIL(head, entry, next); - } - /* jail root, if any. */ - if (filed.fd_jdir) { - entry = filestat_new_entry(filed.fd_jdir, PS_FST_TYPE_VNODE, -1, - PS_FST_FFLAG_READ, PS_FST_UFLAG_JAIL, 0, 0, NULL, NULL); - if (entry != NULL) - STAILQ_INSERT_TAIL(head, entry, next); + if (haspwd) { + if (pwd.pwd_rdir) { + entry = filestat_new_entry(pwd.pwd_rdir, PS_FST_TYPE_VNODE, -1, + PS_FST_FFLAG_READ, PS_FST_UFLAG_RDIR, 0, 0, NULL, NULL); + if (entry != NULL) + STAILQ_INSERT_TAIL(head, entry, next); + } + /* current working directory vnode. */ + if (pwd.pwd_cdir) { + entry = filestat_new_entry(pwd.pwd_cdir, PS_FST_TYPE_VNODE, -1, + PS_FST_FFLAG_READ, PS_FST_UFLAG_CDIR, 0, 0, NULL, NULL); + if (entry != NULL) + STAILQ_INSERT_TAIL(head, entry, next); + } + /* jail root, if any. */ + if (pwd.pwd_jdir) { + entry = filestat_new_entry(pwd.pwd_jdir, PS_FST_TYPE_VNODE, -1, + PS_FST_FFLAG_READ, PS_FST_UFLAG_JAIL, 0, 0, NULL, NULL); + if (entry != NULL) + STAILQ_INSERT_TAIL(head, entry, next); + } } /* ktrace vnode, if one */ if (kp->ki_tracep) { diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c index caa57f160a2a..c441193a4991 100644 --- a/sys/compat/linprocfs/linprocfs.c +++ b/sys/compat/linprocfs/linprocfs.c @@ -1028,23 +1028,16 @@ linprocfs_doprocstatus(PFS_FILL_ARGS) static int linprocfs_doproccwd(PFS_FILL_ARGS) { - struct filedesc *fdp; - struct vnode *vp; + struct pwd *pwd; char *fullpath = "unknown"; char *freepath = NULL; - fdp = p->p_fd; - FILEDESC_SLOCK(fdp); - vp = fdp->fd_cdir; - if (vp != NULL) - VREF(vp); - FILEDESC_SUNLOCK(fdp); - vn_fullpath(td, vp, &fullpath, &freepath); - if (vp != NULL) - vrele(vp); + pwd = pwd_hold(td); + vn_fullpath(td, pwd->pwd_cdir, &fullpath, &freepath); sbuf_printf(sb, "%s", fullpath); if (freepath) free(freepath, M_TEMP); + pwd_drop(pwd); return (0); } @@ -1054,23 +1047,18 @@ linprocfs_doproccwd(PFS_FILL_ARGS) static int linprocfs_doprocroot(PFS_FILL_ARGS) { - struct filedesc *fdp; + struct pwd *pwd; struct vnode *vp; char *fullpath = "unknown"; char *freepath = NULL; - fdp = p->p_fd; - FILEDESC_SLOCK(fdp); - vp = jailed(p->p_ucred) ? fdp->fd_jdir : fdp->fd_rdir; - if (vp != NULL) - VREF(vp); - FILEDESC_SUNLOCK(fdp); + pwd = pwd_hold(td); + vp = jailed(p->p_ucred) ? pwd->pwd_jdir : pwd->pwd_rdir; vn_fullpath(td, vp, &fullpath, &freepath); - if (vp != NULL) - vrele(vp); sbuf_printf(sb, "%s", fullpath); if (freepath) free(freepath, M_TEMP); + pwd_drop(pwd); return (0); } diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index ec07a70e3fa3..a40e1961fcc8 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -91,6 +91,7 @@ __FBSDID("$FreeBSD$"); #include static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); +static MALLOC_DEFINE(M_PWD, "pwd", "Descriptor table vnodes"); static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", "file desc to leader structures"); static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); @@ -116,6 +117,8 @@ static void filecaps_copy_finish(const struct filecaps *src, static u_long *filecaps_free_prep(struct filecaps *fcaps); static void filecaps_free_finish(u_long *ioctls); +static struct pwd *pwd_alloc(void); + /* * Each process has: * @@ -314,24 +317,6 @@ fdfree(struct filedesc *fdp, int fd) fdunused(fdp, fd); } -void -pwd_ensure_dirs(void) -{ - struct filedesc *fdp; - - fdp = curproc->p_fd; - FILEDESC_XLOCK(fdp); - if (fdp->fd_cdir == NULL) { - fdp->fd_cdir = rootvnode; - vrefact(rootvnode); - } - if (fdp->fd_rdir == NULL) { - fdp->fd_rdir = rootvnode; - vrefact(rootvnode); - } - FILEDESC_XUNLOCK(fdp); -} - /* * System calls on descriptors. */ @@ -2014,22 +1999,16 @@ fdinit(struct filedesc *fdp, bool prepfiles) newfdp->fd_files = (struct fdescenttbl *)&newfdp0->fd_dfiles; newfdp->fd_files->fdt_nfiles = NDFILE; - if (fdp == NULL) + if (fdp == NULL) { + newfdp->fd_pwd = pwd_alloc(); return (newfdp); + } if (prepfiles && fdp->fd_lastfile >= newfdp->fd_nfiles) fdgrowtable(newfdp, fdp->fd_lastfile + 1); FILEDESC_SLOCK(fdp); - newfdp->fd_cdir = fdp->fd_cdir; - if (newfdp->fd_cdir) - vrefact(newfdp->fd_cdir); - newfdp->fd_rdir = fdp->fd_rdir; - if (newfdp->fd_rdir) - vrefact(newfdp->fd_rdir); - newfdp->fd_jdir = fdp->fd_jdir; - if (newfdp->fd_jdir) - vrefact(newfdp->fd_jdir); + newfdp->fd_pwd = pwd_hold_filedesc(fdp); if (!prepfiles) { FILEDESC_SUNLOCK(fdp); @@ -2327,7 +2306,7 @@ fdescfree(struct thread *td) { struct proc *p; struct filedesc *fdp; - struct vnode *cdir, *jdir, *rdir; + struct pwd *pwd; p = td->td_proc; fdp = p->p_fd; @@ -2349,20 +2328,11 @@ fdescfree(struct thread *td) return; FILEDESC_XLOCK(fdp); - cdir = fdp->fd_cdir; - fdp->fd_cdir = NULL; - rdir = fdp->fd_rdir; - fdp->fd_rdir = NULL; - jdir = fdp->fd_jdir; - fdp->fd_jdir = NULL; + pwd = fdp->fd_pwd; + pwd_set(fdp, NULL); FILEDESC_XUNLOCK(fdp); - if (cdir != NULL) - vrele(cdir); - if (rdir != NULL) - vrele(rdir); - if (jdir != NULL) - vrele(jdir); + pwd_drop(pwd); fdescfree_fds(td, fdp, 1); } @@ -2371,13 +2341,7 @@ void fdescfree_remapped(struct filedesc *fdp) { - if (fdp->fd_cdir != NULL) - vrele(fdp->fd_cdir); - if (fdp->fd_rdir != NULL) - vrele(fdp->fd_rdir); - if (fdp->fd_jdir != NULL) - vrele(fdp->fd_jdir); - + pwd_drop(fdp->fd_pwd); fdescfree_fds(curthread, fdp, 0); } @@ -3287,37 +3251,117 @@ chroot_refuse_vdir_fds(struct filedesc *fdp) return (0); } +static void +pwd_fill(struct pwd *oldpwd, struct pwd *newpwd) +{ + + if (newpwd->pwd_cdir == NULL && oldpwd->pwd_cdir != NULL) { + vrefact(oldpwd->pwd_cdir); + newpwd->pwd_cdir = oldpwd->pwd_cdir; + } + + if (newpwd->pwd_rdir == NULL && oldpwd->pwd_rdir != NULL) { + vrefact(oldpwd->pwd_rdir); + newpwd->pwd_rdir = oldpwd->pwd_rdir; + } + + if (newpwd->pwd_jdir == NULL && oldpwd->pwd_jdir != NULL) { + vrefact(oldpwd->pwd_jdir); + newpwd->pwd_jdir = oldpwd->pwd_jdir; + } +} + +struct pwd * +pwd_hold_filedesc(struct filedesc *fdp) +{ + struct pwd *pwd; + + FILEDESC_LOCK_ASSERT(fdp); + pwd = fdp->fd_pwd; + if (pwd != NULL) + refcount_acquire(&pwd->pwd_refcount); + return (pwd); +} + +struct pwd * +pwd_hold(struct thread *td) +{ + struct filedesc *fdp; + struct pwd *pwd; + + fdp = td->td_proc->p_fd; + + FILEDESC_SLOCK(fdp); + pwd = fdp->fd_pwd; + MPASS(pwd != NULL); + refcount_acquire(&pwd->pwd_refcount); + FILEDESC_SUNLOCK(fdp); + return (pwd); +} + +static struct pwd * +pwd_alloc(void) +{ + struct pwd *pwd; + + pwd = malloc(sizeof(*pwd), M_PWD, M_WAITOK | M_ZERO); + refcount_init(&pwd->pwd_refcount, 1); + return (pwd); +} + +void +pwd_drop(struct pwd *pwd) +{ + + if (!refcount_release(&pwd->pwd_refcount)) + return; + + if (pwd->pwd_cdir != NULL) + vrele(pwd->pwd_cdir); + if (pwd->pwd_rdir != NULL) + vrele(pwd->pwd_rdir); + if (pwd->pwd_jdir != NULL) + vrele(pwd->pwd_jdir); + free(pwd, M_PWD); +} + /* - * Common routine for kern_chroot() and jail_attach(). The caller is - * responsible for invoking priv_check() and mac_vnode_check_chroot() to - * authorize this operation. - */ +* Common routine for kern_chroot() and jail_attach(). The caller is +* responsible for invoking priv_check() and mac_vnode_check_chroot() to +* authorize this operation. +*/ int pwd_chroot(struct thread *td, struct vnode *vp) { struct filedesc *fdp; - struct vnode *oldvp; + struct pwd *newpwd, *oldpwd; int error; fdp = td->td_proc->p_fd; + newpwd = pwd_alloc(); FILEDESC_XLOCK(fdp); + oldpwd = fdp->fd_pwd; if (chroot_allow_open_directories == 0 || - (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { + (chroot_allow_open_directories == 1 && + oldpwd->pwd_rdir != rootvnode)) { error = chroot_refuse_vdir_fds(fdp); if (error != 0) { FILEDESC_XUNLOCK(fdp); + pwd_drop(newpwd); return (error); } } - oldvp = fdp->fd_rdir; + vrefact(vp); - fdp->fd_rdir = vp; - if (fdp->fd_jdir == NULL) { + newpwd->pwd_rdir = vp; + if (oldpwd->pwd_jdir == NULL) { vrefact(vp); - fdp->fd_jdir = vp; + newpwd->pwd_jdir = vp; } + pwd_fill(oldpwd, newpwd); + pwd_set(fdp, newpwd); FILEDESC_XUNLOCK(fdp); - vrele(oldvp); + pwd_drop(oldpwd); return (0); } @@ -3325,16 +3369,51 @@ void pwd_chdir(struct thread *td, struct vnode *vp) { struct filedesc *fdp; - struct vnode *oldvp; + struct pwd *newpwd, *oldpwd; + VNPASS(vp->v_usecount > 0, vp); + + newpwd = pwd_alloc(); fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); - VNASSERT(vp->v_usecount > 0, vp, - ("chdir to a vnode with zero usecount")); - oldvp = fdp->fd_cdir; - fdp->fd_cdir = vp; + oldpwd = fdp->fd_pwd; + newpwd->pwd_cdir = vp; + pwd_fill(oldpwd, newpwd); + pwd_set(fdp, newpwd); FILEDESC_XUNLOCK(fdp); - vrele(oldvp); + pwd_drop(oldpwd); +} + +void +pwd_ensure_dirs(void) +{ + struct filedesc *fdp; + struct pwd *oldpwd, *newpwd; + + fdp = curproc->p_fd; + FILEDESC_XLOCK(fdp); + oldpwd = fdp->fd_pwd; + if (oldpwd->pwd_cdir != NULL && oldpwd->pwd_rdir != NULL) { + FILEDESC_XUNLOCK(fdp); + return; + } + FILEDESC_XUNLOCK(fdp); + + newpwd = pwd_alloc(); + FILEDESC_XLOCK(fdp); + oldpwd = fdp->fd_pwd; + pwd_fill(oldpwd, newpwd); + if (newpwd->pwd_cdir == NULL) { + vrefact(rootvnode); + newpwd->pwd_cdir = rootvnode; + } + if (newpwd->pwd_rdir == NULL) { + vrefact(rootvnode); + newpwd->pwd_rdir = rootvnode; + } + pwd_set(fdp, newpwd); + FILEDESC_XUNLOCK(fdp); + pwd_drop(oldpwd); } /* @@ -3345,6 +3424,7 @@ void mountcheckdirs(struct vnode *olddp, struct vnode *newdp) { struct filedesc *fdp; + struct pwd *newpwd, *oldpwd; struct prison *pr; struct proc *p; int nrele; @@ -3352,6 +3432,7 @@ mountcheckdirs(struct vnode *olddp, struct vnode *newdp) if (vrefcnt(olddp) == 1) return; nrele = 0; + newpwd = pwd_alloc(); sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); @@ -3360,25 +3441,36 @@ mountcheckdirs(struct vnode *olddp, struct vnode *newdp) if (fdp == NULL) continue; FILEDESC_XLOCK(fdp); - if (fdp->fd_cdir == olddp) { - vrefact(newdp); - fdp->fd_cdir = newdp; - nrele++; + oldpwd = fdp->fd_pwd; + if (oldpwd == NULL || + (oldpwd->pwd_cdir != olddp && + oldpwd->pwd_rdir != olddp && + oldpwd->pwd_jdir != olddp)) { + FILEDESC_XUNLOCK(fdp); + fddrop(fdp); + continue; } - if (fdp->fd_rdir == olddp) { + if (oldpwd->pwd_cdir == olddp) { vrefact(newdp); - fdp->fd_rdir = newdp; - nrele++; + newpwd->pwd_cdir = newdp; } - if (fdp->fd_jdir == olddp) { + if (oldpwd->pwd_rdir == olddp) { vrefact(newdp); - fdp->fd_jdir = newdp; - nrele++; + newpwd->pwd_rdir = newdp; } + if (oldpwd->pwd_jdir == olddp) { + vrefact(newdp); + newpwd->pwd_jdir = newdp; + } + pwd_fill(oldpwd, newpwd); + pwd_set(fdp, newpwd); FILEDESC_XUNLOCK(fdp); + pwd_drop(oldpwd); fddrop(fdp); + newpwd = pwd_alloc(); } sx_sunlock(&allproc_lock); + pwd_drop(newpwd); if (rootvnode == olddp) { vrefact(newdp); rootvnode = newdp; @@ -3714,6 +3806,7 @@ kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, struct filedesc *fdp; struct export_fd_buf *efbuf; struct vnode *cttyvp, *textvp, *tracevp; + struct pwd *pwd; int error, i; cap_rights_t rights; @@ -3754,20 +3847,24 @@ kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, goto fail; efbuf->fdp = fdp; FILEDESC_SLOCK(fdp); - /* working directory */ - if (fdp->fd_cdir != NULL) { - vrefact(fdp->fd_cdir); - export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD, FREAD, efbuf); - } - /* root directory */ - if (fdp->fd_rdir != NULL) { - vrefact(fdp->fd_rdir); - export_vnode_to_sb(fdp->fd_rdir, KF_FD_TYPE_ROOT, FREAD, efbuf); - } - /* jail directory */ - if (fdp->fd_jdir != NULL) { - vrefact(fdp->fd_jdir); - export_vnode_to_sb(fdp->fd_jdir, KF_FD_TYPE_JAIL, FREAD, efbuf); + pwd = pwd_hold_filedesc(fdp); + if (pwd != NULL) { + /* working directory */ + if (pwd->pwd_cdir != NULL) { + vrefact(pwd->pwd_cdir); + export_vnode_to_sb(pwd->pwd_cdir, KF_FD_TYPE_CWD, FREAD, efbuf); + } + /* root directory */ + if (pwd->pwd_rdir != NULL) { + vrefact(pwd->pwd_rdir); + export_vnode_to_sb(pwd->pwd_rdir, KF_FD_TYPE_ROOT, FREAD, efbuf); + } + /* jail directory */ + if (pwd->pwd_jdir != NULL) { + vrefact(pwd->pwd_jdir); + export_vnode_to_sb(pwd->pwd_jdir, KF_FD_TYPE_JAIL, FREAD, efbuf); + } + pwd_drop(pwd); } for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) { if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) @@ -3882,6 +3979,7 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) struct kinfo_ofile *okif; struct kinfo_file *kif; struct filedesc *fdp; + struct pwd *pwd; int error, i, *name; struct file *fp; struct proc *p; @@ -3897,15 +3995,19 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); okif = malloc(sizeof(*okif), M_TEMP, M_WAITOK); FILEDESC_SLOCK(fdp); - if (fdp->fd_cdir != NULL) - export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif, - okif, fdp, req); - if (fdp->fd_rdir != NULL) - export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif, - okif, fdp, req); - if (fdp->fd_jdir != NULL) - export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif, - okif, fdp, req); + pwd = pwd_hold_filedesc(fdp); + if (pwd != NULL) { + if (pwd->pwd_cdir != NULL) + export_vnode_for_osysctl(pwd->pwd_cdir, KF_FD_TYPE_CWD, kif, + okif, fdp, req); + if (pwd->pwd_rdir != NULL) + export_vnode_for_osysctl(pwd->pwd_rdir, KF_FD_TYPE_ROOT, kif, + okif, fdp, req); + if (pwd->pwd_jdir != NULL) + export_vnode_for_osysctl(pwd->pwd_jdir, KF_FD_TYPE_JAIL, kif, + okif, fdp, req); + pwd_drop(pwd); + } for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) { if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; @@ -3973,6 +4075,7 @@ kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen) { struct filedesc *fdp; struct export_fd_buf *efbuf; + struct vnode *cdir; int error; PROC_LOCK_ASSERT(p, MA_OWNED); @@ -3988,12 +4091,12 @@ kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen) efbuf->remainder = maxlen; FILEDESC_SLOCK(fdp); - if (fdp->fd_cdir == NULL) + cdir = fdp->fd_pwd->pwd_cdir; + if (cdir == NULL) { error = EINVAL; - else { - vrefact(fdp->fd_cdir); - error = export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD, - FREAD, efbuf); + } else { + vrefact(cdir); + error = export_vnode_to_sb(cdir, KF_FD_TYPE_CWD, FREAD, efbuf); } FILEDESC_SUNLOCK(fdp); fddrop(fdp); diff --git a/sys/kern/kern_linker.c b/sys/kern/kern_linker.c index 0ae605a9a129..593540ef3003 100644 --- a/sys/kern/kern_linker.c +++ b/sys/kern/kern_linker.c @@ -2085,14 +2085,14 @@ linker_load_module(const char *kldname, const char *modname, KASSERT(verinfo == NULL, ("linker_load_module: verinfo" " is not NULL")); /* check if root file system is not mounted */ - if (rootvnode == NULL || curproc->p_fd->fd_rdir == NULL) + if (rootvnode == NULL || curproc->p_fd->fd_pwd->pwd_rdir == NULL) return (ENXIO); pathname = linker_search_kld(kldname); } else { if (modlist_lookup2(modname, verinfo) != NULL) return (EEXIST); /* check if root file system is not mounted */ - if (rootvnode == NULL || curproc->p_fd->fd_rdir == NULL) + if (rootvnode == NULL || curproc->p_fd->fd_pwd->pwd_rdir == NULL) return (ENXIO); if (kldname != NULL) pathname = strdup(kldname, M_LINKER); diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 31d95698d179..f581dedcde7a 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -2196,20 +2196,12 @@ sys___getcwd(struct thread *td, struct __getcwd_args *uap) int vn_getcwd(struct thread *td, char *buf, char **retbuf, size_t *buflen) { - struct filedesc *fdp; - struct vnode *cdir, *rdir; + struct pwd *pwd; int error; - fdp = td->td_proc->p_fd; - FILEDESC_SLOCK(fdp); - cdir = fdp->fd_cdir; - vrefact(cdir); - rdir = fdp->fd_rdir; - vrefact(rdir); - FILEDESC_SUNLOCK(fdp); - error = vn_fullpath_any(td, cdir, rdir, buf, retbuf, buflen); - vrele(rdir); - vrele(cdir); + pwd = pwd_hold(td); + error = vn_fullpath_any(td, pwd->pwd_cdir, pwd->pwd_rdir, buf, retbuf, buflen); + pwd_drop(pwd); #ifdef KTRACE if (KTRPOINT(curthread, KTR_NAMEI) && error == 0) @@ -2256,9 +2248,8 @@ sys___realpathat(struct thread *td, struct __realpathat_args *uap) int vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) { + struct pwd *pwd; char *buf; - struct filedesc *fdp; - struct vnode *rdir; size_t buflen; int error; @@ -2267,13 +2258,9 @@ vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) buflen = MAXPATHLEN; buf = malloc(buflen, M_TEMP, M_WAITOK); - fdp = td->td_proc->p_fd; - FILEDESC_SLOCK(fdp); - rdir = fdp->fd_rdir; - vrefact(rdir); - FILEDESC_SUNLOCK(fdp); - error = vn_fullpath_any(td, vn, rdir, buf, retbuf, &buflen); - vrele(rdir); + pwd = pwd_hold(td); + error = vn_fullpath_any(td, vn, pwd->pwd_rdir, buf, retbuf, &buflen); + pwd_drop(pwd); if (!error) *freebuf = buf; @@ -2541,8 +2528,7 @@ vn_fullpath_hardlink(struct thread *td, struct nameidata *ndp, char **retbuf, char **freebuf, size_t *buflen) { char *buf, *tmpbuf; - struct filedesc *fdp; - struct vnode *rdir; + struct pwd *pwd; struct componentname *cnp; struct vnode *vp; size_t addend; @@ -2557,11 +2543,7 @@ vn_fullpath_hardlink(struct thread *td, struct nameidata *ndp, char **retbuf, slash_prefixed = false; buf = malloc(*buflen, M_TEMP, M_WAITOK); - fdp = td->td_proc->p_fd; - FILEDESC_SLOCK(fdp); - rdir = fdp->fd_rdir; - vrefact(rdir); - FILEDESC_SUNLOCK(fdp); + pwd = pwd_hold(td); addend = 0; vp = ndp->ni_vp; @@ -2582,16 +2564,17 @@ vn_fullpath_hardlink(struct thread *td, struct nameidata *ndp, char **retbuf, } vref(vp); - error = vn_fullpath_dir(td, vp, rdir, buf, retbuf, buflen, slash_prefixed, addend); + error = vn_fullpath_dir(td, vp, pwd->pwd_rdir, buf, retbuf, buflen, + slash_prefixed, addend); if (error != 0) goto out_bad; - vrele(rdir); + pwd_drop(pwd); *freebuf = buf; return (0); out_bad: - vrele(rdir); + pwd_drop(pwd); free(buf, M_TEMP); return (error); } diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 58a97a1337f0..bdfdaa3d0013 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -303,7 +303,6 @@ namei_handle_root(struct nameidata *ndp, struct vnode **dpp, u_int n) int namei(struct nameidata *ndp) { - struct filedesc *fdp; /* pointer to file descriptor state */ char *cp; /* pointer into pathname argument */ struct vnode *dp; /* the directory we are searching */ struct iovec aiov; /* uio for reading symbolic links */ @@ -311,6 +310,7 @@ namei(struct nameidata *ndp) struct file *dfp; struct thread *td; struct proc *p; + struct pwd *pwd; cap_rights_t rights; struct filecaps dirfd_caps; struct uio auio; @@ -327,7 +327,6 @@ namei(struct nameidata *ndp) ("namei: flags contaminated with nameiops")); MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR || ndp->ni_startdir->v_type == VBAD); - fdp = p->p_fd; TAILQ_INIT(&ndp->ni_cap_tracker); ndp->ni_lcf = 0; @@ -395,13 +394,13 @@ namei(struct nameidata *ndp) /* * Get starting point for the translation. */ - FILEDESC_SLOCK(fdp); + pwd = pwd_hold(td); /* * The reference on ni_rootdir is acquired in the block below to avoid * back-to-back atomics for absolute lookups. */ - ndp->ni_rootdir = fdp->fd_rdir; - ndp->ni_topdir = fdp->fd_jdir; + ndp->ni_rootdir = pwd->pwd_rdir; + ndp->ni_topdir = pwd->pwd_jdir; startdir_used = 0; dp = NULL; @@ -422,7 +421,7 @@ namei(struct nameidata *ndp) dp = ndp->ni_startdir; startdir_used = 1; } else if (ndp->ni_dirfd == AT_FDCWD) { - dp = fdp->fd_cdir; + dp = pwd->pwd_cdir; if (dp == ndp->ni_rootdir) { vrefactn(dp, 2); } else { @@ -442,7 +441,7 @@ namei(struct nameidata *ndp) * Effectively inlined fgetvp_rights, because we need to * inspect the file as well as grabbing the vnode. */ - error = fget_cap_locked(fdp, ndp->ni_dirfd, &rights, + error = fget_cap(td, ndp->ni_dirfd, &rights, &dfp, &ndp->ni_filecaps); if (error != 0) { /* @@ -450,16 +449,19 @@ namei(struct nameidata *ndp) * or capability-related, both of which can be * safely returned to the caller. */ - } else if (dfp->f_ops == &badfileops) { - error = EBADF; - } else if (dfp->f_vnode == NULL) { - error = ENOTDIR; } else { - dp = dfp->f_vnode; - vrefact(dp); + if (dfp->f_ops == &badfileops) { + error = EBADF; + } else if (dfp->f_vnode == NULL) { + error = ENOTDIR; + } else { + dp = dfp->f_vnode; + vrefact(dp); - if ((dfp->f_flag & FSEARCH) != 0) - cnp->cn_flags |= NOEXECCHECK; + if ((dfp->f_flag & FSEARCH) != 0) + cnp->cn_flags |= NOEXECCHECK; + } + fdrop(dfp, td); } #ifdef CAPABILITIES /* @@ -481,7 +483,7 @@ namei(struct nameidata *ndp) } if (error == 0 && (cnp->cn_flags & BENEATH) != 0) { if (ndp->ni_dirfd == AT_FDCWD) { - ndp->ni_beneath_latch = fdp->fd_cdir; + ndp->ni_beneath_latch = pwd->pwd_cdir; vrefact(ndp->ni_beneath_latch); } else { rights = ndp->ni_rightsneeded; @@ -496,7 +498,6 @@ namei(struct nameidata *ndp) if (error == 0) ndp->ni_lcf |= NI_LCF_LATCH; } - FILEDESC_SUNLOCK(fdp); /* * If we are auditing the kernel pathname, save the user pathname. */ @@ -542,6 +543,7 @@ namei(struct nameidata *ndp) nameicap_cleanup(ndp, true); SDT_PROBE2(vfs, namei, lookup, return, error, (error == 0 ? ndp->ni_vp : NULL)); + pwd_drop(pwd); return (error); } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { @@ -617,6 +619,7 @@ namei(struct nameidata *ndp) namei_cleanup_cnp(cnp); nameicap_cleanup(ndp, true); SDT_PROBE2(vfs, namei, lookup, return, error, NULL); + pwd_drop(pwd); return (error); } diff --git a/sys/kern/vfs_mountroot.c b/sys/kern/vfs_mountroot.c index 481b8de3d0fc..5a3f11133116 100644 --- a/sys/kern/vfs_mountroot.c +++ b/sys/kern/vfs_mountroot.c @@ -237,27 +237,13 @@ root_mounted(void) static void set_rootvnode(void) { - struct proc *p; if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode)) panic("set_rootvnode: Cannot find root vnode"); VOP_UNLOCK(rootvnode); - p = curthread->td_proc; - FILEDESC_XLOCK(p->p_fd); - - if (p->p_fd->fd_cdir != NULL) - vrele(p->p_fd->fd_cdir); - p->p_fd->fd_cdir = rootvnode; - VREF(rootvnode); - - if (p->p_fd->fd_rdir != NULL) - vrele(p->p_fd->fd_rdir); - p->p_fd->fd_rdir = rootvnode; - VREF(rootvnode); - - FILEDESC_XUNLOCK(p->p_fd); + pwd_ensure_dirs(); } static int diff --git a/sys/security/audit/audit_bsm_klib.c b/sys/security/audit/audit_bsm_klib.c index b10722a4e0e4..64b7a344a603 100644 --- a/sys/security/audit/audit_bsm_klib.c +++ b/sys/security/audit/audit_bsm_klib.c @@ -493,38 +493,35 @@ void audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath) { struct vnode *cdir, *rdir; - struct filedesc *fdp; + struct pwd *pwd; cap_rights_t rights; int error; + bool vrele_cdir; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s: at %s:%d", __func__, __FILE__, __LINE__); - rdir = cdir = NULL; - fdp = td->td_proc->p_fd; - FILEDESC_SLOCK(fdp); - if (*path == '/') { - rdir = fdp->fd_rdir; - vrefact(rdir); - } else { + pwd = pwd_hold(td); + rdir = pwd->pwd_rdir; + cdir = NULL; + vrele_cdir = false; + if (*path != '/') { if (dirfd == AT_FDCWD) { - cdir = fdp->fd_cdir; - vrefact(cdir); + cdir = pwd->pwd_cdir; } else { error = fgetvp(td, dirfd, cap_rights_init(&rights), &cdir); if (error != 0) { - FILEDESC_SUNLOCK(fdp); cpath[0] = '\0'; + pwd_drop(pwd); return; } + vrele_cdir = true; } } - FILEDESC_SUNLOCK(fdp); audit_canon_path_vp(td, rdir, cdir, path, cpath); - if (rdir != NULL) - vrele(rdir); - if (cdir != NULL) + pwd_drop(pwd); + if (vrele_cdir) vrele(cdir); } diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 3aa165bf8d7c..116649757de4 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -76,11 +76,16 @@ struct fdescenttbl { */ #define NDSLOTTYPE u_long +struct pwd { + volatile u_int pwd_refcount; + struct vnode *pwd_cdir; /* current directory */ + struct vnode *pwd_rdir; /* root directory */ + struct vnode *pwd_jdir; /* jail root directory */ +}; + struct filedesc { struct fdescenttbl *fd_files; /* open files table */ - struct vnode *fd_cdir; /* current directory */ - struct vnode *fd_rdir; /* root directory */ - struct vnode *fd_jdir; /* jail root directory */ + struct pwd *fd_pwd; /* directories */ NDSLOTTYPE *fd_map; /* bitmap of free fds */ int fd_lastfile; /* high-water mark of fd_ofiles */ int fd_freefile; /* approx. next free file */ @@ -253,6 +258,17 @@ void pwd_chdir(struct thread *td, struct vnode *vp); int pwd_chroot(struct thread *td, struct vnode *vp); void pwd_ensure_dirs(void); +struct pwd *pwd_hold_filedesc(struct filedesc *fdp); +struct pwd *pwd_hold(struct thread *td); +void pwd_drop(struct pwd *pwd); +static inline void +pwd_set(struct filedesc *fdp, struct pwd *newpwd) +{ + + FILEDESC_XLOCK_ASSERT(fdp); + fdp->fd_pwd = newpwd; +} + #endif /* _KERNEL */ #endif /* !_SYS_FILEDESC_H_ */ diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index ea441404f477..fb270b11d912 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -3190,6 +3190,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) struct inode *ip, *dp; struct mount *mp; struct fs *fs; + struct pwd *pwd; ufs2_daddr_t blkno; long blkcnt, blksize; u_long key; @@ -3448,11 +3449,11 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) /* * Now we get and lock the child directory containing "..". */ - FILEDESC_SLOCK(td->td_proc->p_fd); - dvp = td->td_proc->p_fd->fd_cdir; - FILEDESC_SUNLOCK(td->td_proc->p_fd); + pwd = pwd_hold(td); + dvp = pwd->pwd_cdir; if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) { vput(fdvp); + pwd_drop(pwd); break; } dp = VTOI(dvp); @@ -3463,6 +3464,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) cache_purge(dvp); vput(dvp); vput(fdvp); + pwd_drop(pwd); break; case FFS_UNLINK: @@ -3607,7 +3609,7 @@ buffered_write(fp, uio, active_cred, flags, td) return (EINVAL); fdp = td->td_proc->p_fd; FILEDESC_SLOCK(fdp); - vp = fdp->fd_cdir; + vp = fdp->fd_pwd->pwd_cdir; vref(vp); FILEDESC_SUNLOCK(fdp); vn_lock(vp, LK_SHARED | LK_RETRY);