fd: move vnodes out of filedesc into a dedicated structure

The new structure is copy-on-write. Assuming that path lookups are
significantly more frequent than chdir and chroot calls, this is a win.

This provides stable root and jail root vnodes without the need to reference
them on lookup, which in turn means less work on globally shared structures.
Note that this also happens to fix a bug where the jail vnode was never
referenced, meaning a subsequent access on lookup could run into a
use-after-free.

Reviewed by:	kib
Differential Revision:	https://reviews.freebsd.org/D23884
This commit is contained in:
Mateusz Guzik 2020-03-01 21:53:46 +00:00
parent 8243063f9b
commit 8d03b99b9d
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=358503
10 changed files with 320 additions and 229 deletions

View File

@ -459,6 +459,7 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap
{ {
struct file file; struct file file;
struct filedesc filed; struct filedesc filed;
struct pwd pwd;
struct vm_map_entry vmentry; struct vm_map_entry vmentry;
struct vm_object object; struct vm_object object;
struct vmspace vmspace; struct vmspace vmspace;
@ -473,6 +474,7 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap
int i, fflags; int i, fflags;
int prot, type; int prot, type;
unsigned int nfiles; unsigned int nfiles;
bool haspwd;
assert(procstat); assert(procstat);
kd = procstat->kd; kd = procstat->kd;
@ -485,6 +487,15 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap
warnx("can't read filedesc at %p", (void *)kp->ki_fd); warnx("can't read filedesc at %p", (void *)kp->ki_fd);
return (NULL); return (NULL);
} }
haspwd = false;
if (filed.fd_pwd != NULL) {
if (!kvm_read_all(kd, (unsigned long)filed.fd_pwd, &pwd,
sizeof(pwd))) {
warnx("can't read fd_pwd at %p", (void *)filed.fd_pwd);
return (NULL);
}
haspwd = true;
}
/* /*
* Allocate list head. * Allocate list head.
@ -495,25 +506,27 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap
STAILQ_INIT(head); STAILQ_INIT(head);
/* root directory vnode, if one. */ /* root directory vnode, if one. */
if (filed.fd_rdir) { if (haspwd) {
entry = filestat_new_entry(filed.fd_rdir, PS_FST_TYPE_VNODE, -1, if (pwd.pwd_rdir) {
PS_FST_FFLAG_READ, PS_FST_UFLAG_RDIR, 0, 0, NULL, NULL); entry = filestat_new_entry(pwd.pwd_rdir, PS_FST_TYPE_VNODE, -1,
if (entry != NULL) PS_FST_FFLAG_READ, PS_FST_UFLAG_RDIR, 0, 0, NULL, NULL);
STAILQ_INSERT_TAIL(head, entry, next); if (entry != NULL)
} STAILQ_INSERT_TAIL(head, entry, next);
/* current working directory vnode. */ }
if (filed.fd_cdir) { /* current working directory vnode. */
entry = filestat_new_entry(filed.fd_cdir, PS_FST_TYPE_VNODE, -1, if (pwd.pwd_cdir) {
PS_FST_FFLAG_READ, PS_FST_UFLAG_CDIR, 0, 0, NULL, NULL); entry = filestat_new_entry(pwd.pwd_cdir, PS_FST_TYPE_VNODE, -1,
if (entry != NULL) PS_FST_FFLAG_READ, PS_FST_UFLAG_CDIR, 0, 0, NULL, NULL);
STAILQ_INSERT_TAIL(head, entry, next); if (entry != NULL)
} STAILQ_INSERT_TAIL(head, entry, next);
/* jail root, if any. */ }
if (filed.fd_jdir) { /* jail root, if any. */
entry = filestat_new_entry(filed.fd_jdir, PS_FST_TYPE_VNODE, -1, if (pwd.pwd_jdir) {
PS_FST_FFLAG_READ, PS_FST_UFLAG_JAIL, 0, 0, NULL, NULL); entry = filestat_new_entry(pwd.pwd_jdir, PS_FST_TYPE_VNODE, -1,
if (entry != NULL) PS_FST_FFLAG_READ, PS_FST_UFLAG_JAIL, 0, 0, NULL, NULL);
STAILQ_INSERT_TAIL(head, entry, next); if (entry != NULL)
STAILQ_INSERT_TAIL(head, entry, next);
}
} }
/* ktrace vnode, if one */ /* ktrace vnode, if one */
if (kp->ki_tracep) { if (kp->ki_tracep) {

View File

@ -1028,23 +1028,16 @@ linprocfs_doprocstatus(PFS_FILL_ARGS)
static int static int
linprocfs_doproccwd(PFS_FILL_ARGS) linprocfs_doproccwd(PFS_FILL_ARGS)
{ {
struct filedesc *fdp; struct pwd *pwd;
struct vnode *vp;
char *fullpath = "unknown"; char *fullpath = "unknown";
char *freepath = NULL; char *freepath = NULL;
fdp = p->p_fd; pwd = pwd_hold(td);
FILEDESC_SLOCK(fdp); vn_fullpath(td, pwd->pwd_cdir, &fullpath, &freepath);
vp = fdp->fd_cdir;
if (vp != NULL)
VREF(vp);
FILEDESC_SUNLOCK(fdp);
vn_fullpath(td, vp, &fullpath, &freepath);
if (vp != NULL)
vrele(vp);
sbuf_printf(sb, "%s", fullpath); sbuf_printf(sb, "%s", fullpath);
if (freepath) if (freepath)
free(freepath, M_TEMP); free(freepath, M_TEMP);
pwd_drop(pwd);
return (0); return (0);
} }
@ -1054,23 +1047,18 @@ linprocfs_doproccwd(PFS_FILL_ARGS)
static int static int
linprocfs_doprocroot(PFS_FILL_ARGS) linprocfs_doprocroot(PFS_FILL_ARGS)
{ {
struct filedesc *fdp; struct pwd *pwd;
struct vnode *vp; struct vnode *vp;
char *fullpath = "unknown"; char *fullpath = "unknown";
char *freepath = NULL; char *freepath = NULL;
fdp = p->p_fd; pwd = pwd_hold(td);
FILEDESC_SLOCK(fdp); vp = jailed(p->p_ucred) ? pwd->pwd_jdir : pwd->pwd_rdir;
vp = jailed(p->p_ucred) ? fdp->fd_jdir : fdp->fd_rdir;
if (vp != NULL)
VREF(vp);
FILEDESC_SUNLOCK(fdp);
vn_fullpath(td, vp, &fullpath, &freepath); vn_fullpath(td, vp, &fullpath, &freepath);
if (vp != NULL)
vrele(vp);
sbuf_printf(sb, "%s", fullpath); sbuf_printf(sb, "%s", fullpath);
if (freepath) if (freepath)
free(freepath, M_TEMP); free(freepath, M_TEMP);
pwd_drop(pwd);
return (0); return (0);
} }

View File

@ -91,6 +91,7 @@ __FBSDID("$FreeBSD$");
#include <ddb/ddb.h> #include <ddb/ddb.h>
static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
static MALLOC_DEFINE(M_PWD, "pwd", "Descriptor table vnodes");
static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
"file desc to leader structures"); "file desc to leader structures");
static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
@ -116,6 +117,8 @@ static void filecaps_copy_finish(const struct filecaps *src,
static u_long *filecaps_free_prep(struct filecaps *fcaps); static u_long *filecaps_free_prep(struct filecaps *fcaps);
static void filecaps_free_finish(u_long *ioctls); static void filecaps_free_finish(u_long *ioctls);
static struct pwd *pwd_alloc(void);
/* /*
* Each process has: * Each process has:
* *
@ -314,24 +317,6 @@ fdfree(struct filedesc *fdp, int fd)
fdunused(fdp, fd); fdunused(fdp, fd);
} }
void
pwd_ensure_dirs(void)
{
struct filedesc *fdp;
fdp = curproc->p_fd;
FILEDESC_XLOCK(fdp);
if (fdp->fd_cdir == NULL) {
fdp->fd_cdir = rootvnode;
vrefact(rootvnode);
}
if (fdp->fd_rdir == NULL) {
fdp->fd_rdir = rootvnode;
vrefact(rootvnode);
}
FILEDESC_XUNLOCK(fdp);
}
/* /*
* System calls on descriptors. * System calls on descriptors.
*/ */
@ -2014,22 +1999,16 @@ fdinit(struct filedesc *fdp, bool prepfiles)
newfdp->fd_files = (struct fdescenttbl *)&newfdp0->fd_dfiles; newfdp->fd_files = (struct fdescenttbl *)&newfdp0->fd_dfiles;
newfdp->fd_files->fdt_nfiles = NDFILE; newfdp->fd_files->fdt_nfiles = NDFILE;
if (fdp == NULL) if (fdp == NULL) {
newfdp->fd_pwd = pwd_alloc();
return (newfdp); return (newfdp);
}
if (prepfiles && fdp->fd_lastfile >= newfdp->fd_nfiles) if (prepfiles && fdp->fd_lastfile >= newfdp->fd_nfiles)
fdgrowtable(newfdp, fdp->fd_lastfile + 1); fdgrowtable(newfdp, fdp->fd_lastfile + 1);
FILEDESC_SLOCK(fdp); FILEDESC_SLOCK(fdp);
newfdp->fd_cdir = fdp->fd_cdir; newfdp->fd_pwd = pwd_hold_filedesc(fdp);
if (newfdp->fd_cdir)
vrefact(newfdp->fd_cdir);
newfdp->fd_rdir = fdp->fd_rdir;
if (newfdp->fd_rdir)
vrefact(newfdp->fd_rdir);
newfdp->fd_jdir = fdp->fd_jdir;
if (newfdp->fd_jdir)
vrefact(newfdp->fd_jdir);
if (!prepfiles) { if (!prepfiles) {
FILEDESC_SUNLOCK(fdp); FILEDESC_SUNLOCK(fdp);
@ -2327,7 +2306,7 @@ fdescfree(struct thread *td)
{ {
struct proc *p; struct proc *p;
struct filedesc *fdp; struct filedesc *fdp;
struct vnode *cdir, *jdir, *rdir; struct pwd *pwd;
p = td->td_proc; p = td->td_proc;
fdp = p->p_fd; fdp = p->p_fd;
@ -2349,20 +2328,11 @@ fdescfree(struct thread *td)
return; return;
FILEDESC_XLOCK(fdp); FILEDESC_XLOCK(fdp);
cdir = fdp->fd_cdir; pwd = fdp->fd_pwd;
fdp->fd_cdir = NULL; pwd_set(fdp, NULL);
rdir = fdp->fd_rdir;
fdp->fd_rdir = NULL;
jdir = fdp->fd_jdir;
fdp->fd_jdir = NULL;
FILEDESC_XUNLOCK(fdp); FILEDESC_XUNLOCK(fdp);
if (cdir != NULL) pwd_drop(pwd);
vrele(cdir);
if (rdir != NULL)
vrele(rdir);
if (jdir != NULL)
vrele(jdir);
fdescfree_fds(td, fdp, 1); fdescfree_fds(td, fdp, 1);
} }
@ -2371,13 +2341,7 @@ void
fdescfree_remapped(struct filedesc *fdp) fdescfree_remapped(struct filedesc *fdp)
{ {
if (fdp->fd_cdir != NULL) pwd_drop(fdp->fd_pwd);
vrele(fdp->fd_cdir);
if (fdp->fd_rdir != NULL)
vrele(fdp->fd_rdir);
if (fdp->fd_jdir != NULL)
vrele(fdp->fd_jdir);
fdescfree_fds(curthread, fdp, 0); fdescfree_fds(curthread, fdp, 0);
} }
@ -3287,37 +3251,117 @@ chroot_refuse_vdir_fds(struct filedesc *fdp)
return (0); return (0);
} }
/*
 * Complete a freshly built pwd from an existing one.
 *
 * For each of the three directory slots (current, root, jail root): when
 * the slot in newpwd is still NULL and oldpwd has a vnode there, take an
 * additional reference on that vnode with vrefact() and store it in
 * newpwd.  Slots the caller already populated in newpwd are left alone,
 * and oldpwd itself is never modified.
 */
static void
pwd_fill(struct pwd *oldpwd, struct pwd *newpwd)
{
	struct vnode *cvp, *rvp, *jvp;

	/* Current directory. */
	cvp = oldpwd->pwd_cdir;
	if (newpwd->pwd_cdir == NULL && cvp != NULL) {
		vrefact(cvp);
		newpwd->pwd_cdir = cvp;
	}

	/* Root directory. */
	rvp = oldpwd->pwd_rdir;
	if (newpwd->pwd_rdir == NULL && rvp != NULL) {
		vrefact(rvp);
		newpwd->pwd_rdir = rvp;
	}

	/* Jail root directory. */
	jvp = oldpwd->pwd_jdir;
	if (newpwd->pwd_jdir == NULL && jvp != NULL) {
		vrefact(jvp);
		newpwd->pwd_jdir = jvp;
	}
}
/*
 * Return the pwd currently installed in fdp with its reference count
 * bumped, or NULL when the descriptor table has no pwd.
 *
 * The caller must already hold the filedesc lock; this is checked with
 * FILEDESC_LOCK_ASSERT().  A non-NULL result has to be released with
 * pwd_drop().
 */
struct pwd *
pwd_hold_filedesc(struct filedesc *fdp)
{
	struct pwd *cur;

	FILEDESC_LOCK_ASSERT(fdp);
	cur = fdp->fd_pwd;
	if (cur == NULL)
		return (NULL);
	refcount_acquire(&cur->pwd_refcount);
	return (cur);
}
/*
 * Acquire a reference on the pwd of the process that td belongs to.
 *
 * Takes the filedesc lock shared around the read of fd_pwd and the
 * reference count increment, then releases it before returning.  The pwd
 * is expected to exist (MPASS), so the result is never NULL.  The caller
 * releases the reference with pwd_drop().
 */
struct pwd *
pwd_hold(struct thread *td)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct pwd *held;

	FILEDESC_SLOCK(fdp);
	held = fdp->fd_pwd;
	MPASS(held != NULL);
	refcount_acquire(&held->pwd_refcount);
	FILEDESC_SUNLOCK(fdp);

	return (held);
}
/*
 * Allocate a new pwd from M_PWD.
 *
 * The allocation uses M_WAITOK | M_ZERO, so all three directory pointers
 * start out NULL.  The reference count is initialised to 1; that single
 * reference belongs to the caller.
 */
static struct pwd *
pwd_alloc(void)
{
	struct pwd *fresh;

	fresh = malloc(sizeof(*fresh), M_PWD, M_WAITOK | M_ZERO);
	refcount_init(&fresh->pwd_refcount, 1);

	return (fresh);
}
/*
 * Release one reference on pwd.
 *
 * When refcount_release() reports that this was the last reference, the
 * vnode references held in the current, root and jail root slots are
 * released with vrele() (NULL slots are skipped) and the structure is
 * freed back to M_PWD.  Otherwise nothing else happens.
 */
void
pwd_drop(struct pwd *pwd)
{

	if (refcount_release(&pwd->pwd_refcount)) {
		/* Last reference: give back the vnodes, then the memory. */
		if (pwd->pwd_cdir != NULL)
			vrele(pwd->pwd_cdir);
		if (pwd->pwd_rdir != NULL)
			vrele(pwd->pwd_rdir);
		if (pwd->pwd_jdir != NULL)
			vrele(pwd->pwd_jdir);
		free(pwd, M_PWD);
	}
}
/* /*
* Common routine for kern_chroot() and jail_attach(). The caller is * Common routine for kern_chroot() and jail_attach(). The caller is
* responsible for invoking priv_check() and mac_vnode_check_chroot() to * responsible for invoking priv_check() and mac_vnode_check_chroot() to
* authorize this operation. * authorize this operation.
*/ */
int int
pwd_chroot(struct thread *td, struct vnode *vp) pwd_chroot(struct thread *td, struct vnode *vp)
{ {
struct filedesc *fdp; struct filedesc *fdp;
struct vnode *oldvp; struct pwd *newpwd, *oldpwd;
int error; int error;
fdp = td->td_proc->p_fd; fdp = td->td_proc->p_fd;
newpwd = pwd_alloc();
FILEDESC_XLOCK(fdp); FILEDESC_XLOCK(fdp);
oldpwd = fdp->fd_pwd;
if (chroot_allow_open_directories == 0 || if (chroot_allow_open_directories == 0 ||
(chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { (chroot_allow_open_directories == 1 &&
oldpwd->pwd_rdir != rootvnode)) {
error = chroot_refuse_vdir_fds(fdp); error = chroot_refuse_vdir_fds(fdp);
if (error != 0) { if (error != 0) {
FILEDESC_XUNLOCK(fdp); FILEDESC_XUNLOCK(fdp);
pwd_drop(newpwd);
return (error); return (error);
} }
} }
oldvp = fdp->fd_rdir;
vrefact(vp); vrefact(vp);
fdp->fd_rdir = vp; newpwd->pwd_rdir = vp;
if (fdp->fd_jdir == NULL) { if (oldpwd->pwd_jdir == NULL) {
vrefact(vp); vrefact(vp);
fdp->fd_jdir = vp; newpwd->pwd_jdir = vp;
} }
pwd_fill(oldpwd, newpwd);
pwd_set(fdp, newpwd);
FILEDESC_XUNLOCK(fdp); FILEDESC_XUNLOCK(fdp);
vrele(oldvp); pwd_drop(oldpwd);
return (0); return (0);
} }
@ -3325,16 +3369,51 @@ void
pwd_chdir(struct thread *td, struct vnode *vp) pwd_chdir(struct thread *td, struct vnode *vp)
{ {
struct filedesc *fdp; struct filedesc *fdp;
struct vnode *oldvp; struct pwd *newpwd, *oldpwd;
VNPASS(vp->v_usecount > 0, vp);
newpwd = pwd_alloc();
fdp = td->td_proc->p_fd; fdp = td->td_proc->p_fd;
FILEDESC_XLOCK(fdp); FILEDESC_XLOCK(fdp);
VNASSERT(vp->v_usecount > 0, vp, oldpwd = fdp->fd_pwd;
("chdir to a vnode with zero usecount")); newpwd->pwd_cdir = vp;
oldvp = fdp->fd_cdir; pwd_fill(oldpwd, newpwd);
fdp->fd_cdir = vp; pwd_set(fdp, newpwd);
FILEDESC_XUNLOCK(fdp); FILEDESC_XUNLOCK(fdp);
vrele(oldvp); pwd_drop(oldpwd);
}
void
pwd_ensure_dirs(void)
{
struct filedesc *fdp;
struct pwd *oldpwd, *newpwd;
fdp = curproc->p_fd;
FILEDESC_XLOCK(fdp);
oldpwd = fdp->fd_pwd;
if (oldpwd->pwd_cdir != NULL && oldpwd->pwd_rdir != NULL) {
FILEDESC_XUNLOCK(fdp);
return;
}
FILEDESC_XUNLOCK(fdp);
newpwd = pwd_alloc();
FILEDESC_XLOCK(fdp);
oldpwd = fdp->fd_pwd;
pwd_fill(oldpwd, newpwd);
if (newpwd->pwd_cdir == NULL) {
vrefact(rootvnode);
newpwd->pwd_cdir = rootvnode;
}
if (newpwd->pwd_rdir == NULL) {
vrefact(rootvnode);
newpwd->pwd_rdir = rootvnode;
}
pwd_set(fdp, newpwd);
FILEDESC_XUNLOCK(fdp);
pwd_drop(oldpwd);
} }
/* /*
@ -3345,6 +3424,7 @@ void
mountcheckdirs(struct vnode *olddp, struct vnode *newdp) mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
{ {
struct filedesc *fdp; struct filedesc *fdp;
struct pwd *newpwd, *oldpwd;
struct prison *pr; struct prison *pr;
struct proc *p; struct proc *p;
int nrele; int nrele;
@ -3352,6 +3432,7 @@ mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
if (vrefcnt(olddp) == 1) if (vrefcnt(olddp) == 1)
return; return;
nrele = 0; nrele = 0;
newpwd = pwd_alloc();
sx_slock(&allproc_lock); sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) { FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p); PROC_LOCK(p);
@ -3360,25 +3441,36 @@ mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
if (fdp == NULL) if (fdp == NULL)
continue; continue;
FILEDESC_XLOCK(fdp); FILEDESC_XLOCK(fdp);
if (fdp->fd_cdir == olddp) { oldpwd = fdp->fd_pwd;
vrefact(newdp); if (oldpwd == NULL ||
fdp->fd_cdir = newdp; (oldpwd->pwd_cdir != olddp &&
nrele++; oldpwd->pwd_rdir != olddp &&
oldpwd->pwd_jdir != olddp)) {
FILEDESC_XUNLOCK(fdp);
fddrop(fdp);
continue;
} }
if (fdp->fd_rdir == olddp) { if (oldpwd->pwd_cdir == olddp) {
vrefact(newdp); vrefact(newdp);
fdp->fd_rdir = newdp; newpwd->pwd_cdir = newdp;
nrele++;
} }
if (fdp->fd_jdir == olddp) { if (oldpwd->pwd_rdir == olddp) {
vrefact(newdp); vrefact(newdp);
fdp->fd_jdir = newdp; newpwd->pwd_rdir = newdp;
nrele++;
} }
if (oldpwd->pwd_jdir == olddp) {
vrefact(newdp);
newpwd->pwd_jdir = newdp;
}
pwd_fill(oldpwd, newpwd);
pwd_set(fdp, newpwd);
FILEDESC_XUNLOCK(fdp); FILEDESC_XUNLOCK(fdp);
pwd_drop(oldpwd);
fddrop(fdp); fddrop(fdp);
newpwd = pwd_alloc();
} }
sx_sunlock(&allproc_lock); sx_sunlock(&allproc_lock);
pwd_drop(newpwd);
if (rootvnode == olddp) { if (rootvnode == olddp) {
vrefact(newdp); vrefact(newdp);
rootvnode = newdp; rootvnode = newdp;
@ -3714,6 +3806,7 @@ kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
struct filedesc *fdp; struct filedesc *fdp;
struct export_fd_buf *efbuf; struct export_fd_buf *efbuf;
struct vnode *cttyvp, *textvp, *tracevp; struct vnode *cttyvp, *textvp, *tracevp;
struct pwd *pwd;
int error, i; int error, i;
cap_rights_t rights; cap_rights_t rights;
@ -3754,20 +3847,24 @@ kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
goto fail; goto fail;
efbuf->fdp = fdp; efbuf->fdp = fdp;
FILEDESC_SLOCK(fdp); FILEDESC_SLOCK(fdp);
/* working directory */ pwd = pwd_hold_filedesc(fdp);
if (fdp->fd_cdir != NULL) { if (pwd != NULL) {
vrefact(fdp->fd_cdir); /* working directory */
export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD, FREAD, efbuf); if (pwd->pwd_cdir != NULL) {
} vrefact(pwd->pwd_cdir);
/* root directory */ export_vnode_to_sb(pwd->pwd_cdir, KF_FD_TYPE_CWD, FREAD, efbuf);
if (fdp->fd_rdir != NULL) { }
vrefact(fdp->fd_rdir); /* root directory */
export_vnode_to_sb(fdp->fd_rdir, KF_FD_TYPE_ROOT, FREAD, efbuf); if (pwd->pwd_rdir != NULL) {
} vrefact(pwd->pwd_rdir);
/* jail directory */ export_vnode_to_sb(pwd->pwd_rdir, KF_FD_TYPE_ROOT, FREAD, efbuf);
if (fdp->fd_jdir != NULL) { }
vrefact(fdp->fd_jdir); /* jail directory */
export_vnode_to_sb(fdp->fd_jdir, KF_FD_TYPE_JAIL, FREAD, efbuf); if (pwd->pwd_jdir != NULL) {
vrefact(pwd->pwd_jdir);
export_vnode_to_sb(pwd->pwd_jdir, KF_FD_TYPE_JAIL, FREAD, efbuf);
}
pwd_drop(pwd);
} }
for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) { for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) {
if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) if ((fp = fdp->fd_ofiles[i].fde_file) == NULL)
@ -3882,6 +3979,7 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
struct kinfo_ofile *okif; struct kinfo_ofile *okif;
struct kinfo_file *kif; struct kinfo_file *kif;
struct filedesc *fdp; struct filedesc *fdp;
struct pwd *pwd;
int error, i, *name; int error, i, *name;
struct file *fp; struct file *fp;
struct proc *p; struct proc *p;
@ -3897,15 +3995,19 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
okif = malloc(sizeof(*okif), M_TEMP, M_WAITOK); okif = malloc(sizeof(*okif), M_TEMP, M_WAITOK);
FILEDESC_SLOCK(fdp); FILEDESC_SLOCK(fdp);
if (fdp->fd_cdir != NULL) pwd = pwd_hold_filedesc(fdp);
export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif, if (pwd != NULL) {
okif, fdp, req); if (pwd->pwd_cdir != NULL)
if (fdp->fd_rdir != NULL) export_vnode_for_osysctl(pwd->pwd_cdir, KF_FD_TYPE_CWD, kif,
export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif, okif, fdp, req);
okif, fdp, req); if (pwd->pwd_rdir != NULL)
if (fdp->fd_jdir != NULL) export_vnode_for_osysctl(pwd->pwd_rdir, KF_FD_TYPE_ROOT, kif,
export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif, okif, fdp, req);
okif, fdp, req); if (pwd->pwd_jdir != NULL)
export_vnode_for_osysctl(pwd->pwd_jdir, KF_FD_TYPE_JAIL, kif,
okif, fdp, req);
pwd_drop(pwd);
}
for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) { for (i = 0; fdp->fd_refcnt > 0 && i <= fdp->fd_lastfile; i++) {
if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) if ((fp = fdp->fd_ofiles[i].fde_file) == NULL)
continue; continue;
@ -3973,6 +4075,7 @@ kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
{ {
struct filedesc *fdp; struct filedesc *fdp;
struct export_fd_buf *efbuf; struct export_fd_buf *efbuf;
struct vnode *cdir;
int error; int error;
PROC_LOCK_ASSERT(p, MA_OWNED); PROC_LOCK_ASSERT(p, MA_OWNED);
@ -3988,12 +4091,12 @@ kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
efbuf->remainder = maxlen; efbuf->remainder = maxlen;
FILEDESC_SLOCK(fdp); FILEDESC_SLOCK(fdp);
if (fdp->fd_cdir == NULL) cdir = fdp->fd_pwd->pwd_cdir;
if (cdir == NULL) {
error = EINVAL; error = EINVAL;
else { } else {
vrefact(fdp->fd_cdir); vrefact(cdir);
error = export_vnode_to_sb(fdp->fd_cdir, KF_FD_TYPE_CWD, error = export_vnode_to_sb(cdir, KF_FD_TYPE_CWD, FREAD, efbuf);
FREAD, efbuf);
} }
FILEDESC_SUNLOCK(fdp); FILEDESC_SUNLOCK(fdp);
fddrop(fdp); fddrop(fdp);

View File

@ -2085,14 +2085,14 @@ linker_load_module(const char *kldname, const char *modname,
KASSERT(verinfo == NULL, ("linker_load_module: verinfo" KASSERT(verinfo == NULL, ("linker_load_module: verinfo"
" is not NULL")); " is not NULL"));
/* check if root file system is not mounted */ /* check if root file system is not mounted */
if (rootvnode == NULL || curproc->p_fd->fd_rdir == NULL) if (rootvnode == NULL || curproc->p_fd->fd_pwd->pwd_rdir == NULL)
return (ENXIO); return (ENXIO);
pathname = linker_search_kld(kldname); pathname = linker_search_kld(kldname);
} else { } else {
if (modlist_lookup2(modname, verinfo) != NULL) if (modlist_lookup2(modname, verinfo) != NULL)
return (EEXIST); return (EEXIST);
/* check if root file system is not mounted */ /* check if root file system is not mounted */
if (rootvnode == NULL || curproc->p_fd->fd_rdir == NULL) if (rootvnode == NULL || curproc->p_fd->fd_pwd->pwd_rdir == NULL)
return (ENXIO); return (ENXIO);
if (kldname != NULL) if (kldname != NULL)
pathname = strdup(kldname, M_LINKER); pathname = strdup(kldname, M_LINKER);

View File

@ -2196,20 +2196,12 @@ sys___getcwd(struct thread *td, struct __getcwd_args *uap)
int int
vn_getcwd(struct thread *td, char *buf, char **retbuf, size_t *buflen) vn_getcwd(struct thread *td, char *buf, char **retbuf, size_t *buflen)
{ {
struct filedesc *fdp; struct pwd *pwd;
struct vnode *cdir, *rdir;
int error; int error;
fdp = td->td_proc->p_fd; pwd = pwd_hold(td);
FILEDESC_SLOCK(fdp); error = vn_fullpath_any(td, pwd->pwd_cdir, pwd->pwd_rdir, buf, retbuf, buflen);
cdir = fdp->fd_cdir; pwd_drop(pwd);
vrefact(cdir);
rdir = fdp->fd_rdir;
vrefact(rdir);
FILEDESC_SUNLOCK(fdp);
error = vn_fullpath_any(td, cdir, rdir, buf, retbuf, buflen);
vrele(rdir);
vrele(cdir);
#ifdef KTRACE #ifdef KTRACE
if (KTRPOINT(curthread, KTR_NAMEI) && error == 0) if (KTRPOINT(curthread, KTR_NAMEI) && error == 0)
@ -2256,9 +2248,8 @@ sys___realpathat(struct thread *td, struct __realpathat_args *uap)
int int
vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
{ {
struct pwd *pwd;
char *buf; char *buf;
struct filedesc *fdp;
struct vnode *rdir;
size_t buflen; size_t buflen;
int error; int error;
@ -2267,13 +2258,9 @@ vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
buflen = MAXPATHLEN; buflen = MAXPATHLEN;
buf = malloc(buflen, M_TEMP, M_WAITOK); buf = malloc(buflen, M_TEMP, M_WAITOK);
fdp = td->td_proc->p_fd; pwd = pwd_hold(td);
FILEDESC_SLOCK(fdp); error = vn_fullpath_any(td, vn, pwd->pwd_rdir, buf, retbuf, &buflen);
rdir = fdp->fd_rdir; pwd_drop(pwd);
vrefact(rdir);
FILEDESC_SUNLOCK(fdp);
error = vn_fullpath_any(td, vn, rdir, buf, retbuf, &buflen);
vrele(rdir);
if (!error) if (!error)
*freebuf = buf; *freebuf = buf;
@ -2541,8 +2528,7 @@ vn_fullpath_hardlink(struct thread *td, struct nameidata *ndp, char **retbuf,
char **freebuf, size_t *buflen) char **freebuf, size_t *buflen)
{ {
char *buf, *tmpbuf; char *buf, *tmpbuf;
struct filedesc *fdp; struct pwd *pwd;
struct vnode *rdir;
struct componentname *cnp; struct componentname *cnp;
struct vnode *vp; struct vnode *vp;
size_t addend; size_t addend;
@ -2557,11 +2543,7 @@ vn_fullpath_hardlink(struct thread *td, struct nameidata *ndp, char **retbuf,
slash_prefixed = false; slash_prefixed = false;
buf = malloc(*buflen, M_TEMP, M_WAITOK); buf = malloc(*buflen, M_TEMP, M_WAITOK);
fdp = td->td_proc->p_fd; pwd = pwd_hold(td);
FILEDESC_SLOCK(fdp);
rdir = fdp->fd_rdir;
vrefact(rdir);
FILEDESC_SUNLOCK(fdp);
addend = 0; addend = 0;
vp = ndp->ni_vp; vp = ndp->ni_vp;
@ -2582,16 +2564,17 @@ vn_fullpath_hardlink(struct thread *td, struct nameidata *ndp, char **retbuf,
} }
vref(vp); vref(vp);
error = vn_fullpath_dir(td, vp, rdir, buf, retbuf, buflen, slash_prefixed, addend); error = vn_fullpath_dir(td, vp, pwd->pwd_rdir, buf, retbuf, buflen,
slash_prefixed, addend);
if (error != 0) if (error != 0)
goto out_bad; goto out_bad;
vrele(rdir); pwd_drop(pwd);
*freebuf = buf; *freebuf = buf;
return (0); return (0);
out_bad: out_bad:
vrele(rdir); pwd_drop(pwd);
free(buf, M_TEMP); free(buf, M_TEMP);
return (error); return (error);
} }

View File

@ -303,7 +303,6 @@ namei_handle_root(struct nameidata *ndp, struct vnode **dpp, u_int n)
int int
namei(struct nameidata *ndp) namei(struct nameidata *ndp)
{ {
struct filedesc *fdp; /* pointer to file descriptor state */
char *cp; /* pointer into pathname argument */ char *cp; /* pointer into pathname argument */
struct vnode *dp; /* the directory we are searching */ struct vnode *dp; /* the directory we are searching */
struct iovec aiov; /* uio for reading symbolic links */ struct iovec aiov; /* uio for reading symbolic links */
@ -311,6 +310,7 @@ namei(struct nameidata *ndp)
struct file *dfp; struct file *dfp;
struct thread *td; struct thread *td;
struct proc *p; struct proc *p;
struct pwd *pwd;
cap_rights_t rights; cap_rights_t rights;
struct filecaps dirfd_caps; struct filecaps dirfd_caps;
struct uio auio; struct uio auio;
@ -327,7 +327,6 @@ namei(struct nameidata *ndp)
("namei: flags contaminated with nameiops")); ("namei: flags contaminated with nameiops"));
MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR || MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR ||
ndp->ni_startdir->v_type == VBAD); ndp->ni_startdir->v_type == VBAD);
fdp = p->p_fd;
TAILQ_INIT(&ndp->ni_cap_tracker); TAILQ_INIT(&ndp->ni_cap_tracker);
ndp->ni_lcf = 0; ndp->ni_lcf = 0;
@ -395,13 +394,13 @@ namei(struct nameidata *ndp)
/* /*
* Get starting point for the translation. * Get starting point for the translation.
*/ */
FILEDESC_SLOCK(fdp); pwd = pwd_hold(td);
/* /*
* The reference on ni_rootdir is acquired in the block below to avoid * The reference on ni_rootdir is acquired in the block below to avoid
* back-to-back atomics for absolute lookups. * back-to-back atomics for absolute lookups.
*/ */
ndp->ni_rootdir = fdp->fd_rdir; ndp->ni_rootdir = pwd->pwd_rdir;
ndp->ni_topdir = fdp->fd_jdir; ndp->ni_topdir = pwd->pwd_jdir;
startdir_used = 0; startdir_used = 0;
dp = NULL; dp = NULL;
@ -422,7 +421,7 @@ namei(struct nameidata *ndp)
dp = ndp->ni_startdir; dp = ndp->ni_startdir;
startdir_used = 1; startdir_used = 1;
} else if (ndp->ni_dirfd == AT_FDCWD) { } else if (ndp->ni_dirfd == AT_FDCWD) {
dp = fdp->fd_cdir; dp = pwd->pwd_cdir;
if (dp == ndp->ni_rootdir) { if (dp == ndp->ni_rootdir) {
vrefactn(dp, 2); vrefactn(dp, 2);
} else { } else {
@ -442,7 +441,7 @@ namei(struct nameidata *ndp)
* Effectively inlined fgetvp_rights, because we need to * Effectively inlined fgetvp_rights, because we need to
* inspect the file as well as grabbing the vnode. * inspect the file as well as grabbing the vnode.
*/ */
error = fget_cap_locked(fdp, ndp->ni_dirfd, &rights, error = fget_cap(td, ndp->ni_dirfd, &rights,
&dfp, &ndp->ni_filecaps); &dfp, &ndp->ni_filecaps);
if (error != 0) { if (error != 0) {
/* /*
@ -450,16 +449,19 @@ namei(struct nameidata *ndp)
* or capability-related, both of which can be * or capability-related, both of which can be
* safely returned to the caller. * safely returned to the caller.
*/ */
} else if (dfp->f_ops == &badfileops) {
error = EBADF;
} else if (dfp->f_vnode == NULL) {
error = ENOTDIR;
} else { } else {
dp = dfp->f_vnode; if (dfp->f_ops == &badfileops) {
vrefact(dp); error = EBADF;
} else if (dfp->f_vnode == NULL) {
error = ENOTDIR;
} else {
dp = dfp->f_vnode;
vrefact(dp);
if ((dfp->f_flag & FSEARCH) != 0) if ((dfp->f_flag & FSEARCH) != 0)
cnp->cn_flags |= NOEXECCHECK; cnp->cn_flags |= NOEXECCHECK;
}
fdrop(dfp, td);
} }
#ifdef CAPABILITIES #ifdef CAPABILITIES
/* /*
@ -481,7 +483,7 @@ namei(struct nameidata *ndp)
} }
if (error == 0 && (cnp->cn_flags & BENEATH) != 0) { if (error == 0 && (cnp->cn_flags & BENEATH) != 0) {
if (ndp->ni_dirfd == AT_FDCWD) { if (ndp->ni_dirfd == AT_FDCWD) {
ndp->ni_beneath_latch = fdp->fd_cdir; ndp->ni_beneath_latch = pwd->pwd_cdir;
vrefact(ndp->ni_beneath_latch); vrefact(ndp->ni_beneath_latch);
} else { } else {
rights = ndp->ni_rightsneeded; rights = ndp->ni_rightsneeded;
@ -496,7 +498,6 @@ namei(struct nameidata *ndp)
if (error == 0) if (error == 0)
ndp->ni_lcf |= NI_LCF_LATCH; ndp->ni_lcf |= NI_LCF_LATCH;
} }
FILEDESC_SUNLOCK(fdp);
/* /*
* If we are auditing the kernel pathname, save the user pathname. * If we are auditing the kernel pathname, save the user pathname.
*/ */
@ -542,6 +543,7 @@ namei(struct nameidata *ndp)
nameicap_cleanup(ndp, true); nameicap_cleanup(ndp, true);
SDT_PROBE2(vfs, namei, lookup, return, error, SDT_PROBE2(vfs, namei, lookup, return, error,
(error == 0 ? ndp->ni_vp : NULL)); (error == 0 ? ndp->ni_vp : NULL));
pwd_drop(pwd);
return (error); return (error);
} }
if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
@ -617,6 +619,7 @@ namei(struct nameidata *ndp)
namei_cleanup_cnp(cnp); namei_cleanup_cnp(cnp);
nameicap_cleanup(ndp, true); nameicap_cleanup(ndp, true);
SDT_PROBE2(vfs, namei, lookup, return, error, NULL); SDT_PROBE2(vfs, namei, lookup, return, error, NULL);
pwd_drop(pwd);
return (error); return (error);
} }

View File

@ -237,27 +237,13 @@ root_mounted(void)
static void static void
set_rootvnode(void) set_rootvnode(void)
{ {
struct proc *p;
if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode)) if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode))
panic("set_rootvnode: Cannot find root vnode"); panic("set_rootvnode: Cannot find root vnode");
VOP_UNLOCK(rootvnode); VOP_UNLOCK(rootvnode);
p = curthread->td_proc; pwd_ensure_dirs();
FILEDESC_XLOCK(p->p_fd);
if (p->p_fd->fd_cdir != NULL)
vrele(p->p_fd->fd_cdir);
p->p_fd->fd_cdir = rootvnode;
VREF(rootvnode);
if (p->p_fd->fd_rdir != NULL)
vrele(p->p_fd->fd_rdir);
p->p_fd->fd_rdir = rootvnode;
VREF(rootvnode);
FILEDESC_XUNLOCK(p->p_fd);
} }
static int static int

View File

@ -493,38 +493,35 @@ void
audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath) audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath)
{ {
struct vnode *cdir, *rdir; struct vnode *cdir, *rdir;
struct filedesc *fdp; struct pwd *pwd;
cap_rights_t rights; cap_rights_t rights;
int error; int error;
bool vrele_cdir;
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s: at %s:%d", WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s: at %s:%d",
__func__, __FILE__, __LINE__); __func__, __FILE__, __LINE__);
rdir = cdir = NULL; pwd = pwd_hold(td);
fdp = td->td_proc->p_fd; rdir = pwd->pwd_rdir;
FILEDESC_SLOCK(fdp); cdir = NULL;
if (*path == '/') { vrele_cdir = false;
rdir = fdp->fd_rdir; if (*path != '/') {
vrefact(rdir);
} else {
if (dirfd == AT_FDCWD) { if (dirfd == AT_FDCWD) {
cdir = fdp->fd_cdir; cdir = pwd->pwd_cdir;
vrefact(cdir);
} else { } else {
error = fgetvp(td, dirfd, cap_rights_init(&rights), &cdir); error = fgetvp(td, dirfd, cap_rights_init(&rights), &cdir);
if (error != 0) { if (error != 0) {
FILEDESC_SUNLOCK(fdp);
cpath[0] = '\0'; cpath[0] = '\0';
pwd_drop(pwd);
return; return;
} }
vrele_cdir = true;
} }
} }
FILEDESC_SUNLOCK(fdp);
audit_canon_path_vp(td, rdir, cdir, path, cpath); audit_canon_path_vp(td, rdir, cdir, path, cpath);
if (rdir != NULL) pwd_drop(pwd);
vrele(rdir); if (vrele_cdir)
if (cdir != NULL)
vrele(cdir); vrele(cdir);
} }

View File

@ -76,11 +76,16 @@ struct fdescenttbl {
*/ */
#define NDSLOTTYPE u_long #define NDSLOTTYPE u_long
/*
 * Directory vnodes associated with a descriptor table, kept in their own
 * reference-counted structure that struct filedesc points to via fd_pwd.
 * pwd_chdir() and pwd_chroot() build a new struct pwd and install it
 * with pwd_set() instead of updating the existing one in place.
 */
struct pwd {
/* Number of holders; see pwd_hold(), pwd_hold_filedesc() and pwd_drop(). */
volatile u_int pwd_refcount;
struct vnode *pwd_cdir; /* current directory */
struct vnode *pwd_rdir; /* root directory */
struct vnode *pwd_jdir; /* jail root directory */
};
struct filedesc { struct filedesc {
struct fdescenttbl *fd_files; /* open files table */ struct fdescenttbl *fd_files; /* open files table */
struct vnode *fd_cdir; /* current directory */ struct pwd *fd_pwd; /* directories */
struct vnode *fd_rdir; /* root directory */
struct vnode *fd_jdir; /* jail root directory */
NDSLOTTYPE *fd_map; /* bitmap of free fds */ NDSLOTTYPE *fd_map; /* bitmap of free fds */
int fd_lastfile; /* high-water mark of fd_ofiles */ int fd_lastfile; /* high-water mark of fd_ofiles */
int fd_freefile; /* approx. next free file */ int fd_freefile; /* approx. next free file */
@ -253,6 +258,17 @@ void pwd_chdir(struct thread *td, struct vnode *vp);
int pwd_chroot(struct thread *td, struct vnode *vp); int pwd_chroot(struct thread *td, struct vnode *vp);
void pwd_ensure_dirs(void); void pwd_ensure_dirs(void);
struct pwd *pwd_hold_filedesc(struct filedesc *fdp);
struct pwd *pwd_hold(struct thread *td);
void pwd_drop(struct pwd *pwd);
/*
 * Install newpwd as the pwd of descriptor table fdp.
 *
 * Only the pointer is stored: no reference counts are adjusted here, so
 * the caller remains responsible for dropping the previously installed
 * pwd (as pwd_chdir() and pwd_chroot() do after unlocking).  newpwd may
 * be NULL, which fdescfree() uses to detach the pwd.
 */
static inline void
pwd_set(struct filedesc *fdp, struct pwd *newpwd)
{
/* The filedesc lock must be held when fd_pwd is replaced. */
FILEDESC_XLOCK_ASSERT(fdp);
fdp->fd_pwd = newpwd;
}
#endif /* _KERNEL */ #endif /* _KERNEL */
#endif /* !_SYS_FILEDESC_H_ */ #endif /* !_SYS_FILEDESC_H_ */

View File

@ -3190,6 +3190,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
struct inode *ip, *dp; struct inode *ip, *dp;
struct mount *mp; struct mount *mp;
struct fs *fs; struct fs *fs;
struct pwd *pwd;
ufs2_daddr_t blkno; ufs2_daddr_t blkno;
long blkcnt, blksize; long blkcnt, blksize;
u_long key; u_long key;
@ -3448,11 +3449,11 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
/* /*
* Now we get and lock the child directory containing "..". * Now we get and lock the child directory containing "..".
*/ */
FILEDESC_SLOCK(td->td_proc->p_fd); pwd = pwd_hold(td);
dvp = td->td_proc->p_fd->fd_cdir; dvp = pwd->pwd_cdir;
FILEDESC_SUNLOCK(td->td_proc->p_fd);
if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) { if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
vput(fdvp); vput(fdvp);
pwd_drop(pwd);
break; break;
} }
dp = VTOI(dvp); dp = VTOI(dvp);
@ -3463,6 +3464,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
cache_purge(dvp); cache_purge(dvp);
vput(dvp); vput(dvp);
vput(fdvp); vput(fdvp);
pwd_drop(pwd);
break; break;
case FFS_UNLINK: case FFS_UNLINK:
@ -3607,7 +3609,7 @@ buffered_write(fp, uio, active_cred, flags, td)
return (EINVAL); return (EINVAL);
fdp = td->td_proc->p_fd; fdp = td->td_proc->p_fd;
FILEDESC_SLOCK(fdp); FILEDESC_SLOCK(fdp);
vp = fdp->fd_cdir; vp = fdp->fd_pwd->pwd_cdir;
vref(vp); vref(vp);
FILEDESC_SUNLOCK(fdp); FILEDESC_SUNLOCK(fdp);
vn_lock(vp, LK_SHARED | LK_RETRY); vn_lock(vp, LK_SHARED | LK_RETRY);