From 1762f674ccb571e6b03c009906dd1af3c6343f9b Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sat, 15 May 2021 02:22:55 +0300 Subject: [PATCH] ktrace: pack all ktrace parameters into allocated structure ktr_io_params Ref-count the ktr_io_params structure instead of vnode/cred. Reviewed by: markj Tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D30257 --- sys/kern/kern_descrip.c | 6 +- sys/kern/kern_exec.c | 16 +-- sys/kern/kern_ktrace.c | 234 +++++++++++++++++++++++++--------------- sys/kern/kern_proc.c | 5 +- sys/kern/kern_thread.c | 12 +-- sys/sys/ktrace.h | 6 +- sys/sys/proc.h | 3 +- 7 files changed, 170 insertions(+), 112 deletions(-) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 168bddda9c45..09eb0770bcda 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -79,9 +79,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#ifdef KTRACE #include -#endif #include @@ -4363,9 +4361,7 @@ kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, PROC_LOCK_ASSERT(p, MA_OWNED); /* ktrace vnode */ - tracevp = p->p_tracevp; - if (tracevp != NULL) - vrefact(tracevp); + tracevp = ktr_get_tracevp(p, true); /* text vnode */ textvp = p->p_textvp; if (textvp != NULL) diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 04dedfa59c9b..d9b9e1ae122b 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -374,8 +374,7 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct pargs *oldargs = NULL, *newargs = NULL; struct sigacts *oldsigacts = NULL, *newsigacts = NULL; #ifdef KTRACE - struct vnode *tracevp = NULL; - struct ucred *tracecred = NULL; + struct ktr_io_params *kiop; #endif struct vnode *oldtextvp = NULL, *newtextvp; int credential_changing; @@ -391,6 +390,7 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p, static const char fexecv_proc_title[] = "(fexecv)"; imgp = &image_params; + kiop = NULL; /* * Lock the process and set the P_INEXEC flag to indicate that @@ -780,11 +780,10 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p, * we do not regain any tracing during a possible block. */ setsugid(p); + kiop = NULL; #ifdef KTRACE - if (p->p_tracecred != NULL && - priv_check_cred(p->p_tracecred, PRIV_DEBUG_DIFFCRED)) - ktrprocexec(p, &tracecred, &tracevp); + kiop = ktrprocexec(p); #endif /* * Close any file descriptors 0..2 that reference procfs, @@ -939,12 +938,7 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p, */ if (oldtextvp != NULL) vrele(oldtextvp); -#ifdef KTRACE - if (tracevp != NULL) - vrele(tracevp); - if (tracecred != NULL) - crfree(tracecred); -#endif + ktr_io_params_free(kiop); pargs_drop(oldargs); pargs_drop(newargs); if (oldsigacts != NULL) diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c index 9916022961e9..c03eea4991a1 100644 --- a/sys/kern/kern_ktrace.c +++ b/sys/kern/kern_ktrace.c @@ -145,20 +145,27 @@ static int print_message = 1; static struct mtx ktrace_mtx; static struct sx ktrace_sx; +struct ktr_io_params { + struct vnode *vp; + struct ucred *cr; + u_int refs; +}; + static void ktrace_init(void *dummy); static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS); static u_int ktrace_resize_pool(u_int oldsize, u_int newsize); static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type); static struct ktr_request *ktr_getrequest(int type); static void ktr_submitrequest(struct thread *td, struct ktr_request *req); -static void ktr_freeproc(struct proc *p, struct ucred **uc, - struct vnode **vp); +static struct ktr_io_params *ktr_freeproc(struct proc *p); static void ktr_freerequest(struct ktr_request *req); static void ktr_freerequest_locked(struct ktr_request *req); static void ktr_writerequest(struct thread *td, struct ktr_request *req); static int ktrcanset(struct thread *,struct proc *); -static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *); -static int ktrops(struct thread *,struct proc *,int,int,struct vnode *); +static int ktrsetchildren(struct thread *, struct proc *, int, int, + struct ktr_io_params *); +static int ktrops(struct thread *, struct proc *, int, int, + struct ktr_io_params *); static void ktrprocctor_entered(struct thread *, struct proc *); /* @@ -421,28 +428,85 @@ ktr_freerequest_locked(struct ktr_request *req) STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); } +static void +ktr_io_params_ref(struct ktr_io_params *kiop) +{ + mtx_assert(&ktrace_mtx, MA_OWNED); + kiop->refs++; +} + +static struct ktr_io_params * +ktr_io_params_rele(struct ktr_io_params *kiop) +{ + mtx_assert(&ktrace_mtx, MA_OWNED); + if (kiop == NULL) + return (NULL); + KASSERT(kiop->refs > 0, ("kiop ref == 0 %p", kiop)); + return (--(kiop->refs) == 0 ? kiop : NULL); +} + +void +ktr_io_params_free(struct ktr_io_params *kiop) +{ + if (kiop == NULL) + return; + + MPASS(kiop->refs == 0); + vn_close(kiop->vp, FWRITE, kiop->cr, curthread); + crfree(kiop->cr); + free(kiop, M_KTRACE); +} + +static struct ktr_io_params * +ktr_io_params_alloc(struct thread *td, struct vnode *vp) +{ + struct ktr_io_params *res; + + res = malloc(sizeof(struct ktr_io_params), M_KTRACE, M_WAITOK); + res->vp = vp; + res->cr = crhold(td->td_ucred); + res->refs = 1; + return (res); +} + /* * Disable tracing for a process and release all associated resources. * The caller is responsible for releasing a reference on the returned * vnode and credentials. */ -static void -ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp) +static struct ktr_io_params * +ktr_freeproc(struct proc *p) { + struct ktr_io_params *kiop; struct ktr_request *req; PROC_LOCK_ASSERT(p, MA_OWNED); mtx_assert(&ktrace_mtx, MA_OWNED); - *uc = p->p_tracecred; - p->p_tracecred = NULL; - if (vp != NULL) - *vp = p->p_tracevp; - p->p_tracevp = NULL; + kiop = ktr_io_params_rele(p->p_ktrioparms); + p->p_ktrioparms = NULL; p->p_traceflag = 0; while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) { STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list); ktr_freerequest_locked(req); } + return (kiop); +} + +struct vnode * +ktr_get_tracevp(struct proc *p, bool ref) +{ + struct vnode *vp; + + PROC_LOCK_ASSERT(p, MA_OWNED); + + if (p->p_ktrioparms != NULL) { + vp = p->p_ktrioparms->vp; + if (ref) + vrefact(vp); + } else { + vp = NULL; + } + return (vp); } void @@ -501,14 +565,21 @@ ktrsysret(int code, int error, register_t retval) * * XXX: We toss any pending asynchronous records. */ -void -ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp) +struct ktr_io_params * +ktrprocexec(struct proc *p) { + struct ktr_io_params *kiop; PROC_LOCK_ASSERT(p, MA_OWNED); + + kiop = p->p_ktrioparms; + if (kiop == NULL || priv_check_cred(kiop->cr, PRIV_DEBUG_DIFFCRED)) + return (NULL); + mtx_lock(&ktrace_mtx); - ktr_freeproc(p, uc, vp); + kiop = ktr_freeproc(p); mtx_unlock(&ktrace_mtx); + return (kiop); } /* @@ -520,8 +591,7 @@ ktrprocexit(struct thread *td) { struct ktr_request *req; struct proc *p; - struct ucred *cred; - struct vnode *vp; + struct ktr_io_params *kiop; p = td->td_proc; if (p->p_traceflag == 0) @@ -536,13 +606,10 @@ ktrprocexit(struct thread *td) sx_xunlock(&ktrace_sx); PROC_LOCK(p); mtx_lock(&ktrace_mtx); - ktr_freeproc(p, &cred, &vp); + kiop = ktr_freeproc(p); mtx_unlock(&ktrace_mtx); PROC_UNLOCK(p); - if (vp != NULL) - vrele(vp); - if (cred != NULL) - crfree(cred); + ktr_io_params_free(kiop); ktrace_exit(td); } @@ -583,7 +650,7 @@ void ktrprocfork(struct proc *p1, struct proc *p2) { - MPASS(p2->p_tracevp == NULL); + MPASS(p2->p_ktrioparms == NULL); MPASS(p2->p_traceflag == 0); if (p1->p_traceflag == 0) @@ -593,12 +660,8 @@ ktrprocfork(struct proc *p1, struct proc *p2) mtx_lock(&ktrace_mtx); if (p1->p_traceflag & KTRFAC_INHERIT) { p2->p_traceflag = p1->p_traceflag; - if ((p2->p_tracevp = p1->p_tracevp) != NULL) { - VREF(p2->p_tracevp); - KASSERT(p1->p_tracecred != NULL, - ("ktrace vnode with no cred")); - p2->p_tracecred = crhold(p1->p_tracecred); - } + if ((p2->p_ktrioparms = p1->p_ktrioparms) != NULL) + p1->p_ktrioparms->refs++; } mtx_unlock(&ktrace_mtx); PROC_UNLOCK(p1); @@ -932,7 +995,7 @@ sys_ktrace(struct thread *td, struct ktrace_args *uap) int nfound, ret = 0; int flags, error = 0; struct nameidata nd; - struct ucred *cred; + struct ktr_io_params *kiop, *old_kiop; /* * Need something to (un)trace. @@ -940,6 +1003,7 @@ sys_ktrace(struct thread *td, struct ktrace_args *uap) if (ops != KTROP_CLEARFILE && facs == 0) return (EINVAL); + kiop = NULL; ktrace_enter(td); if (ops != KTROP_CLEAR) { /* @@ -960,34 +1024,34 @@ sys_ktrace(struct thread *td, struct ktrace_args *uap) ktrace_exit(td); return (EACCES); } + kiop = ktr_io_params_alloc(td, vp); } /* * Clear all uses of the tracefile. */ if (ops == KTROP_CLEARFILE) { - int vrele_count; - - vrele_count = 0; +restart: sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { + old_kiop = NULL; PROC_LOCK(p); - if (p->p_tracevp == vp) { + if (p->p_ktrioparms != NULL && + p->p_ktrioparms->vp == vp) { if (ktrcanset(td, p)) { mtx_lock(&ktrace_mtx); - ktr_freeproc(p, &cred, NULL); + old_kiop = ktr_freeproc(p); mtx_unlock(&ktrace_mtx); - vrele_count++; - crfree(cred); } else error = EPERM; } PROC_UNLOCK(p); + if (old_kiop != NULL) { + sx_sunlock(&allproc_lock); + ktr_io_params_free(old_kiop); + goto restart; + } } sx_sunlock(&allproc_lock); - if (vrele_count > 0) { - while (vrele_count-- > 0) - vrele(vp); - } goto done; } /* @@ -1019,9 +1083,9 @@ sys_ktrace(struct thread *td, struct ktrace_args *uap) } nfound++; if (descend) - ret |= ktrsetchildren(td, p, ops, facs, vp); + ret |= ktrsetchildren(td, p, ops, facs, kiop); else - ret |= ktrops(td, p, ops, facs, vp); + ret |= ktrops(td, p, ops, facs, kiop); } if (nfound == 0) { sx_sunlock(&proctree_lock); @@ -1044,16 +1108,20 @@ sys_ktrace(struct thread *td, struct ktrace_args *uap) goto done; } if (descend) - ret |= ktrsetchildren(td, p, ops, facs, vp); + ret |= ktrsetchildren(td, p, ops, facs, kiop); else - ret |= ktrops(td, p, ops, facs, vp); + ret |= ktrops(td, p, ops, facs, kiop); } sx_sunlock(&proctree_lock); if (!ret) error = EPERM; done: - if (vp != NULL) - (void) vn_close(vp, FWRITE, td->td_ucred, td); + if (kiop != NULL) { + mtx_lock(&ktrace_mtx); + kiop = ktr_io_params_rele(kiop); + mtx_unlock(&ktrace_mtx); + ktr_io_params_free(kiop); + } ktrace_exit(td); return (error); #else /* !KTRACE */ @@ -1097,10 +1165,10 @@ sys_utrace(struct thread *td, struct utrace_args *uap) #ifdef KTRACE static int -ktrops(struct thread *td, struct proc *p, int ops, int facs, struct vnode *vp) +ktrops(struct thread *td, struct proc *p, int ops, int facs, + struct ktr_io_params *new_kiop) { - struct vnode *tracevp = NULL; - struct ucred *tracecred = NULL; + struct ktr_io_params *old_kiop; PROC_LOCK_ASSERT(p, MA_OWNED); if (!ktrcanset(td, p)) { @@ -1112,19 +1180,18 @@ ktrops(struct thread *td, struct proc *p, int ops, int facs, struct vnode *vp) PROC_UNLOCK(p); return (1); } + old_kiop = NULL; mtx_lock(&ktrace_mtx); if (ops == KTROP_SET) { - if (p->p_tracevp != vp) { - /* - * if trace file already in use, relinquish below - */ - tracevp = p->p_tracevp; - VREF(vp); - p->p_tracevp = vp; + if (p->p_ktrioparms != NULL && + p->p_ktrioparms->vp != new_kiop->vp) { + /* if trace file already in use, relinquish below */ + old_kiop = ktr_io_params_rele(p->p_ktrioparms); + p->p_ktrioparms = NULL; } - if (p->p_tracecred != td->td_ucred) { - tracecred = p->p_tracecred; - p->p_tracecred = crhold(td->td_ucred); + if (p->p_ktrioparms == NULL) { + p->p_ktrioparms = new_kiop; + ktr_io_params_ref(new_kiop); } p->p_traceflag |= facs; if (priv_check(td, PRIV_KTRACE) == 0) @@ -1133,23 +1200,20 @@ ktrops(struct thread *td, struct proc *p, int ops, int facs, struct vnode *vp) /* KTROP_CLEAR */ if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) /* no more tracing */ - ktr_freeproc(p, &tracecred, &tracevp); + old_kiop = ktr_freeproc(p); } mtx_unlock(&ktrace_mtx); if ((p->p_traceflag & KTRFAC_MASK) != 0) ktrprocctor_entered(td, p); PROC_UNLOCK(p); - if (tracevp != NULL) - vrele(tracevp); - if (tracecred != NULL) - crfree(tracecred); + ktr_io_params_free(old_kiop); return (1); } static int ktrsetchildren(struct thread *td, struct proc *top, int ops, int facs, - struct vnode *vp) + struct ktr_io_params *new_kiop) { struct proc *p; int ret = 0; @@ -1158,7 +1222,7 @@ ktrsetchildren(struct thread *td, struct proc *top, int ops, int facs, PROC_LOCK_ASSERT(p, MA_OWNED); sx_assert(&proctree_lock, SX_LOCKED); for (;;) { - ret |= ktrops(td, p, ops, facs, vp); + ret |= ktrops(td, p, ops, facs, new_kiop); /* * If this process has children, descend to them next, * otherwise do any siblings, and if done with this level, @@ -1183,6 +1247,7 @@ ktrsetchildren(struct thread *td, struct proc *top, int ops, int facs, static void ktr_writerequest(struct thread *td, struct ktr_request *req) { + struct ktr_io_params *kiop; struct ktr_header *kth; struct vnode *vp; struct proc *p; @@ -1193,6 +1258,8 @@ ktr_writerequest(struct thread *td, struct ktr_request *req) int datalen, buflen; int error; + p = td->td_proc; + /* * We hold the vnode and credential for use in I/O in case ktrace is * disabled on the process as we write out the request. @@ -1201,20 +1268,22 @@ ktr_writerequest(struct thread *td, struct ktr_request *req) * the vnode has been closed. */ mtx_lock(&ktrace_mtx); - vp = td->td_proc->p_tracevp; - cred = td->td_proc->p_tracecred; + + kiop = p->p_ktrioparms; /* - * If vp is NULL, the vp has been cleared out from under this - * request, so just drop it. Make sure the credential and vnode are - * in sync: we should have both or neither. + * If kiop is NULL, it has been cleared out from under this + * request, so just drop it. */ - if (vp == NULL) { - KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL")); + if (kiop == NULL) { mtx_unlock(&ktrace_mtx); return; } - VREF(vp); + + vp = kiop->vp; + cred = kiop->cr; + + vrefact(vp); KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); crhold(cred); mtx_unlock(&ktrace_mtx); @@ -1258,7 +1327,7 @@ ktr_writerequest(struct thread *td, struct ktr_request *req) VOP_UNLOCK(vp); vn_finished_write(mp); crfree(cred); - if (!error) { + if (error == 0) { vrele(vp); return; } @@ -1268,24 +1337,17 @@ ktr_writerequest(struct thread *td, struct ktr_request *req) * process. Other processes might still be suitable for * writes to this vnode. */ - p = td->td_proc; log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped for pid %d\n", error, p->p_pid); - cred = NULL; - sx_slock(&allproc_lock); + PROC_LOCK(p); mtx_lock(&ktrace_mtx); - if (p->p_tracevp == vp) - ktr_freeproc(p, &cred, NULL); + if (p->p_ktrioparms != NULL && p->p_ktrioparms->vp == vp) + kiop = ktr_freeproc(p); mtx_unlock(&ktrace_mtx); PROC_UNLOCK(p); - if (cred != NULL) { - crfree(cred); - cred = NULL; - } - sx_sunlock(&allproc_lock); - vrele(vp); + ktr_io_params_free(kiop); vrele(vp); } diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 33f168836370..ec732e8db060 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -75,6 +75,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#ifdef KTRACE +#include +#endif #ifdef DDB #include @@ -1058,7 +1061,7 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp) kp->ki_args = p->p_args; kp->ki_textvp = p->p_textvp; #ifdef KTRACE - kp->ki_tracep = p->p_tracevp; + kp->ki_tracep = ktr_get_tracevp(p, false); kp->ki_traceflag = p->p_traceflag; #endif kp->ki_fd = p->p_fd; diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index d5549baa6ad5..a52d6e75fd03 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -96,11 +96,11 @@ _Static_assert(offsetof(struct proc, p_flag) == 0xb8, "struct proc KBI p_flag"); _Static_assert(offsetof(struct proc, p_pid) == 0xc4, "struct proc KBI p_pid"); -_Static_assert(offsetof(struct proc, p_filemon) == 0x3c0, +_Static_assert(offsetof(struct proc, p_filemon) == 0x3b8, "struct proc KBI p_filemon"); -_Static_assert(offsetof(struct proc, p_comm) == 0x3d8, +_Static_assert(offsetof(struct proc, p_comm) == 0x3d0, "struct proc KBI p_comm"); -_Static_assert(offsetof(struct proc, p_emuldata) == 0x4b8, +_Static_assert(offsetof(struct proc, p_emuldata) == 0x4b0, "struct proc KBI p_emuldata"); #endif #ifdef __i386__ @@ -116,11 +116,11 @@ _Static_assert(offsetof(struct proc, p_flag) == 0x6c, "struct proc KBI p_flag"); _Static_assert(offsetof(struct proc, p_pid) == 0x78, "struct proc KBI p_pid"); -_Static_assert(offsetof(struct proc, p_filemon) == 0x26c, +_Static_assert(offsetof(struct proc, p_filemon) == 0x268, "struct proc KBI p_filemon"); -_Static_assert(offsetof(struct proc, p_comm) == 0x280, +_Static_assert(offsetof(struct proc, p_comm) == 0x27c, "struct proc KBI p_comm"); -_Static_assert(offsetof(struct proc, p_emuldata) == 0x30c, +_Static_assert(offsetof(struct proc, p_emuldata) == 0x308, "struct proc KBI p_emuldata"); #endif diff --git a/sys/sys/ktrace.h b/sys/sys/ktrace.h index a0b02f7d3ac5..c4ab985722c0 100644 --- a/sys/sys/ktrace.h +++ b/sys/sys/ktrace.h @@ -265,6 +265,10 @@ struct ktr_struct_array { #define KTRFAC_DROP 0x20000000 /* last event was dropped */ #ifdef _KERNEL +struct ktr_io_params; + +struct vnode *ktr_get_tracevp(struct proc *, bool); +void ktr_io_params_free(struct ktr_io_params *); void ktrnamei(char *); void ktrcsw(int, int, const char *); void ktrpsig(int, sig_t, sigset_t *, int); @@ -275,7 +279,7 @@ void ktrsyscall(int, int narg, register_t args[]); void ktrsysctl(int *name, u_int namelen); void ktrsysret(int, int, register_t); void ktrprocctor(struct proc *); -void ktrprocexec(struct proc *, struct ucred **, struct vnode **); +struct ktr_io_params *ktrprocexec(struct proc *); void ktrprocexit(struct thread *); void ktrprocfork(struct proc *, struct proc *); void ktruserret(struct thread *); diff --git a/sys/sys/proc.h b/sys/sys/proc.h index d957412424aa..c54d4399f948 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -662,8 +662,7 @@ struct proc { int p_profthreads; /* (c) Num threads in addupc_task. */ volatile int p_exitthreads; /* (j) Number of threads exiting */ int p_traceflag; /* (o) Kernel trace points. */ - struct vnode *p_tracevp; /* (c + o) Trace to vnode. */ - struct ucred *p_tracecred; /* (o) Credentials to trace with. */ + struct ktr_io_params *p_ktrioparms; /* (c + o) Params for ktrace. */ struct vnode *p_textvp; /* (b) Vnode of executable. */ u_int p_lock; /* (c) Proclock (prevent swap) count. */ struct sigiolst p_sigiolst; /* (c) List of sigio sources. */