From 3d3e6793f6f502c1220f6384dc79e1e0595ba0cb Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 21 Nov 2018 18:56:15 +0000 Subject: [PATCH] proc: implement pid hash locks and an iterator forks, exits and waits are frequently stalled during poudriere -j 128 runs due to killpg and process list exports performed for each package. Both uses take the allproc lock. The latter case can be modified to iterate over the hash with finer grained locking instead. Reviewed by: kib Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D17817 --- sys/kern/kern_exit.c | 4 +- sys/kern/kern_fork.c | 2 + sys/kern/kern_proc.c | 246 ++++++++++++++++++++++++------------------- sys/sys/proc.h | 4 + 4 files changed, 147 insertions(+), 109 deletions(-) diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index a50c088127d1..6b601a7c19ab 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -435,7 +435,6 @@ exit1(struct thread *td, int rval, int signo) sx_xlock(&allproc_lock); LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&zombproc, p, p_list); - LIST_REMOVE(p, p_hash); sx_xunlock(&allproc_lock); /* @@ -876,6 +875,9 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options) sx_xlock(&allproc_lock); LIST_REMOVE(p, p_list); /* off zombproc */ sx_xunlock(&allproc_lock); + sx_xlock(PIDHASHLOCK(p->p_pid)); + LIST_REMOVE(p, p_hash); + sx_xunlock(PIDHASHLOCK(p->p_pid)); LIST_REMOVE(p, p_sibling); reaper_abandon_children(p, true); LIST_REMOVE(p, p_reapsibling); diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index cea57b6c34a6..a155246e6cc0 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -406,7 +406,9 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread * AUDIT_ARG_PID(p2->p_pid); LIST_INSERT_HEAD(&allproc, p2, p_list); allproc_gen++; + sx_xlock(PIDHASHLOCK(p2->p_pid)); LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); + sx_xunlock(PIDHASHLOCK(p2->p_pid)); PROC_LOCK(p2); PROC_LOCK(p1); diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 24083b33fca1..c34f73368bfd 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -118,7 +118,9 @@ static struct proc *zpfind_locked(pid_t pid); * Other process lists */ struct pidhashhead *pidhashtbl; +struct sx *pidhashtbl_lock; u_long pidhash; +u_long pidhashlock; struct pgrphashhead *pgrphashtbl; u_long pgrphash; struct proclist allproc; @@ -173,6 +175,7 @@ CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE); void procinit(void) { + u_long i; sx_init(&allproc_lock, "allproc"); sx_init(&proctree_lock, "proctree"); @@ -180,6 +183,13 @@ procinit(void) LIST_INIT(&allproc); LIST_INIT(&zombproc); pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash); + pidhashlock = (pidhash + 1) / 64; + if (pidhashlock > 0) + pidhashlock--; + pidhashtbl_lock = malloc(sizeof(*pidhashtbl_lock) * (pidhashlock + 1), + M_PROC, M_WAITOK | M_ZERO); + for (i = 0; i < pidhashlock + 1; i++) + sx_init(&pidhashtbl_lock[i], "pidhash"); pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash); proc_zone = uma_zcreate("PROC", sched_sizeof_proc(), proc_ctor, proc_dtor, proc_init, proc_fini, @@ -306,7 +316,7 @@ pfind_locked(pid_t pid) LIST_FOREACH(p, PIDHASH(pid), p_hash) { if (p->p_pid == pid) { PROC_LOCK(p); - if (p->p_state == PRS_NEW) { + if (p->p_state == PRS_NEW || p->p_state == PRS_ZOMBIE) { PROC_UNLOCK(p); p = NULL; } @@ -1421,13 +1431,134 @@ sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags) return (0); } +int +proc_iterate(int (*cb)(struct proc *, void *), void *cbarg) +{ + struct proc *p; + int error, i, j; + + for (i = 0; i < pidhashlock + 1; i++) { + sx_slock(&pidhashtbl_lock[i]); + for (j = i; j <= pidhash; j += pidhashlock + 1) { + LIST_FOREACH(p, &pidhashtbl[j], p_hash) { + if (p->p_state == PRS_NEW) + continue; + error = cb(p, cbarg); + PROC_LOCK_ASSERT(p, MA_NOTOWNED); + if (error != 0) { + sx_sunlock(&pidhashtbl_lock[i]); + return (error); + } + } + } + sx_sunlock(&pidhashtbl_lock[i]); + } + return (0); +} + +struct kern_proc_out_args { + struct sysctl_req *req; + int flags; + int oid_number; + int *name; +}; + +static int +sysctl_kern_proc_iterate(struct proc *p, void *origarg) +{ + struct kern_proc_out_args *arg = origarg; + int *name = arg->name; + int oid_number = arg->oid_number; + int flags = arg->flags; + struct sysctl_req *req = arg->req; + int error = 0; + + PROC_LOCK(p); + + KASSERT(p->p_ucred != NULL, + ("process credential is NULL for non-NEW proc")); + /* + * Show a user only appropriate processes. + */ + if (p_cansee(curthread, p)) + goto skip; + /* + * TODO - make more efficient (see notes below). + * do by session. + */ + switch (oid_number) { + + case KERN_PROC_GID: + if (p->p_ucred->cr_gid != (gid_t)name[0]) + goto skip; + break; + + case KERN_PROC_PGRP: + /* could do this by traversing pgrp */ + if (p->p_pgrp == NULL || + p->p_pgrp->pg_id != (pid_t)name[0]) + goto skip; + break; + + case KERN_PROC_RGID: + if (p->p_ucred->cr_rgid != (gid_t)name[0]) + goto skip; + break; + + case KERN_PROC_SESSION: + if (p->p_session == NULL || + p->p_session->s_sid != (pid_t)name[0]) + goto skip; + break; + + case KERN_PROC_TTY: + if ((p->p_flag & P_CONTROLT) == 0 || + p->p_session == NULL) + goto skip; + /* XXX proctree_lock */ + SESS_LOCK(p->p_session); + if (p->p_session->s_ttyp == NULL || + tty_udev(p->p_session->s_ttyp) != + (dev_t)name[0]) { + SESS_UNLOCK(p->p_session); + goto skip; + } + SESS_UNLOCK(p->p_session); + break; + + case KERN_PROC_UID: + if (p->p_ucred->cr_uid != (uid_t)name[0]) + goto skip; + break; + + case KERN_PROC_RUID: + if (p->p_ucred->cr_ruid != (uid_t)name[0]) + goto skip; + break; + + case KERN_PROC_PROC: + break; + + default: + break; + + } + error = sysctl_out_proc(p, req, flags); + PROC_LOCK_ASSERT(p, MA_NOTOWNED); + return (error); +skip: + PROC_UNLOCK(p); + return (0); +} + static int sysctl_kern_proc(SYSCTL_HANDLER_ARGS) { + struct kern_proc_out_args iterarg; int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; - int flags, doingzomb, oid_number; + int flags, oid_number; int error = 0; oid_number = oidp->oid_number; @@ -1479,112 +1610,11 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS) if (error != 0) return (error); } - sx_slock(&allproc_lock); - for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) { - if (!doingzomb) - p = LIST_FIRST(&allproc); - else - p = LIST_FIRST(&zombproc); - for (; p != NULL; p = LIST_NEXT(p, p_list)) { - /* - * Skip embryonic processes. - */ - if (p->p_state == PRS_NEW) - continue; - PROC_LOCK(p); - KASSERT(p->p_ucred != NULL, - ("process credential is NULL for non-NEW proc")); - /* - * Show a user only appropriate processes. - */ - if (p_cansee(curthread, p)) { - PROC_UNLOCK(p); - continue; - } - /* - * TODO - make more efficient (see notes below). - * do by session. - */ - switch (oid_number) { - - case KERN_PROC_GID: - if (p->p_ucred->cr_gid != (gid_t)name[0]) { - PROC_UNLOCK(p); - continue; - } - break; - - case KERN_PROC_PGRP: - /* could do this by traversing pgrp */ - if (p->p_pgrp == NULL || - p->p_pgrp->pg_id != (pid_t)name[0]) { - PROC_UNLOCK(p); - continue; - } - break; - - case KERN_PROC_RGID: - if (p->p_ucred->cr_rgid != (gid_t)name[0]) { - PROC_UNLOCK(p); - continue; - } - break; - - case KERN_PROC_SESSION: - if (p->p_session == NULL || - p->p_session->s_sid != (pid_t)name[0]) { - PROC_UNLOCK(p); - continue; - } - break; - - case KERN_PROC_TTY: - if ((p->p_flag & P_CONTROLT) == 0 || - p->p_session == NULL) { - PROC_UNLOCK(p); - continue; - } - /* XXX proctree_lock */ - SESS_LOCK(p->p_session); - if (p->p_session->s_ttyp == NULL || - tty_udev(p->p_session->s_ttyp) != - (dev_t)name[0]) { - SESS_UNLOCK(p->p_session); - PROC_UNLOCK(p); - continue; - } - SESS_UNLOCK(p->p_session); - break; - - case KERN_PROC_UID: - if (p->p_ucred->cr_uid != (uid_t)name[0]) { - PROC_UNLOCK(p); - continue; - } - break; - - case KERN_PROC_RUID: - if (p->p_ucred->cr_ruid != (uid_t)name[0]) { - PROC_UNLOCK(p); - continue; - } - break; - - case KERN_PROC_PROC: - break; - - default: - break; - - } - - error = sysctl_out_proc(p, req, flags); - if (error) - goto out; - } - } -out: - sx_sunlock(&allproc_lock); + iterarg.flags = flags; + iterarg.oid_number = oid_number; + iterarg.req = req; + iterarg.name = name; + error = proc_iterate(sysctl_kern_proc_iterate, &iterarg); return (error); } diff --git a/sys/sys/proc.h b/sys/sys/proc.h index c285afce4011..85232073c911 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -942,8 +942,11 @@ extern pid_t pid_max; #define THREAD_CAN_SLEEP() ((curthread)->td_no_sleeping == 0) #define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) +#define PIDHASHLOCK(pid) (&pidhashtbl_lock[((pid) & pidhashlock)]) extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; +extern struct sx *pidhashtbl_lock; extern u_long pidhash; +extern u_long pidhashlock; #define TIDHASH(tid) (&tidhashtbl[(tid) & tidhash]) extern LIST_HEAD(tidhashhead, thread) *tidhashtbl; extern u_long tidhash; @@ -1046,6 +1049,7 @@ int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb); void procinit(void); +int proc_iterate(int (*cb)(struct proc *, void *), void *cbarg); void proc_linkup0(struct proc *p, struct thread *td); void proc_linkup(struct proc *p, struct thread *td); struct proc *proc_realparent(struct proc *child);