proc: implement pid hash locks and an iterator

forks, exits and waits are frequently stalled during poudriere -j 128 runs
due to killpg and process list exports performed for each package.

Both uses take the allproc lock. The latter case can be modified to iterate
over the hash with finer grained locking instead.

Reviewed by:	kib
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D17817
This commit is contained in:
Mateusz Guzik 2018-11-21 18:56:15 +00:00
parent 3bea9a2664
commit 3d3e6793f6
4 changed files with 147 additions and 109 deletions

View File

@ -435,7 +435,6 @@ exit1(struct thread *td, int rval, int signo)
sx_xlock(&allproc_lock);
LIST_REMOVE(p, p_list);
LIST_INSERT_HEAD(&zombproc, p, p_list);
LIST_REMOVE(p, p_hash);
sx_xunlock(&allproc_lock);
/*
@ -876,6 +875,9 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options)
sx_xlock(&allproc_lock);
LIST_REMOVE(p, p_list); /* off zombproc */
sx_xunlock(&allproc_lock);
sx_xlock(PIDHASHLOCK(p->p_pid));
LIST_REMOVE(p, p_hash);
sx_xunlock(PIDHASHLOCK(p->p_pid));
LIST_REMOVE(p, p_sibling);
reaper_abandon_children(p, true);
LIST_REMOVE(p, p_reapsibling);

View File

@ -406,7 +406,9 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
AUDIT_ARG_PID(p2->p_pid);
LIST_INSERT_HEAD(&allproc, p2, p_list);
allproc_gen++;
sx_xlock(PIDHASHLOCK(p2->p_pid));
LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
sx_xunlock(PIDHASHLOCK(p2->p_pid));
PROC_LOCK(p2);
PROC_LOCK(p1);

View File

@ -118,7 +118,9 @@ static struct proc *zpfind_locked(pid_t pid);
* Other process lists
*/
struct pidhashhead *pidhashtbl;
struct sx *pidhashtbl_lock;
u_long pidhash;
u_long pidhashlock;
struct pgrphashhead *pgrphashtbl;
u_long pgrphash;
struct proclist allproc;
@ -173,6 +175,7 @@ CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE);
void
procinit(void)
{
u_long i;
sx_init(&allproc_lock, "allproc");
sx_init(&proctree_lock, "proctree");
@ -180,6 +183,13 @@ procinit(void)
LIST_INIT(&allproc);
LIST_INIT(&zombproc);
pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
pidhashlock = (pidhash + 1) / 64;
if (pidhashlock > 0)
pidhashlock--;
pidhashtbl_lock = malloc(sizeof(*pidhashtbl_lock) * (pidhashlock + 1),
M_PROC, M_WAITOK | M_ZERO);
for (i = 0; i < pidhashlock + 1; i++)
sx_init(&pidhashtbl_lock[i], "pidhash");
pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
proc_ctor, proc_dtor, proc_init, proc_fini,
@ -306,7 +316,7 @@ pfind_locked(pid_t pid)
LIST_FOREACH(p, PIDHASH(pid), p_hash) {
if (p->p_pid == pid) {
PROC_LOCK(p);
if (p->p_state == PRS_NEW) {
if (p->p_state == PRS_NEW || p->p_state == PRS_ZOMBIE) {
PROC_UNLOCK(p);
p = NULL;
}
@ -1421,13 +1431,134 @@ sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags)
return (0);
}
int
proc_iterate(int (*cb)(struct proc *, void *), void *cbarg)
{
struct proc *p;
int error, i, j;
for (i = 0; i < pidhashlock + 1; i++) {
sx_slock(&pidhashtbl_lock[i]);
for (j = i; j <= pidhash; j += pidhashlock + 1) {
LIST_FOREACH(p, &pidhashtbl[j], p_hash) {
if (p->p_state == PRS_NEW)
continue;
error = cb(p, cbarg);
PROC_LOCK_ASSERT(p, MA_NOTOWNED);
if (error != 0) {
sx_sunlock(&pidhashtbl_lock[i]);
return (error);
}
}
}
sx_sunlock(&pidhashtbl_lock[i]);
}
return (0);
}
struct kern_proc_out_args {
struct sysctl_req *req;
int flags;
int oid_number;
int *name;
};
static int
sysctl_kern_proc_iterate(struct proc *p, void *origarg)
{
struct kern_proc_out_args *arg = origarg;
int *name = arg->name;
int oid_number = arg->oid_number;
int flags = arg->flags;
struct sysctl_req *req = arg->req;
int error = 0;
PROC_LOCK(p);
KASSERT(p->p_ucred != NULL,
("process credential is NULL for non-NEW proc"));
/*
* Show a user only appropriate processes.
*/
if (p_cansee(curthread, p))
goto skip;
/*
* TODO - make more efficient (see notes below).
* do by session.
*/
switch (oid_number) {
case KERN_PROC_GID:
if (p->p_ucred->cr_gid != (gid_t)name[0])
goto skip;
break;
case KERN_PROC_PGRP:
/* could do this by traversing pgrp */
if (p->p_pgrp == NULL ||
p->p_pgrp->pg_id != (pid_t)name[0])
goto skip;
break;
case KERN_PROC_RGID:
if (p->p_ucred->cr_rgid != (gid_t)name[0])
goto skip;
break;
case KERN_PROC_SESSION:
if (p->p_session == NULL ||
p->p_session->s_sid != (pid_t)name[0])
goto skip;
break;
case KERN_PROC_TTY:
if ((p->p_flag & P_CONTROLT) == 0 ||
p->p_session == NULL)
goto skip;
/* XXX proctree_lock */
SESS_LOCK(p->p_session);
if (p->p_session->s_ttyp == NULL ||
tty_udev(p->p_session->s_ttyp) !=
(dev_t)name[0]) {
SESS_UNLOCK(p->p_session);
goto skip;
}
SESS_UNLOCK(p->p_session);
break;
case KERN_PROC_UID:
if (p->p_ucred->cr_uid != (uid_t)name[0])
goto skip;
break;
case KERN_PROC_RUID:
if (p->p_ucred->cr_ruid != (uid_t)name[0])
goto skip;
break;
case KERN_PROC_PROC:
break;
default:
break;
}
error = sysctl_out_proc(p, req, flags);
PROC_LOCK_ASSERT(p, MA_NOTOWNED);
return (error);
skip:
PROC_UNLOCK(p);
return (0);
}
static int
sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
{
struct kern_proc_out_args iterarg;
int *name = (int *)arg1;
u_int namelen = arg2;
struct proc *p;
int flags, doingzomb, oid_number;
int flags, oid_number;
int error = 0;
oid_number = oidp->oid_number;
@ -1479,112 +1610,11 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
if (error != 0)
return (error);
}
sx_slock(&allproc_lock);
for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) {
if (!doingzomb)
p = LIST_FIRST(&allproc);
else
p = LIST_FIRST(&zombproc);
for (; p != NULL; p = LIST_NEXT(p, p_list)) {
/*
* Skip embryonic processes.
*/
if (p->p_state == PRS_NEW)
continue;
PROC_LOCK(p);
KASSERT(p->p_ucred != NULL,
("process credential is NULL for non-NEW proc"));
/*
* Show a user only appropriate processes.
*/
if (p_cansee(curthread, p)) {
PROC_UNLOCK(p);
continue;
}
/*
* TODO - make more efficient (see notes below).
* do by session.
*/
switch (oid_number) {
case KERN_PROC_GID:
if (p->p_ucred->cr_gid != (gid_t)name[0]) {
PROC_UNLOCK(p);
continue;
}
break;
case KERN_PROC_PGRP:
/* could do this by traversing pgrp */
if (p->p_pgrp == NULL ||
p->p_pgrp->pg_id != (pid_t)name[0]) {
PROC_UNLOCK(p);
continue;
}
break;
case KERN_PROC_RGID:
if (p->p_ucred->cr_rgid != (gid_t)name[0]) {
PROC_UNLOCK(p);
continue;
}
break;
case KERN_PROC_SESSION:
if (p->p_session == NULL ||
p->p_session->s_sid != (pid_t)name[0]) {
PROC_UNLOCK(p);
continue;
}
break;
case KERN_PROC_TTY:
if ((p->p_flag & P_CONTROLT) == 0 ||
p->p_session == NULL) {
PROC_UNLOCK(p);
continue;
}
/* XXX proctree_lock */
SESS_LOCK(p->p_session);
if (p->p_session->s_ttyp == NULL ||
tty_udev(p->p_session->s_ttyp) !=
(dev_t)name[0]) {
SESS_UNLOCK(p->p_session);
PROC_UNLOCK(p);
continue;
}
SESS_UNLOCK(p->p_session);
break;
case KERN_PROC_UID:
if (p->p_ucred->cr_uid != (uid_t)name[0]) {
PROC_UNLOCK(p);
continue;
}
break;
case KERN_PROC_RUID:
if (p->p_ucred->cr_ruid != (uid_t)name[0]) {
PROC_UNLOCK(p);
continue;
}
break;
case KERN_PROC_PROC:
break;
default:
break;
}
error = sysctl_out_proc(p, req, flags);
if (error)
goto out;
}
}
out:
sx_sunlock(&allproc_lock);
iterarg.flags = flags;
iterarg.oid_number = oid_number;
iterarg.req = req;
iterarg.name = name;
error = proc_iterate(sysctl_kern_proc_iterate, &iterarg);
return (error);
}

View File

@ -942,8 +942,11 @@ extern pid_t pid_max;
#define THREAD_CAN_SLEEP() ((curthread)->td_no_sleeping == 0)
#define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash])
#define PIDHASHLOCK(pid) (&pidhashtbl_lock[((pid) & pidhashlock)])
extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
extern struct sx *pidhashtbl_lock;
extern u_long pidhash;
extern u_long pidhashlock;
#define TIDHASH(tid) (&tidhashtbl[(tid) & tidhash])
extern LIST_HEAD(tidhashhead, thread) *tidhashtbl;
extern u_long tidhash;
@ -1046,6 +1049,7 @@ int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb);
int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb);
int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb);
void procinit(void);
int proc_iterate(int (*cb)(struct proc *, void *), void *cbarg);
void proc_linkup0(struct proc *p, struct thread *td);
void proc_linkup(struct proc *p, struct thread *td);
struct proc *proc_realparent(struct proc *child);