Manage process-related IDs with bitmaps

Currently unique pid allocation on fork often requires a full walk of
process, group, session lists to make sure it is not used by anything.
This has a side effect of requiring proctree to be held along with allproc,
which adds more contention in poudriere -j 128.

The patch below implements trivial bitmaps which gets rid of the problem.
Dedicated lock is introduced to manage IDs.

While here a bug was discovered: all processes would inherit reap id from
the first process spawned by init. This had a side effect of keeping the
ID used and when allocation rolls over to the beginning it keeps being
skipped.

The patch is loosely based on initial work by mjoras@.

Reviewed by:	kib
Sponsored by:	The FreeBSD Foundation
This commit is contained in:
Mateusz Guzik 2018-12-07 12:22:32 +00:00
parent 6e8c1ccbe2
commit 34ebdceac0
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=341684
4 changed files with 120 additions and 77 deletions

View File

@ -147,6 +147,27 @@ reaper_abandon_children(struct proc *p, bool exiting)
p->p_treeflag &= ~P_TREE_REAPER;
}
static void
reaper_clear(struct proc *p)
{
struct proc *p1;
bool clear;
sx_assert(&proctree_lock, SX_LOCKED);
LIST_REMOVE(p, p_reapsibling);
if (p->p_reapsubtree == 1)
return;
clear = true;
LIST_FOREACH(p1, &p->p_reaper->p_reaplist, p_reapsibling) {
if (p1->p_reapsubtree == p->p_reapsubtree) {
clear = false;
break;
}
}
if (clear)
proc_id_clear(PROC_ID_REAP, p->p_reapsubtree);
}
static void
clear_orphan(struct proc *p)
{
@ -881,7 +902,8 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options)
sx_xunlock(PIDHASHLOCK(p->p_pid));
LIST_REMOVE(p, p_sibling);
reaper_abandon_children(p, true);
LIST_REMOVE(p, p_reapsibling);
reaper_clear(p);
proc_id_clear(PROC_ID_PID, p->p_pid);
PROC_LOCK(p);
clear_orphan(p);
PROC_UNLOCK(p);

View File

@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitstring.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
@ -232,20 +233,16 @@ sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
0, 0, sysctl_kern_randompid, "I", "Random PID modulus. Special values: 0: disable, 1: choose random value");
extern bitstr_t proc_id_pidmap;
extern bitstr_t proc_id_grpidmap;
extern bitstr_t proc_id_sessidmap;
extern bitstr_t proc_id_reapmap;
static int
fork_findpid(int flags)
{
struct proc *p;
pid_t result;
int trypid;
static int pidchecked = 0;
bool locked_zomb = false;
/*
* Requires allproc_lock in order to iterate over the list
* of processes, and proctree_lock to access p_pgrp.
*/
sx_assert(&allproc_lock, SX_LOCKED);
sx_assert(&proctree_lock, SX_LOCKED);
/*
* Find an unused process ID. We remember a range of unused IDs
@ -262,6 +259,7 @@ fork_findpid(int flags)
if (randompid)
trypid += arc4random() % randompid;
}
mtx_lock(&procid_lock);
retry:
/*
* If the process ID prototype has wrapped around,
@ -272,74 +270,28 @@ fork_findpid(int flags)
trypid = trypid % pid_max;
if (trypid < 100)
trypid += 100;
pidchecked = 0;
}
if (trypid >= pidchecked) {
int doingzomb = 0;
pidchecked = PID_MAX;
/*
* Scan the active and zombie procs to check whether this pid
* is in use. Remember the lowest pid that's greater
* than trypid, so we can avoid checking for a while.
*
* Avoid reuse of the process group id, session id or
* the reaper subtree id. Note that for process group
* and sessions, the amount of reserved pids is
* limited by process limit. For the subtree ids, the
* id is kept reserved only while there is a
* non-reaped process in the subtree, so amount of
* reserved pids is limited by process limit times
* two.
*/
p = LIST_FIRST(&allproc);
again:
for (; p != NULL; p = LIST_NEXT(p, p_list)) {
while (p->p_pid == trypid ||
p->p_reapsubtree == trypid ||
(p->p_pgrp != NULL &&
(p->p_pgrp->pg_id == trypid ||
(p->p_session != NULL &&
p->p_session->s_sid == trypid)))) {
trypid++;
if (trypid >= pidchecked)
goto retry;
}
if (p->p_pid > trypid && pidchecked > p->p_pid)
pidchecked = p->p_pid;
if (p->p_pgrp != NULL) {
if (p->p_pgrp->pg_id > trypid &&
pidchecked > p->p_pgrp->pg_id)
pidchecked = p->p_pgrp->pg_id;
if (p->p_session != NULL &&
p->p_session->s_sid > trypid &&
pidchecked > p->p_session->s_sid)
pidchecked = p->p_session->s_sid;
}
}
if (!doingzomb) {
doingzomb = 1;
if (!locked_zomb) {
sx_slock(&zombproc_lock);
locked_zomb = true;
}
p = LIST_FIRST(&zombproc);
goto again;
}
bit_ffc_at(&proc_id_pidmap, trypid, pid_max, &result);
if (result == -1)
goto retry;
if (bit_test(&proc_id_grpidmap, result) ||
bit_test(&proc_id_sessidmap, result) ||
bit_test(&proc_id_reapmap, result)) {
trypid++;
goto retry;
}
/*
* RFHIGHPID does not mess with the lastpid counter during boot.
*/
if (flags & RFHIGHPID)
pidchecked = 0;
else
lastpid = trypid;
if ((flags & RFHIGHPID) == 0)
lastpid = result;
if (locked_zomb)
sx_sunlock(&zombproc_lock);
bit_set(&proc_id_pidmap, result);
mtx_unlock(&procid_lock);
return (trypid);
return (result);
}
static int
@ -402,13 +354,11 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
struct filedesc_to_leader *fdtol;
struct sigacts *newsigacts;
sx_assert(&proctree_lock, SX_LOCKED);
sx_assert(&allproc_lock, SX_XLOCKED);
p1 = td->td_proc;
trypid = fork_findpid(fr->fr_flags);
p2->p_state = PRS_NEW; /* protect against others */
p2->p_pid = trypid;
AUDIT_ARG_PID(p2->p_pid);
@ -421,7 +371,6 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
PROC_LOCK(p1);
sx_xunlock(&allproc_lock);
sx_xunlock(&proctree_lock);
bcopy(&p1->p_startcopy, &p2->p_startcopy,
__rangeof(struct proc, p_startcopy, p_endcopy));
@ -657,8 +606,10 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
LIST_INIT(&p2->p_reaplist);
LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling);
if (p2->p_reaper == p1)
if (p2->p_reaper == p1 && p1 != initproc) {
p2->p_reapsubtree = p2->p_pid;
proc_id_set_cond(PROC_ID_REAP, p2->p_pid);
}
sx_xunlock(&proctree_lock);
/* Inform accounting that we have forked. */
@ -975,8 +926,6 @@ fork1(struct thread *td, struct fork_req *fr)
newproc->p_klist = knlist_alloc(&newproc->p_mtx);
STAILQ_INIT(&newproc->p_ktr);
/* We have to lock the process tree while we look for a pid. */
sx_xlock(&proctree_lock);
sx_xlock(&allproc_lock);
/*
@ -999,7 +948,6 @@ fork1(struct thread *td, struct fork_req *fr)
error = EAGAIN;
sx_xunlock(&allproc_lock);
sx_xunlock(&proctree_lock);
#ifdef MAC
mac_proc_destroy(newproc);
#endif

View File

@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitstring.h>
#include <sys/elf.h>
#include <sys/eventhandler.h>
#include <sys/exec.h>
@ -128,6 +129,7 @@ struct sx __exclusive_cache_line allproc_lock;
struct sx __exclusive_cache_line zombproc_lock;
struct sx __exclusive_cache_line proctree_lock;
struct mtx __exclusive_cache_line ppeers_lock;
struct mtx __exclusive_cache_line procid_lock;
uma_zone_t proc_zone;
/*
@ -181,6 +183,7 @@ procinit(void)
sx_init(&zombproc_lock, "zombproc");
sx_init(&proctree_lock, "proctree");
mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF);
mtx_init(&procid_lock, "procid", NULL, MTX_DEF);
LIST_INIT(&allproc);
LIST_INIT(&zombproc);
pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
@ -292,6 +295,62 @@ proc_fini(void *mem, int size)
#endif
}
/*
* PID space management.
*
* These bitmaps are used by fork_findpid.
*/
bitstr_t bit_decl(proc_id_pidmap, PID_MAX);
bitstr_t bit_decl(proc_id_grpidmap, PID_MAX);
bitstr_t bit_decl(proc_id_sessidmap, PID_MAX);
bitstr_t bit_decl(proc_id_reapmap, PID_MAX);
static bitstr_t *proc_id_array[] = {
proc_id_pidmap,
proc_id_grpidmap,
proc_id_sessidmap,
proc_id_reapmap,
};
void
proc_id_set(int type, pid_t id)
{
KASSERT(type >= 0 && type < nitems(proc_id_array),
("invalid type %d\n", type));
mtx_lock(&procid_lock);
KASSERT(bit_test(proc_id_array[type], id) == 0,
("bit %d already set in %d\n", id, type));
bit_set(proc_id_array[type], id);
mtx_unlock(&procid_lock);
}
void
proc_id_set_cond(int type, pid_t id)
{
KASSERT(type >= 0 && type < nitems(proc_id_array),
("invalid type %d\n", type));
if (bit_test(proc_id_array[type], id))
return;
mtx_lock(&procid_lock);
bit_set(proc_id_array[type], id);
mtx_unlock(&procid_lock);
}
void
proc_id_clear(int type, pid_t id)
{
KASSERT(type >= 0 && type < nitems(proc_id_array),
("invalid type %d\n", type));
mtx_lock(&procid_lock);
KASSERT(bit_test(proc_id_array[type], id) != 0,
("bit %d not set in %d\n", id, type));
bit_clear(proc_id_array[type], id);
mtx_unlock(&procid_lock);
}
/*
* Is p an inferior of the current process?
*/
@ -495,6 +554,7 @@ enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess)
PGRP_LOCK(pgrp);
sess->s_leader = p;
sess->s_sid = p->p_pid;
proc_id_set(PROC_ID_SESSION, p->p_pid);
refcount_init(&sess->s_count, 1);
sess->s_ttyvp = NULL;
sess->s_ttydp = NULL;
@ -510,6 +570,7 @@ enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess)
PGRP_LOCK(pgrp);
}
pgrp->pg_id = pgid;
proc_id_set(PROC_ID_GROUP, p->p_pid);
LIST_INIT(&pgrp->pg_members);
/*
@ -640,6 +701,7 @@ pgdelete(struct pgrp *pgrp)
tty_rel_pgrp(tp, pgrp);
}
proc_id_clear(PROC_ID_GROUP, pgrp->pg_id);
mtx_destroy(&pgrp->pg_mtx);
free(pgrp, M_PGRP);
sess_release(savesess);
@ -824,6 +886,7 @@ sess_release(struct session *s)
tty_lock(s->s_ttyp);
tty_rel_sess(s->s_ttyp, s);
}
proc_id_clear(PROC_ID_SESSION, s->s_sid);
mtx_destroy(&s->s_mtx);
free(s, M_SESSION);
}

View File

@ -962,6 +962,7 @@ extern int allproc_gen;
extern struct sx zombproc_lock;
extern struct sx proctree_lock;
extern struct mtx ppeers_lock;
extern struct mtx procid_lock;
extern struct proc proc0; /* Process slot for swapper. */
extern struct thread0_storage thread0_st; /* Primary thread in proc0. */
#define thread0 (thread0_st.t0st_thread)
@ -1165,6 +1166,15 @@ td_softdep_cleanup(struct thread *td)
softdep_ast_cleanup(td);
}
#define PROC_ID_PID 0
#define PROC_ID_GROUP 1
#define PROC_ID_SESSION 2
#define PROC_ID_REAP 3
void proc_id_set(int type, pid_t id);
void proc_id_set_cond(int type, pid_t id);
void proc_id_clear(int type, pid_t id);
#endif /* _KERNEL */
#endif /* !_SYS_PROC_H_ */