diff --git a/lib/libc/sys/procctl.2 b/lib/libc/sys/procctl.2 index 6ad0590804ad..a5d3d8990314 100644 --- a/lib/libc/sys/procctl.2 +++ b/lib/libc/sys/procctl.2 @@ -2,6 +2,10 @@ .\" Written by: John H. Baldwin .\" All rights reserved. .\" +.\" Copyright (c) 2014 The FreeBSD Foundation +.\" Portions of this documentation were written by Konstantin Belousov +.\" under sponsorship from the FreeBSD Foundation. +.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: @@ -25,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 19, 2013 +.Dd December 15, 2014 .Dt PROCCTL 2 .Os .Sh NAME @@ -67,7 +71,7 @@ The control request to perform is specified by the .Fa cmd argument. The following commands are supported: -.Bl -tag -width "Dv PROC_SPROTECT" +.Bl -tag -width "Dv PROC_REAP_GETPIDS" .It Dv PROC_SPROTECT Set process protection state. This is used to mark a process as protected from being killed if the system @@ -95,6 +99,174 @@ When used with mark all future child processes of each selected process as protected. Future child processes will also mark all of their future child processes. .El +.It Dv PROC_REAP_ACQUIRE +Acquires the reaper status for the current process. +The status means that children orphaned by the reaper's descendants, +forked after the acquisition of the status, are reparented to the +reaper. +After the system initialization, +.Xr init 8 +is the default reaper. +.Pp +.It Dv PROC_REAP_RELEASE +Releases the reaper state for the current process. +The reaper of the current process becomes the new reaper of the +current process descendants. +.It Dv PROC_REAP_STATUS +Provides the information about the reaper of the specified process, +or the process itself, in case it is a reaper. +The +.Fa data +argument must point to the +.Vt "struct procctl_reaper_status" , +which is filled by the syscall on successful return. 
+.Bd -literal +struct procctl_reaper_status { + u_int rs_flags; + u_int rs_children; + u_int rs_descendants; + pid_t rs_reaper; + pid_t rs_pid; +}; +.Ed +The +.Fa rs_flags +may have the following flags returned: +.Bl -tag -width "Dv REAPER_STATUS_REALINIT" +.It Dv REAPER_STATUS_OWNED +The specified process has acquired the reaper status and did not +release it. +When the flag is returned, the +.Fa id +pid identifies the reaper, otherwise the +.Fa rs_reaper +field of the structure is the pid of the reaper for passed process id. +.It Dv REAPER_STATUS_REALINIT +The specified process is the root of the reaper tree, i.e. +.Xr init 8 . +.El +The +.Fa rs_children +returns the number of the children of the reaper. +The +.Fa rs_descendants +returns the total number of descendants of the reaper, +not counting descendants of the reapers in the subtree. +The +.Fa rs_reaper +returns the reaper pid. +The +.Fa rs_pid +returns pid of some reaper child if there is any descendant. +.It Dv PROC_REAP_GETPIDS +Queries the list of descendants of the reaper of the specified process. +The request takes the pointer to +.Vt "struct procctl_reaper_pids" +as +.Fa data . +.Bd -literal +struct procctl_reaper_pids { + u_int rp_count; + struct procctl_reaper_pidinfo *rp_pids; +}; +.Ed +On call, the +.Fa rp_pids +must point to the array of +.Vt procctl_reaper_pidinfo +structures, to be filled on return, +and the +.Fa rp_count +must specify the size of the array, +no more than rp_count elements are filled by the kernel. +.Pp +The +.Vt "struct procctl_reaper_pidinfo" +structure provides some information about one of the reaper's descendants. +Note that a descendant which is not a direct child is subject to the +usual race between process exit and pid reuse. +.Bd -literal +struct procctl_reaper_pidinfo { + pid_t pi_pid; + pid_t pi_subtree; + u_int pi_flags; +}; +.Ed +The +.Fa pi_pid +is the process id of the descendant. 
+The +.Fa pi_subtree +provides the pid of the child of the reaper, which is (grand-)parent +of the process. +The +.Fa pi_flags +returns the following flags, further describing the descendant: +.Bl -tag -width "Dv REAPER_PIDINFO_VALID" +.It Dv REAPER_PIDINFO_VALID +Set for the +.Vt procctl_reaper_pidinfo +structure, which was filled by kernel. +Zero-filling the +.Fa rp_pids +array and testing the flag allows the caller to detect the end +of returned array. +.It Dv REAPER_PIDINFO_CHILD +The +.Fa pi_pid +is the direct child of the reaper. +.El +.It Dv PROC_REAP_KILL +Request to deliver a signal to some subset of descendants of the reaper. +The +.Fa data +must point to +.Vt procctl_reaper_kill +structure, which is used both for parameters and status return. +.Bd -literal +struct procctl_reaper_kill { + int rk_sig; + u_int rk_flags; + pid_t rk_subtree; + u_int rk_killed; + pid_t rk_fpid; +}; +.Ed +The +.Fa rk_sig +specifies the signal to be delivered. +Zero is not a valid signal number, unlike +.Xr kill 2 . +The +.Fa rk_flags +further directs the operation. +It is or-ed from the following flags: +.Bl -tag -width "Dv REAPER_KILL_CHILDREN" +.It Dv REAPER_KILL_CHILDREN +Deliver the specified signal only to direct children of the reaper. +.It Dv REAPER_KILL_SUBTREE +Deliver the specified signal only to descendants which were forked by +the direct child with pid specified in +.Fa rk_subtree . +.El +If no +.Dv REAPER_KILL_CHILDREN +and +.Dv REAPER_KILL_SUBTREE +flags are specified, all current descendants of the reaper are signalled. +.Pp +If signal was delivered to any process, the return value from the request +is zero. +In this case, +.Fa rk_killed +field is filled with the count of processes signalled. +The +.Fa rk_fpid +field is set to the pid of the first process for which signal +delivery failed, e.g. due to the permission problems. +If no such process exist, the +.Fa rk_fpid +is set to -1. 
.El .Sh RETURN VALUES If an error occurs, a value of -1 is returned and @@ -132,11 +304,48 @@ An invalid operation or flag was passed in for a .Dv PROC_SPROTECT command. +.It Bq Er EPERM +The +.Fa idtype +argument is not equal to +.Dv P_PID , +or +.Fa id +is not equal to the pid of the calling process, for +.Dv PROC_REAP_ACQUIRE +or +.Dv PROC_REAP_RELEASE +requests. +.It Bq Er EINVAL +Invalid or undefined flags were passed to +.Dv PROC_REAP_KILL +request. +.It Bq Er EINVAL +Invalid or zero signal number was requested for +.Dv PROC_REAP_KILL +request. +.It Bq Er EINVAL +The +.Dv PROC_REAP_RELEASE +request was issued by the +.Xr init 8 +process. +.It Bq Er EBUSY +The +.Dv PROC_REAP_ACQUIRE +request was issued by the process which already acquired reaper status +and did not released it. .El .Sh SEE ALSO -.Xr ptrace 2 +.Xr kill 2 , +.Xr ptrace 2 , +.Xr wait 2 , +.Xr init 8 .Sh HISTORY The .Fn procctl function appeared in .Fx 10.0 . +Reaper facility was created based on the similar feature of Linux and +DragonflyBSD, and first appeared in +.Fx 10.2 . 
diff --git a/sys/compat/freebsd32/freebsd32.h b/sys/compat/freebsd32/freebsd32.h index af10055d0936..ed3df7ac0ee5 100644 --- a/sys/compat/freebsd32/freebsd32.h +++ b/sys/compat/freebsd32/freebsd32.h @@ -390,4 +390,10 @@ struct kld32_file_stat { char pathname[MAXPATHLEN]; }; +struct procctl_reaper_pids32 { + u_int rp_count; + u_int rp_pad0[15]; + uint32_t rp_pids; +}; + #endif /* !_COMPAT_FREEBSD32_FREEBSD32_H_ */ diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index 24c573813535..1457f57b78f7 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -2957,20 +2957,63 @@ int freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) { void *data; - int error, flags; + union { + struct procctl_reaper_status rs; + struct procctl_reaper_pids rp; + struct procctl_reaper_kill rk; + } x; + union { + struct procctl_reaper_pids32 rp; + } x32; + int error, error1, flags; switch (uap->com) { case PROC_SPROTECT: error = copyin(PTRIN(uap->data), &flags, sizeof(flags)); - if (error) + if (error != 0) return (error); data = &flags; break; + case PROC_REAP_ACQUIRE: + case PROC_REAP_RELEASE: + if (uap->data != NULL) + return (EINVAL); + data = NULL; + break; + case PROC_REAP_STATUS: + data = &x.rs; + break; + case PROC_REAP_GETPIDS: + error = copyin(uap->data, &x32.rp, sizeof(x32.rp)); + if (error != 0) + return (error); + CP(x32.rp, x.rp, rp_count); + PTRIN_CP(x32.rp, x.rp, rp_pids); + data = &x.rp; + break; + case PROC_REAP_KILL: + error = copyin(uap->data, &x.rk, sizeof(x.rk)); + if (error != 0) + return (error); + data = &x.rk; + break; default: return (EINVAL); } - return (kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id), - uap->com, data)); + error = kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id), + uap->com, data); + switch (uap->com) { + case PROC_REAP_STATUS: + if (error == 0) + error = copyout(&x.rs, uap->data, sizeof(x.rs)); + break; + case PROC_REAP_KILL: + 
error1 = copyout(&x.rk, uap->data, sizeof(x.rk)); + if (error == 0) + error = error1; + break; + } + return (error); } int diff --git a/sys/conf/files b/sys/conf/files index 018d77bfa84d..939b63517d38 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2987,6 +2987,7 @@ kern/kern_pmc.c standard kern/kern_poll.c optional device_polling kern/kern_priv.c standard kern/kern_proc.c standard +kern/kern_procctl.c standard kern/kern_prot.c standard kern/kern_racct.c standard kern/kern_rangelock.c standard diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index e903f4cc89f5..beb49bc56962 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -496,7 +496,8 @@ proc0_init(void *dummy __unused) prison0.pr_cpuset = cpuset_ref(td->td_cpuset); p->p_peers = 0; p->p_leader = p; - + p->p_reaper = p; + LIST_INIT(&p->p_reaplist); strncpy(p->p_comm, "kernel", sizeof (p->p_comm)); strncpy(td->td_name, "swapper", sizeof (td->td_name)); @@ -821,8 +822,11 @@ create_init(const void *udata __unused) KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1")); /* divorce init's credentials from the kernel's */ newcred = crget(); + sx_xlock(&proctree_lock); PROC_LOCK(initproc); initproc->p_flag |= P_SYSTEM | P_INMEM; + initproc->p_treeflag |= P_TREE_REAPER; + LIST_INSERT_HEAD(&initproc->p_reaplist, &proc0, p_reapsibling); oldcred = initproc->p_ucred; crcopy(newcred, oldcred); #ifdef MAC @@ -833,6 +837,7 @@ create_init(const void *udata __unused) #endif initproc->p_ucred = newcred; PROC_UNLOCK(initproc); + sx_xunlock(&proctree_lock); crfree(oldcred); cred_update_thread(FIRST_THREAD_IN_PROC(initproc)); cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index a4313091e44d..ce1f8f9e6e32 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -123,6 +123,31 @@ proc_realparent(struct proc *child) return (parent); } +void +reaper_abandon_children(struct proc *p, bool exiting) +{ + struct 
proc *p1, *p2, *ptmp; + + sx_assert(&proctree_lock, SX_LOCKED); + KASSERT(p != initproc, ("reaper_abandon_children for initproc")); + if ((p->p_treeflag & P_TREE_REAPER) == 0) + return; + p1 = p->p_reaper; + LIST_FOREACH_SAFE(p2, &p->p_reaplist, p_reapsibling, ptmp) { + LIST_REMOVE(p2, p_reapsibling); + p2->p_reaper = p1; + p2->p_reapsubtree = p->p_reapsubtree; + LIST_INSERT_HEAD(&p1->p_reaplist, p2, p_reapsibling); + if (exiting && p2->p_pptr == p) { + PROC_LOCK(p2); + proc_reparent(p2, p1); + PROC_UNLOCK(p2); + } + } + KASSERT(LIST_EMPTY(&p->p_reaplist), ("p_reaplist not empty")); + p->p_treeflag &= ~P_TREE_REAPER; +} + static void clear_orphan(struct proc *p) { @@ -458,14 +483,14 @@ exit1(struct thread *td, int rv) sx_xlock(&proctree_lock); q = LIST_FIRST(&p->p_children); if (q != NULL) /* only need this if any child is S_ZOMB */ - wakeup(initproc); + wakeup(q->p_reaper); for (; q != NULL; q = nq) { nq = LIST_NEXT(q, p_sibling); PROC_LOCK(q); q->p_sigparent = SIGCHLD; if (!(q->p_flag & P_TRACED)) { - proc_reparent(q, initproc); + proc_reparent(q, q->p_reaper); } else { /* * Traced processes are killed since their existence @@ -473,7 +498,7 @@ exit1(struct thread *td, int rv) */ t = proc_realparent(q); if (t == p) { - proc_reparent(q, initproc); + proc_reparent(q, q->p_reaper); } else { PROC_LOCK(t); proc_reparent(q, t); @@ -562,7 +587,7 @@ exit1(struct thread *td, int rv) mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); pp = p->p_pptr; PROC_UNLOCK(pp); - proc_reparent(p, initproc); + proc_reparent(p, p->p_reaper); p->p_sigparent = SIGCHLD; PROC_LOCK(p->p_pptr); @@ -575,8 +600,8 @@ exit1(struct thread *td, int rv) } else mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); - if (p->p_pptr == initproc) - kern_psignal(p->p_pptr, SIGCHLD); + if (p->p_pptr == p->p_reaper || p->p_pptr == initproc) + childproc_exited(p); else if (p->p_sigparent != 0) { if (p->p_sigparent == SIGCHLD) childproc_exited(p); @@ -849,6 +874,8 @@ proc_reap(struct thread *td, struct proc *p, int *status, int 
options) LIST_REMOVE(p, p_list); /* off zombproc */ sx_xunlock(&allproc_lock); LIST_REMOVE(p, p_sibling); + reaper_abandon_children(p, true); + LIST_REMOVE(p, p_reapsibling); PROC_LOCK(p); clear_orphan(p); PROC_UNLOCK(p); diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index c5298388c888..f469db634bca 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -261,11 +261,21 @@ fork_findpid(int flags) * Scan the active and zombie procs to check whether this pid * is in use. Remember the lowest pid that's greater * than trypid, so we can avoid checking for a while. + * + * Avoid reuse of the process group id, session id or + * the reaper subtree id. Note that for process group + * and sessions, the amount of reserved pids is + * limited by process limit. For the subtree ids, the + * id is kept reserved only while there is a + * non-reaped process in the subtree, so amount of + * reserved pids is limited by process limit times + * two. */ p = LIST_FIRST(&allproc); again: for (; p != NULL; p = LIST_NEXT(p, p_list)) { while (p->p_pid == trypid || + p->p_reapsubtree == trypid || (p->p_pgrp != NULL && (p->p_pgrp->pg_id == trypid || (p->p_session != NULL && @@ -611,12 +621,20 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2, * of init. This effectively disassociates the child from the * parent. */ - if (flags & RFNOWAIT) - pptr = initproc; - else + if ((flags & RFNOWAIT) != 0) { + pptr = p1->p_reaper; + p2->p_reaper = pptr; + } else { + p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ? + p1 : p1->p_reaper; pptr = p1; + } p2->p_pptr = pptr; LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); + LIST_INIT(&p2->p_reaplist); + LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling); + if (p2->p_reaper == p1) + p2->p_reapsubtree = p2->p_pid; sx_xunlock(&proctree_lock); /* Inform accounting that we have forked. 
*/ diff --git a/sys/kern/kern_procctl.c b/sys/kern/kern_procctl.c new file mode 100644 index 000000000000..5ee2953a1697 --- /dev/null +++ b/sys/kern/kern_procctl.c @@ -0,0 +1,460 @@ +/*- + * Copyright (c) 2014 John Baldwin + * Copyright (c) 2014 The FreeBSD Foundation + * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int +protect_setchild(struct thread *td, struct proc *p, int flags) +{ + + PROC_LOCK_ASSERT(p, MA_OWNED); + if (p->p_flag & P_SYSTEM || p_cansched(td, p) != 0) + return (0); + if (flags & PPROT_SET) { + p->p_flag |= P_PROTECTED; + if (flags & PPROT_INHERIT) + p->p_flag2 |= P2_INHERIT_PROTECTED; + } else { + p->p_flag &= ~P_PROTECTED; + p->p_flag2 &= ~P2_INHERIT_PROTECTED; + } + return (1); +} + +static int +protect_setchildren(struct thread *td, struct proc *top, int flags) +{ + struct proc *p; + int ret; + + p = top; + ret = 0; + sx_assert(&proctree_lock, SX_LOCKED); + for (;;) { + ret |= protect_setchild(td, p, flags); + PROC_UNLOCK(p); + /* + * If this process has children, descend to them next, + * otherwise do any siblings, and if done with this level, + * follow back up the tree (but not past top). + */ + if (!LIST_EMPTY(&p->p_children)) + p = LIST_FIRST(&p->p_children); + else for (;;) { + if (p == top) { + PROC_LOCK(p); + return (ret); + } + if (LIST_NEXT(p, p_sibling)) { + p = LIST_NEXT(p, p_sibling); + break; + } + p = p->p_pptr; + } + PROC_LOCK(p); + } +} + +static int +protect_set(struct thread *td, struct proc *p, int flags) +{ + int error, ret; + + switch (PPROT_OP(flags)) { + case PPROT_SET: + case PPROT_CLEAR: + break; + default: + return (EINVAL); + } + + if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0) + return (EINVAL); + + error = priv_check(td, PRIV_VM_MADV_PROTECT); + if (error) + return (error); + + if (flags & PPROT_DESCEND) + ret = protect_setchildren(td, p, flags); + else + ret = protect_setchild(td, p, flags); + if (ret == 0) + return (EPERM); + return (0); +} + +static int +reap_acquire(struct thread *td, struct proc *p) +{ + + sx_assert(&proctree_lock, SX_XLOCKED); + if (p != curproc) + return (EPERM); + if ((p->p_treeflag & P_TREE_REAPER) != 0) + 
return (EBUSY); + p->p_treeflag |= P_TREE_REAPER; + /* + * We do not reattach existing children and the whole tree + * under them to us, since p->p_reaper already seen them. + */ + return (0); +} + +static int +reap_release(struct thread *td, struct proc *p) +{ + + sx_assert(&proctree_lock, SX_XLOCKED); + if (p != curproc) + return (EPERM); + if (p == initproc) + return (EINVAL); + if ((p->p_treeflag & P_TREE_REAPER) == 0) + return (EINVAL); + reaper_abandon_children(p, false); + return (0); +}
+
+/*
+ * PROC_REAP_STATUS handler.  Fill *rs with a description of the reaper
+ * responsible for p: p itself when it has P_TREE_REAPER set (reported
+ * with REAPER_STATUS_OWNED), otherwise p->p_reaper.  The structure is
+ * zeroed first, so fields not set below read as zero.
+ *
+ * Must be called with proctree_lock held.  Always returns 0.
+ */
+static int
+reap_status(struct thread *td, struct proc *p,
+    struct procctl_reaper_status *rs)
+{
+	struct proc *reap, *p2;
+
+	sx_assert(&proctree_lock, SX_LOCKED);
+	bzero(rs, sizeof(*rs));
+	if ((p->p_treeflag & P_TREE_REAPER) == 0) {
+		reap = p->p_reaper;
+	} else {
+		reap = p;
+		rs->rs_flags |= REAPER_STATUS_OWNED;
+	}
+	if (reap == initproc)
+		rs->rs_flags |= REAPER_STATUS_REALINIT;
+	rs->rs_reaper = reap->p_pid;
+	rs->rs_descendants = 0;
+	rs->rs_children = 0;
+	if (!LIST_EMPTY(&reap->p_reaplist)) {
+		/*
+		 * Membership of p_children and p_reaplist is tracked
+		 * separately, so a reaper with descendants is not
+		 * guaranteed to have a direct child at this moment.
+		 * Prefer a direct child for rs_pid, but fall back to the
+		 * first descendant instead of dereferencing NULL.
+		 */
+		p2 = LIST_FIRST(&reap->p_children);
+		if (p2 == NULL)
+			p2 = LIST_FIRST(&reap->p_reaplist);
+		rs->rs_pid = p2->p_pid;
+		LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
+			if (proc_realparent(p2) == reap)
+				rs->rs_children++;
+			rs->rs_descendants++;
+		}
+	} else {
+		/*
+		 * No descendants are recorded.  p_children may still be
+		 * non-empty here: reap_acquire() does not move children
+		 * forked before the acquisition onto p_reaplist, so it
+		 * must not be asserted empty.
+		 */
+		rs->rs_pid = -1;
+	}
+	return (0);
+}
+
+static int +reap_getpids(struct thread *td, struct proc *p, struct procctl_reaper_pids *rp) +{ + struct proc *reap, *p2; + struct procctl_reaper_pidinfo *pi, *pip; + u_int i, n; + int error; + + sx_assert(&proctree_lock, SX_LOCKED); + PROC_UNLOCK(p); + reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? 
p->p_reaper : p; + n = i = 0; + error = 0; + LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) + n++; + sx_unlock(&proctree_lock); + if (rp->rp_count < n) + n = rp->rp_count; + pi = malloc(n * sizeof(*pi), M_TEMP, M_WAITOK); + sx_slock(&proctree_lock); + LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) { + if (i == n) + break; + pip = &pi[i]; + bzero(pip, sizeof(*pip)); + pip->pi_pid = p2->p_pid; + pip->pi_subtree = p2->p_reapsubtree; + pip->pi_flags = REAPER_PIDINFO_VALID; + if (proc_realparent(p2) == reap) + pip->pi_flags |= REAPER_PIDINFO_CHILD; + i++; + } + sx_sunlock(&proctree_lock); + error = copyout(pi, rp->rp_pids, i * sizeof(*pi)); + free(pi, M_TEMP); + sx_slock(&proctree_lock); + PROC_LOCK(p); + return (error); +}
+
+/*
+ * PROC_REAP_KILL handler.  Send signal rk->rk_sig to processes managed
+ * by the reaper of p (or by p itself when p is a reaper): the reaper's
+ * direct children when REAPER_KILL_CHILDREN is set, otherwise every
+ * process on its p_reaplist; REAPER_KILL_SUBTREE further restricts the
+ * set to processes whose p_reapsubtree equals rk->rk_subtree.
+ *
+ * rk->rk_killed and rk->rk_fpid are reset once the request has been
+ * validated and are updated while the list is walked.
+ *
+ * Called with p locked and proctree_lock held; p is locked again on
+ * every return path.  Returns ECAPMODE in capability mode, EINVAL for a
+ * bad signal number or unknown flags, otherwise the status computed by
+ * the delivery loop (initially ESRCH).
+ */
+static int
+reap_kill(struct thread *td, struct proc *p, struct procctl_reaper_kill *rk)
+{
+	struct proc *reap, *p2;
+	ksiginfo_t ksi;
+	int error, error1;
+
+	sx_assert(&proctree_lock, SX_LOCKED);
+	/*
+	 * Validate the request before dropping the process lock: the
+	 * caller unlocks p after we return, so every early return must
+	 * leave p locked.
+	 */
+	if (IN_CAPABILITY_MODE(td))
+		return (ECAPMODE);
+	if (rk->rk_sig <= 0 || rk->rk_sig > _SIG_MAXSIG)
+		return (EINVAL);
+	/* Both documented flags are valid; reject anything else. */
+	if ((rk->rk_flags & ~(REAPER_KILL_CHILDREN |
+	    REAPER_KILL_SUBTREE)) != 0)
+		return (EINVAL);
+	/* p stays unlocked while other processes are locked and signalled. */
+	PROC_UNLOCK(p);
+	reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
+	ksiginfo_init(&ksi);
+	ksi.ksi_signo = rk->rk_sig;
+	ksi.ksi_code = SI_USER;
+	ksi.ksi_pid = td->td_proc->p_pid;
+	ksi.ksi_uid = td->td_ucred->cr_ruid;
+	error = ESRCH;
+	rk->rk_killed = 0;
+	rk->rk_fpid = -1;
+	for (p2 = (rk->rk_flags & REAPER_KILL_CHILDREN) != 0 ?
+	    LIST_FIRST(&reap->p_children) : LIST_FIRST(&reap->p_reaplist);
+	    p2 != NULL;
+	    p2 = (rk->rk_flags & REAPER_KILL_CHILDREN) != 0 ? 
+ LIST_NEXT(p2, p_sibling) : LIST_NEXT(p2, p_reapsibling)) { + if ((rk->rk_flags & REAPER_KILL_SUBTREE) != 0 && + p2->p_reapsubtree != rk->rk_subtree) + continue; + PROC_LOCK(p2); + error1 = p_cansignal(td, p2, rk->rk_sig); + if (error1 == 0) { + pksignal(p2, rk->rk_sig, &ksi); + rk->rk_killed++; + error = error1; + } else if (error == ESRCH) { + error = error1; + rk->rk_fpid = p2->p_pid; + } + PROC_UNLOCK(p2); + /* Do not end the loop on error, signal everything we can. */ + } + PROC_LOCK(p); + return (error); +}
+
+#ifndef _SYS_SYSPROTO_H_
+struct procctl_args {
+	idtype_t idtype;
+	id_t	id;
+	int	com;
+	void	*data;
+};
+#endif
+/*
+ * procctl(2) system call entry point.  Copies the command argument in
+ * from userspace where the command takes one, hands the request to
+ * kern_procctl(), and copies results back for the commands that
+ * return data through uap->data.
+ *
+ * Returns 0 or an errno value: EINVAL for an unknown command or a
+ * non-NULL data pointer with PROC_REAP_ACQUIRE/PROC_REAP_RELEASE, a
+ * copyin()/copyout() error, or whatever kern_procctl() reports.
+ */
+/* ARGSUSED */
+int
+sys_procctl(struct thread *td, struct procctl_args *uap)
+{
+	void *data;
+	union {
+		struct procctl_reaper_status rs;
+		struct procctl_reaper_pids rp;
+		struct procctl_reaper_kill rk;
+	} x;
+	int error, error1, flags;
+
+	switch (uap->com) {
+	case PROC_SPROTECT:
+		error = copyin(uap->data, &flags, sizeof(flags));
+		if (error != 0)
+			return (error);
+		data = &flags;
+		break;
+	case PROC_REAP_ACQUIRE:
+	case PROC_REAP_RELEASE:
+		/* These commands take no argument. */
+		if (uap->data != NULL)
+			return (EINVAL);
+		data = NULL;
+		break;
+	case PROC_REAP_STATUS:
+		/* Output only; nothing to copy in. */
+		data = &x.rs;
+		break;
+	case PROC_REAP_GETPIDS:
+		error = copyin(uap->data, &x.rp, sizeof(x.rp));
+		if (error != 0)
+			return (error);
+		data = &x.rp;
+		break;
+	case PROC_REAP_KILL:
+		error = copyin(uap->data, &x.rk, sizeof(x.rk));
+		if (error != 0)
+			return (error);
+		data = &x.rk;
+		break;
+	default:
+		return (EINVAL);
+	}
+	error = kern_procctl(td, uap->idtype, uap->id, uap->com, data);
+	switch (uap->com) {
+	case PROC_REAP_STATUS:
+		/* x.rs is only meaningful when the request succeeded. */
+		if (error == 0)
+			error = copyout(&x.rs, uap->data, sizeof(x.rs));
+		/*
+		 * Do not fall through: the PROC_REAP_KILL case copies the
+		 * union out unconditionally, which on a failed status
+		 * request would hand uninitialized kernel stack to userland.
+		 */
+		break;
+	case PROC_REAP_KILL:
+		/*
+		 * Copy the structure out even on failure so that rk_killed
+		 * and rk_fpid reach the caller; a copyout error is reported
+		 * only if the request itself succeeded.
+		 */
+		error1 = copyout(&x.rk, uap->data, sizeof(x.rk));
+		if (error == 0)
+			error = error1;
+		break;
+	}
+	return (error);
+}
+
+static int +kern_procctl_single(struct thread *td, struct proc *p, int com, void *data) +{ + + PROC_LOCK_ASSERT(p, MA_OWNED); + switch (com) { + case 
PROC_SPROTECT: + return (protect_set(td, p, *(int *)data)); + case PROC_REAP_ACQUIRE: + return (reap_acquire(td, p)); + case PROC_REAP_RELEASE: + return (reap_release(td, p)); + case PROC_REAP_STATUS: + return (reap_status(td, p, data)); + case PROC_REAP_GETPIDS: + return (reap_getpids(td, p, data)); + case PROC_REAP_KILL: + return (reap_kill(td, p, data)); + default: + return (EINVAL); + } +} + +int +kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data) +{ + struct pgrp *pg; + struct proc *p; + int error, first_error, ok; + + switch (com) { + case PROC_REAP_ACQUIRE: + case PROC_REAP_RELEASE: + case PROC_REAP_STATUS: + case PROC_REAP_GETPIDS: + case PROC_REAP_KILL: + if (idtype != P_PID) + return (EINVAL); + } + + switch (com) { + case PROC_SPROTECT: + case PROC_REAP_STATUS: + case PROC_REAP_GETPIDS: + case PROC_REAP_KILL: + sx_slock(&proctree_lock); + break; + case PROC_REAP_ACQUIRE: + case PROC_REAP_RELEASE: + sx_xlock(&proctree_lock); + break; + default: + return (EINVAL); + } + + switch (idtype) { + case P_PID: + p = pfind(id); + if (p == NULL) { + error = ESRCH; + break; + } + error = p_cansee(td, p); + if (error == 0) + error = kern_procctl_single(td, p, com, data); + PROC_UNLOCK(p); + break; + case P_PGID: + /* + * Attempt to apply the operation to all members of the + * group. Ignore processes in the group that can't be + * seen. Ignore errors so long as at least one process is + * able to complete the request successfully. 
+ */ + pg = pgfind(id); + if (pg == NULL) { + error = ESRCH; + break; + } + PGRP_UNLOCK(pg); + ok = 0; + first_error = 0; + LIST_FOREACH(p, &pg->pg_members, p_pglist) { + PROC_LOCK(p); + if (p->p_state == PRS_NEW || p_cansee(td, p) != 0) { + PROC_UNLOCK(p); + continue; + } + error = kern_procctl_single(td, p, com, data); + PROC_UNLOCK(p); + if (error == 0) + ok = 1; + else if (first_error == 0) + first_error = error; + } + if (ok) + error = 0; + else if (first_error != 0) + error = first_error; + else + /* + * Was not able to see any processes in the + * process group. + */ + error = ESRCH; + break; + default: + error = EINVAL; + break; + } + sx_unlock(&proctree_lock); + return (error); +} diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index 3105d94d2998..7dd3d1754361 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -43,7 +43,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include @@ -1234,193 +1233,3 @@ stopevent(struct proc *p, unsigned int event, unsigned int val) msleep(&p->p_step, &p->p_mtx, PWAIT, "stopevent", 0); } while (p->p_step); } - -static int -protect_setchild(struct thread *td, struct proc *p, int flags) -{ - - PROC_LOCK_ASSERT(p, MA_OWNED); - if (p->p_flag & P_SYSTEM || p_cansched(td, p) != 0) - return (0); - if (flags & PPROT_SET) { - p->p_flag |= P_PROTECTED; - if (flags & PPROT_INHERIT) - p->p_flag2 |= P2_INHERIT_PROTECTED; - } else { - p->p_flag &= ~P_PROTECTED; - p->p_flag2 &= ~P2_INHERIT_PROTECTED; - } - return (1); -} - -static int -protect_setchildren(struct thread *td, struct proc *top, int flags) -{ - struct proc *p; - int ret; - - p = top; - ret = 0; - sx_assert(&proctree_lock, SX_LOCKED); - for (;;) { - ret |= protect_setchild(td, p, flags); - PROC_UNLOCK(p); - /* - * If this process has children, descend to them next, - * otherwise do any siblings, and if done with this level, - * follow back up the tree (but not past top). 
- */ - if (!LIST_EMPTY(&p->p_children)) - p = LIST_FIRST(&p->p_children); - else for (;;) { - if (p == top) { - PROC_LOCK(p); - return (ret); - } - if (LIST_NEXT(p, p_sibling)) { - p = LIST_NEXT(p, p_sibling); - break; - } - p = p->p_pptr; - } - PROC_LOCK(p); - } -} - -static int -protect_set(struct thread *td, struct proc *p, int flags) -{ - int error, ret; - - switch (PPROT_OP(flags)) { - case PPROT_SET: - case PPROT_CLEAR: - break; - default: - return (EINVAL); - } - - if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0) - return (EINVAL); - - error = priv_check(td, PRIV_VM_MADV_PROTECT); - if (error) - return (error); - - if (flags & PPROT_DESCEND) - ret = protect_setchildren(td, p, flags); - else - ret = protect_setchild(td, p, flags); - if (ret == 0) - return (EPERM); - return (0); -} - -#ifndef _SYS_SYSPROTO_H_ -struct procctl_args { - idtype_t idtype; - id_t id; - int com; - void *data; -}; -#endif -/* ARGSUSED */ -int -sys_procctl(struct thread *td, struct procctl_args *uap) -{ - int error, flags; - void *data; - - switch (uap->com) { - case PROC_SPROTECT: - error = copyin(uap->data, &flags, sizeof(flags)); - if (error) - return (error); - data = &flags; - break; - default: - return (EINVAL); - } - - return (kern_procctl(td, uap->idtype, uap->id, uap->com, data)); -} - -static int -kern_procctl_single(struct thread *td, struct proc *p, int com, void *data) -{ - - PROC_LOCK_ASSERT(p, MA_OWNED); - switch (com) { - case PROC_SPROTECT: - return (protect_set(td, p, *(int *)data)); - default: - return (EINVAL); - } -} - -int -kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data) -{ - struct pgrp *pg; - struct proc *p; - int error, first_error, ok; - - sx_slock(&proctree_lock); - switch (idtype) { - case P_PID: - p = pfind(id); - if (p == NULL) { - error = ESRCH; - break; - } - error = p_cansee(td, p); - if (error == 0) - error = kern_procctl_single(td, p, com, data); - PROC_UNLOCK(p); - break; - case P_PGID: - /* - * Attempt 
to apply the operation to all members of the - * group. Ignore processes in the group that can't be - * seen. Ignore errors so long as at least one process is - * able to complete the request successfully. - */ - pg = pgfind(id); - if (pg == NULL) { - error = ESRCH; - break; - } - PGRP_UNLOCK(pg); - ok = 0; - first_error = 0; - LIST_FOREACH(p, &pg->pg_members, p_pglist) { - PROC_LOCK(p); - if (p->p_state == PRS_NEW || p_cansee(td, p) != 0) { - PROC_UNLOCK(p); - continue; - } - error = kern_procctl_single(td, p, com, data); - PROC_UNLOCK(p); - if (error == 0) - ok = 1; - else if (first_error == 0) - first_error = error; - } - if (ok) - error = 0; - else if (first_error != 0) - error = first_error; - else - /* - * Was not able to see any processes in the - * process group. - */ - error = ESRCH; - break; - default: - error = EINVAL; - break; - } - sx_sunlock(&proctree_lock); - return (error); -} diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 6590394f234d..d7a45e972867 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -513,6 +513,11 @@ struct proc { struct proc *p_pptr; /* (c + e) Pointer to parent process. */ LIST_ENTRY(proc) p_sibling; /* (e) List of sibling processes. */ LIST_HEAD(, proc) p_children; /* (e) Pointer to list of children. */ + struct proc *p_reaper; /* (e) My reaper. */ + LIST_HEAD(, proc) p_reaplist; /* (e) List of my descendants + (if I am reaper). */ + LIST_ENTRY(proc) p_reapsibling; /* (e) List of siblings - descendants of + the same reaper. */ struct mtx p_mtx; /* (n) Lock for this struct. */ struct mtx p_statmtx; /* Lock for the stats */ struct mtx p_itimmtx; /* Lock for the virt/prof timers */ @@ -570,6 +575,9 @@ struct proc { rlim_t p_cpulimit; /* (c) Current CPU limit in seconds. */ signed char p_nice; /* (c) Process "nice" value. */ int p_fibnum; /* in this routing domain XXX MRT */ + pid_t p_reapsubtree; /* (e) Pid of the direct child of the + reaper which spawned + our subtree. */ /* End area that is copied on creation. 
*/ #define p_endcopy p_xstat @@ -671,6 +679,7 @@ struct proc { #define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */ #define P_TREE_FIRST_ORPHAN 0x00000002 /* First element of orphan list */ +#define P_TREE_REAPER 0x00000004 /* Reaper of subtree */ /* * These were process status values (p_stat), now they are only used in @@ -920,6 +929,7 @@ void proc_reparent(struct proc *child, struct proc *newparent); struct pstats *pstats_alloc(void); void pstats_fork(struct pstats *src, struct pstats *dst); void pstats_free(struct pstats *ps); +void reaper_abandon_children(struct proc *p, bool exiting); int securelevel_ge(struct ucred *cr, int level); int securelevel_gt(struct ucred *cr, int level); void sess_hold(struct session *); diff --git a/sys/sys/procctl.h b/sys/sys/procctl.h index ff577c06be84..d11b2b29600a 100644 --- a/sys/sys/procctl.h +++ b/sys/sys/procctl.h @@ -30,7 +30,17 @@ #ifndef _SYS_PROCCTL_H_ #define _SYS_PROCCTL_H_ +#ifndef _KERNEL +#include +#include +#endif + #define PROC_SPROTECT 1 /* set protected state */ +#define PROC_REAP_ACQUIRE 2 /* reaping enable */ +#define PROC_REAP_RELEASE 3 /* reaping disable */ +#define PROC_REAP_STATUS 4 /* reaping status */ +#define PROC_REAP_GETPIDS 5 /* get descendants */ +#define PROC_REAP_KILL 6 /* kill descendants */ /* Operations for PROC_SPROTECT (passed in integer arg). */ #define PPROT_OP(x) ((x) & 0xf) @@ -42,10 +52,51 @@ #define PPROT_DESCEND 0x10 #define PPROT_INHERIT 0x20 -#ifndef _KERNEL -#include -#include +/* Result of PREAP_STATUS (returned by value). 
*/ +struct procctl_reaper_status { + u_int rs_flags; + u_int rs_children; + u_int rs_descendants; + pid_t rs_reaper; + pid_t rs_pid; + u_int rs_pad0[15]; +}; +/* struct procctl_reaper_status rs_flags */ +#define REAPER_STATUS_OWNED 0x00000001 +#define REAPER_STATUS_REALINIT 0x00000002 + +struct procctl_reaper_pidinfo { + pid_t pi_pid; + pid_t pi_subtree; + u_int pi_flags; + u_int pi_pad0[15]; +}; + +#define REAPER_PIDINFO_VALID 0x00000001 +#define REAPER_PIDINFO_CHILD 0x00000002 + +struct procctl_reaper_pids { + u_int rp_count; + u_int rp_pad0[15]; + struct procctl_reaper_pidinfo *rp_pids; +}; + +struct procctl_reaper_kill { + int rk_sig; /* in - signal to send */ + u_int rk_flags; /* in - REAPER_KILL flags */ + pid_t rk_subtree; /* in - subtree, if REAPER_KILL_SUBTREE */ + u_int rk_killed; /* out - count of processes sucessfully + killed */ + pid_t rk_fpid; /* out - first failed pid for which error + is returned */ + u_int rk_pad0[15]; +}; + +#define REAPER_KILL_CHILDREN 0x00000001 +#define REAPER_KILL_SUBTREE 0x00000002 + +#ifndef _KERNEL __BEGIN_DECLS int procctl(idtype_t, id_t, int, void *); __END_DECLS