kqueue EVFILT_PROC: avoid collision between NOTE_CHILD and NOTE_EXIT

NOTE_CHILD and NOTE_EXIT return something in kevent.data: the parent
pid (ppid) for NOTE_CHILD and the exit status for NOTE_EXIT.
Do not let the two events be combined, since one would overwrite
the other's data.

PR:		180385
Submitted by:	David A. Bright <david_a_bright@dell.com>
Reviewed by:	jhb
MFC after:	1 month
Sponsored by:	Dell Inc.
Differential Revision:	https://reviews.freebsd.org/D4900
This commit is contained in:
vangyzen 2016-01-28 20:24:15 +00:00
parent f91c9c2798
commit f8f3d27673
5 changed files with 281 additions and 25 deletions

View File

@ -373,11 +373,21 @@ filt_procattach(struct knote *kn)
kn->kn_flags |= EV_CLEAR; /* automatically set */
/*
* internal flag indicating registration done by kernel
* Internal flag indicating registration done by kernel for the
* purposes of getting a NOTE_CHILD notification.
*/
if (kn->kn_flags & EV_FLAG1) {
if (kn->kn_flags & EV_FLAG2) {
kn->kn_flags &= ~EV_FLAG2;
kn->kn_data = kn->kn_sdata; /* ppid */
kn->kn_fflags = NOTE_CHILD;
kn->kn_sfflags &= ~NOTE_EXIT;
immediate = 1; /* Force immediate activation of child note. */
}
/*
* Internal flag indicating registration done by kernel (for other than
* NOTE_CHILD).
*/
if (kn->kn_flags & EV_FLAG1) {
kn->kn_flags &= ~EV_FLAG1;
}
@ -385,9 +395,10 @@ filt_procattach(struct knote *kn)
knlist_add(&p->p_klist, kn, 1);
/*
* Immediately activate any exit notes if the target process is a
* zombie. This is necessary to handle the case where the target
* process, e.g. a child, dies before the kevent is registered.
* Immediately activate any child notes or, in the case of a zombie
* target process, exit notes. The latter is necessary to handle the
* case where the target process, e.g. a child, dies before the kevent
* is registered.
*/
if (immediate && filt_proc(kn, NOTE_EXIT))
KNOTE_ACTIVATE(kn, 0);
@ -495,7 +506,7 @@ knote_fork(struct knlist *list, int pid)
/*
* The NOTE_TRACK case. In addition to the activation
* of the event, we need to register new event to
* of the event, we need to register new events to
* track the child. Drop the locks in preparation for
* the call to kqueue_register().
*/
@ -504,8 +515,27 @@ knote_fork(struct knlist *list, int pid)
list->kl_unlock(list->kl_lockarg);
/*
* Activate existing knote and register a knote with
* Activate existing knote and register tracking knotes with
* new process.
*
* First register a knote to get just the child notice. This
* must be a separate note from a potential NOTE_EXIT
* notification since both NOTE_CHILD and NOTE_EXIT are defined
* to use the data field (in conflicting ways).
*/
kev.ident = pid;
kev.filter = kn->kn_filter;
kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT | EV_FLAG2;
kev.fflags = kn->kn_sfflags;
kev.data = kn->kn_id; /* parent */
kev.udata = kn->kn_kevent.udata;/* preserve udata */
error = kqueue_register(kq, &kev, NULL, 0);
if (error)
kn->kn_fflags |= NOTE_TRACKERR;
/*
* Then register another knote to track other potential events
* from the new process.
*/
kev.ident = pid;
kev.filter = kn->kn_filter;
@ -1129,7 +1159,7 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int wa
if (fp->f_type == DTYPE_KQUEUE) {
/*
* if we add some inteligence about what we are doing,
* If we add some intelligence about what we are doing,
* we should be able to support events on ourselves.
* We need to know when we are doing this to prevent
* getting both the knlist lock and the kq lock since
@ -1161,7 +1191,18 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int wa
kqueue_expand(kq, fops, kev->ident, waitok);
KQ_LOCK(kq);
if (kq->kq_knhashmask != 0) {
/*
* If possible, find an existing knote to use for this kevent.
*/
if (kev->filter == EVFILT_PROC &&
(kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) {
/* This is an internal creation of a process tracking
* note. Don't attempt to coalesce this with an
* existing note.
*/
;
} else if (kq->kq_knhashmask != 0) {
struct klist *list;
list = &kq->kq_knhash[
@ -1173,7 +1214,7 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int wa
}
}
/* knote is in the process of changing, wait for it to stablize. */
/* knote is in the process of changing, wait for it to stabilize. */
if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
if (filedesc_unlock) {

View File

@ -80,6 +80,7 @@ struct kevent {
#define EV_SYSFLAGS 0xF000 /* reserved by system */
#define EV_DROP 0x1000 /* note should be dropped */
#define EV_FLAG1 0x2000 /* filter-specific flag */
#define EV_FLAG2 0x4000 /* filter-specific flag */
/* returned values */
#define EV_EOF 0x8000 /* EOF detected */

View File

@ -46,6 +46,7 @@ int vnode_fd;
extern const char * kevent_to_str(struct kevent *);
struct kevent * kevent_get(int);
struct kevent * kevent_get_timeout(int, int);
void kevent_cmp(struct kevent *, struct kevent *);

View File

@ -69,6 +69,28 @@ kevent_get(int kqfd)
return (kev);
}
/* Retrieve a single kevent, specifying a maximum time to wait for it. */
struct kevent *
kevent_get_timeout(int kqfd, int seconds)
{
int nfds;
struct kevent *kev;
struct timespec timeout = {seconds, 0};
if ((kev = calloc(1, sizeof(*kev))) == NULL)
err(1, "out of memory");
nfds = kevent(kqfd, NULL, 0, kev, 1, &timeout);
if (nfds < 0) {
err(1, "kevent(2)");
} else if (nfds == 0) {
free(kev);
kev = NULL;
}
return (kev);
}
char *
kevent_fflags_dump(struct kevent *kev)
{
@ -82,25 +104,39 @@ kevent_fflags_dump(struct kevent *kev)
abort();
/* Not every filter has meaningful fflags */
if (kev->filter != EVFILT_VNODE) {
snprintf(buf, 1024, "fflags = %d", kev->fflags);
return (buf);
}
snprintf(buf, 1024, "fflags = %d (", kev->fflags);
KEVFFL_DUMP(NOTE_DELETE);
KEVFFL_DUMP(NOTE_WRITE);
KEVFFL_DUMP(NOTE_EXTEND);
if (kev->filter == EVFILT_PROC) {
snprintf(buf, 1024, "fflags = %x (", kev->fflags);
KEVFFL_DUMP(NOTE_EXIT);
KEVFFL_DUMP(NOTE_FORK);
KEVFFL_DUMP(NOTE_EXEC);
KEVFFL_DUMP(NOTE_CHILD);
KEVFFL_DUMP(NOTE_TRACKERR);
KEVFFL_DUMP(NOTE_TRACK);
buf[strlen(buf) - 1] = ')';
} else if (kev->filter == EVFILT_PROCDESC) {
snprintf(buf, 1024, "fflags = %x (", kev->fflags);
KEVFFL_DUMP(NOTE_EXIT);
KEVFFL_DUMP(NOTE_FORK);
KEVFFL_DUMP(NOTE_EXEC);
buf[strlen(buf) - 1] = ')';
} else if (kev->filter == EVFILT_VNODE) {
snprintf(buf, 1024, "fflags = %x (", kev->fflags);
KEVFFL_DUMP(NOTE_DELETE);
KEVFFL_DUMP(NOTE_WRITE);
KEVFFL_DUMP(NOTE_EXTEND);
#if HAVE_NOTE_TRUNCATE
KEVFFL_DUMP(NOTE_TRUNCATE);
KEVFFL_DUMP(NOTE_TRUNCATE);
#endif
KEVFFL_DUMP(NOTE_ATTRIB);
KEVFFL_DUMP(NOTE_LINK);
KEVFFL_DUMP(NOTE_RENAME);
KEVFFL_DUMP(NOTE_ATTRIB);
KEVFFL_DUMP(NOTE_LINK);
KEVFFL_DUMP(NOTE_RENAME);
#if HAVE_NOTE_REVOKE
KEVFFL_DUMP(NOTE_REVOKE);
KEVFFL_DUMP(NOTE_REVOKE);
#endif
buf[strlen(buf) - 1] = ')';
buf[strlen(buf) - 1] = ')';
} else {
snprintf(buf, 1024, "fflags = %x", kev->fflags);
}
return (buf);
}
@ -260,6 +296,15 @@ main(int argc, char **argv)
argc--;
}
/*
* Some tests fork. If output is fully buffered,
* the children inherit some buffered data and flush
* it when they exit, causing some data to be printed twice.
* Use line buffering to avoid this problem.
*/
setlinebuf(stdout);
setlinebuf(stderr);
test_kqueue();
test_kqueue_close();

View File

@ -74,6 +74,172 @@ add_and_delete(void)
}
static void
proc_track(int sleep_time)
{
char test_id[64];
struct kevent kev;
pid_t pid;
int pipe_fd[2];
ssize_t result;
snprintf(test_id, sizeof(test_id),
"kevent(EVFILT_PROC, NOTE_TRACK); sleep %d", sleep_time);
test_begin(test_id);
test_no_kevents();
if (pipe(pipe_fd)) {
err(1, "pipe (parent) failed! (%s() at %s:%d)",
__func__, __FILE__, __LINE__);
}
/* Create a child to track. */
pid = fork();
if (pid == 0) { /* Child */
pid_t grandchild = -1;
/*
* Give the parent a chance to start tracking us.
*/
result = read(pipe_fd[1], test_id, 1);
if (result != 1) {
err(1, "read from pipe in child failed! (ret %zd) (%s() at %s:%d)",
result, __func__, __FILE__, __LINE__);
}
/*
* Spawn a grandchild that will immediately exit. If the kernel has bug
* 180385, the parent will see a kevent with both NOTE_CHILD and
* NOTE_EXIT. If that bug is fixed, it will see two separate kevents
* for those notes. Note that this triggers the conditions for
* detecting the bug quite reliably on a 1 CPU system (or if the test
* process is restricted to a single CPU), but may not trigger it on a
* multi-CPU system.
*/
grandchild = fork();
if (grandchild == 0) { /* Grandchild */
if (sleep_time) sleep(sleep_time);
exit(1);
} else if (grandchild == -1) { /* Error */
err(1, "fork (grandchild) failed! (%s() at %s:%d)",
__func__, __FILE__, __LINE__);
} else { /* Child (Grandchild Parent) */
printf(" -- grandchild created (pid %d)\n", (int) grandchild);
}
if (sleep_time) sleep(sleep_time);
exit(0);
} else if (pid == -1) { /* Error */
err(1, "fork (child) failed! (%s() at %s:%d)",
__func__, __FILE__, __LINE__);
}
printf(" -- child created (pid %d)\n", (int) pid);
kevent_add(kqfd, &kev, pid, EVFILT_PROC, EV_ADD | EV_ENABLE,
NOTE_TRACK | NOTE_EXEC | NOTE_EXIT | NOTE_FORK,
0, NULL);
printf(" -- tracking child (pid %d)\n", (int) pid);
/* Now that we're tracking the child, tell it to proceed. */
result = write(pipe_fd[0], test_id, 1);
if (result != 1) {
err(1, "write to pipe in parent failed! (ret %zd) (%s() at %s:%d)",
result, __func__, __FILE__, __LINE__);
}
/*
* Several events should be received:
* - NOTE_FORK (from child)
* - NOTE_CHILD (from grandchild)
* - NOTE_EXIT (from grandchild)
* - NOTE_EXIT (from child)
*
* The NOTE_FORK and NOTE_EXIT from the child could be combined into a
* single event, but the NOTE_CHILD and NOTE_EXIT from the grandchild must
* not be combined.
*
* The loop continues until no events are received within a 5 second
* period, at which point it is assumed that no more will be coming. The
* loop is deliberately designed to attempt to get events even after all
* the expected ones are received in case some spurious events are
* generated as well as the expected ones.
*/
{
int child_exit = 0;
int child_fork = 0;
int gchild_exit = 0;
int gchild_note = 0;
pid_t gchild_pid = -1;
int done = 0;
while (!done)
{
int handled = 0;
struct kevent *kevp;
kevp = kevent_get_timeout(kqfd, 5);
if (kevp == NULL) {
done = 1;
} else {
printf(" -- Received kevent: %s\n", kevent_to_str(kevp));
if ((kevp->fflags & NOTE_CHILD) && (kevp->fflags & NOTE_EXIT)) {
errx(1, "NOTE_CHILD and NOTE_EXIT in same kevent: %s", kevent_to_str(kevp));
}
if (kevp->fflags & NOTE_CHILD) {
if (kevp->data == pid) {
if (!gchild_note) {
++gchild_note;
gchild_pid = kevp->ident;
++handled;
} else {
errx(1, "Spurious NOTE_CHILD: %s", kevent_to_str(kevp));
}
}
}
if (kevp->fflags & NOTE_EXIT) {
if ((kevp->ident == pid) && (!child_exit)) {
++child_exit;
++handled;
} else if ((kevp->ident == gchild_pid) && (!gchild_exit)) {
++gchild_exit;
++handled;
} else {
errx(1, "Spurious NOTE_EXIT: %s", kevent_to_str(kevp));
}
}
if (kevp->fflags & NOTE_FORK) {
if ((kevp->ident == pid) && (!child_fork)) {
++child_fork;
++handled;
} else {
errx(1, "Spurious NOTE_FORK: %s", kevent_to_str(kevp));
}
}
if (!handled) {
errx(1, "Spurious kevent: %s", kevent_to_str(kevp));
}
free(kevp);
}
}
/* Make sure all expected events were received. */
if (child_exit && child_fork && gchild_exit && gchild_note) {
printf(" -- Received all expected events.\n");
} else {
errx(1, "Did not receive all expected events.");
}
}
success();
}
#ifdef TODO
static void
event_trigger(void)
@ -236,6 +402,8 @@ test_evfilt_proc()
signal(SIGUSR1, sig_handler);
add_and_delete();
proc_track(0); /* Run without sleeping before children exit. */
proc_track(1); /* Sleep a bit in the children before exiting. */
#if TODO
event_trigger();