daemon: use kqueue for all events

Refactor daemon to use kqueue/kevent instead of signals.

This changes allows to simplify the code in several ways:
- the execution flow is now linear, no async events.
- several variables became redundant and got removed.
- all event handling is now concentrated inside of the event loop, which
  makes code reading and comprehension easier.
- new kqueuex(2) call is used for CLOEXEC, but maintained closing the
  kq fd prior to execve() to ease later MFC

No UX/API changes are intended.

Reviewed by:	kevans
Pull Request:	https://github.com/freebsd/freebsd-src/pull/701
This commit is contained in:
Ihor Antonov 2023-04-14 00:10:29 -05:00 committed by Kyle Evans
parent ec671f4980
commit 8935a39932

View File

@ -34,6 +34,7 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/event.h>
#include <sys/mman.h>
#include <sys/wait.h>
@ -59,11 +60,14 @@ __FBSDID("$FreeBSD$");
#define LBUF_SIZE 4096
enum daemon_mode {
MODE_DAEMON = 0, /* simply daemonize, no supervision */
MODE_SUPERVISE, /* initial supervision state */
MODE_TERMINATING, /* user requested termination */
MODE_NOCHILD, /* child is terminated, final state of the event loop */
};
struct daemon_state {
sigset_t mask_orig;
sigset_t mask_read;
sigset_t mask_term;
sigset_t mask_susp;
int pipe_fd[2];
char **argv;
const char *child_pidfile;
@ -74,6 +78,8 @@ struct daemon_state {
const char *user;
struct pidfh *parent_pidfh;
struct pidfh *child_pidfh;
enum daemon_mode mode;
int pid;
int keep_cur_workdir;
int restart_delay;
int stdmask;
@ -81,33 +87,25 @@ struct daemon_state {
int syslog_facility;
int keep_fds_open;
int output_fd;
bool supervision_enabled;
bool child_eof;
bool restart_enabled;
bool syslog_enabled;
bool log_reopen;
};
static void setup_signals(struct daemon_state *);
static void restrict_process(const char *);
static void handle_term(int);
static void handle_chld(int);
static void handle_hup(int);
static int open_log(const char *);
static void reopen_log(struct daemon_state *);
static bool listen_child(int, struct daemon_state *);
static int get_log_mapping(const char *, const CODE *);
static void open_pid_files(struct daemon_state *);
static void do_output(const unsigned char *, size_t, struct daemon_state *);
static void daemon_sleep(time_t, long);
static void daemon_sleep(struct daemon_state *);
static void daemon_state_init(struct daemon_state *);
static void daemon_eventloop(struct daemon_state *);
static void daemon_terminate(struct daemon_state *);
static volatile sig_atomic_t terminate = 0;
static volatile sig_atomic_t child_gone = 0;
static volatile sig_atomic_t pid = 0;
static volatile sig_atomic_t do_log_reopen = 0;
static void daemon_exec(struct daemon_state *);
static bool daemon_is_child_dead(struct daemon_state *);
static void daemon_set_child_pipe(struct daemon_state *);
static const char shortopts[] = "+cfHSp:P:ru:o:s:l:t:m:R:T:h";
@ -172,6 +170,10 @@ main(int argc, char *argv[])
daemon_state_init(&state);
/* Signals are processed via kqueue */
signal(SIGHUP, SIG_IGN);
signal(SIGTERM, SIG_IGN);
/*
* Supervision mode is enabled if one of the following options are used:
* --child-pidfile -p
@ -207,7 +209,7 @@ main(int argc, char *argv[])
errx(5, "unrecognized syslog facility");
}
state.syslog_enabled = true;
state.supervision_enabled = true;
state.mode = MODE_SUPERVISE;
break;
case 'm':
state.stdmask = strtol(optarg, &p, 10);
@ -223,19 +225,19 @@ main(int argc, char *argv[])
* daemon could open the specified file and set it's
* descriptor as both stderr and stout before execve()
*/
state.supervision_enabled = true;
state.mode = MODE_SUPERVISE;
break;
case 'p':
state.child_pidfile = optarg;
state.supervision_enabled = true;
state.mode = MODE_SUPERVISE;
break;
case 'P':
state.parent_pidfile = optarg;
state.supervision_enabled = true;
state.mode = MODE_SUPERVISE;
break;
case 'r':
state.restart_enabled = true;
state.supervision_enabled = true;
state.mode = MODE_SUPERVISE;
break;
case 'R':
state.restart_enabled = true;
@ -251,11 +253,11 @@ main(int argc, char *argv[])
errx(4, "unrecognized syslog priority");
}
state.syslog_enabled = true;
state.supervision_enabled = true;
state.mode = MODE_SUPERVISE;
break;
case 'S':
state.syslog_enabled = true;
state.supervision_enabled = true;
state.mode = MODE_SUPERVISE;
break;
case 't':
state.title = optarg;
@ -263,7 +265,7 @@ main(int argc, char *argv[])
case 'T':
state.syslog_tag = optarg;
state.syslog_enabled = true;
state.supervision_enabled = true;
state.mode = MODE_SUPERVISE;
break;
case 'u':
state.user = optarg;
@ -304,246 +306,212 @@ main(int argc, char *argv[])
* to be able to report the error intelligently
*/
open_pid_files(&state);
/*
* TODO: add feature to avoid backgrounding
* i.e. --foreground, -f
*/
if (daemon(state.keep_cur_workdir, state.keep_fds_open) == -1) {
warn("daemon");
daemon_terminate(&state);
}
if (state.mode == MODE_DAEMON) {
daemon_exec(&state);
}
/* Write out parent pidfile if needed. */
pidfile_write(state.parent_pidfh);
if (state.supervision_enabled) {
/* Block SIGTERM to avoid racing until the child is spawned. */
if (sigprocmask(SIG_BLOCK, &state.mask_term, &state.mask_orig)) {
warn("sigprocmask");
daemon_terminate(&state);
}
setup_signals(&state);
/*
* Try to protect against pageout kill. Ignore the
* error, madvise(2) will fail only if a process does
* not have superuser privileges.
*/
(void)madvise(NULL, 0, MADV_PROTECT);
}
do {
state.mode = MODE_SUPERVISE;
daemon_eventloop(&state);
close(state.pipe_fd[0]);
state.pipe_fd[0] = -1;
} while (state.restart_enabled && !terminate);
daemon_sleep(&state);
} while (state.restart_enabled);
daemon_terminate(&state);
}
static void
daemon_exec(struct daemon_state *state)
{
pidfile_write(state->child_pidfh);
/*
* Main event loop: fork the child and watch for events.
* In legacy mode simply execve into the target process.
*
* Signal handling logic:
*
* - SIGTERM is masked while there is no child.
*
* - SIGCHLD is masked while reading from the pipe. SIGTERM has to be
* caught, to avoid indefinite blocking on read().
*
* - Both SIGCHLD and SIGTERM are masked before calling sigsuspend()
* to avoid racing.
*
* - After SIGTERM is recieved and propagated to the child there are
* several options on what to do next:
* - read until EOF
* - read until EOF but only for a while
* - bail immediately
* Currently the third option is used, because otherwise there is no
* guarantee that read() won't block indefinitely if the child refuses
* to depart. To handle the second option, a different approach
* would be needed (procctl()?).
*
* - Child's exit might be detected by receiveing EOF from the pipe.
* But the child might have closed its stdout and stderr, so deamon
* must wait for the SIGCHLD to ensure that the child is actually gone.
if (state->user != NULL) {
restrict_process(state->user);
}
/* Ignored signals remain ignored after execve, unignore them */
signal(SIGHUP, SIG_DFL);
signal(SIGTERM, SIG_DFL);
execvp(state->argv[0], state->argv);
/* execvp() failed - report error and exit this process */
err(1, "%s", state->argv[0]);
}
/* Main event loop: fork the child and watch for events.
* After SIGTERM is recieved and propagated to the child there are
* several options on what to do next:
* - read until EOF
* - read until EOF but only for a while
* - bail immediately
* Currently the third option is used, because otherwise there is no
* guarantee that read() won't block indefinitely if the child refuses
* to depart. To handle the second option, a different approach
* would be needed (procctl()?).
*/
static void
daemon_eventloop(struct daemon_state *state)
{
if (state->supervision_enabled) {
if (pipe(state->pipe_fd)) {
err(1, "pipe");
}
/*
* Spawn a child to exec the command.
*/
child_gone = 0;
pid = fork();
}
/* fork failed, this can only happen when supervision is enabled */
if (pid == -1) {
warn("fork");
daemon_terminate(state);
}
/* fork succeeded, this is child's branch or supervision is disabled */
if (pid == 0) {
pidfile_write(state->child_pidfh);
if (state->user != NULL) {
restrict_process(state->user);
}
/*
* In supervision mode, the child gets the original sigmask,
* and dup'd pipes.
*/
if (state->supervision_enabled) {
close(state->pipe_fd[0]);
if (sigprocmask(SIG_SETMASK, &state->mask_orig, NULL)) {
err(1, "sigprogmask");
}
if (state->stdmask & STDERR_FILENO) {
if (dup2(state->pipe_fd[1], STDERR_FILENO) == -1) {
err(1, "dup2");
}
}
if (state->stdmask & STDOUT_FILENO) {
if (dup2(state->pipe_fd[1], STDOUT_FILENO) == -1) {
err(1, "dup2");
}
}
if (state->pipe_fd[1] != STDERR_FILENO &&
state->pipe_fd[1] != STDOUT_FILENO) {
close(state->pipe_fd[1]);
}
}
execvp(state->argv[0], state->argv);
/* execvp() failed - report error and exit this process */
err(1, "%s", state->argv[0]);
}
struct kevent event;
int kq;
int ret;
/*
* else: pid > 0
* fork succeeded, this is the parent branch, this can only happen when
* supervision is enabled.
*
* Unblock SIGTERM - now there is a valid child PID to signal to.
* Try to protect against pageout kill. Ignore the
* error, madvise(2) will fail only if a process does
* not have superuser privileges.
*/
if (sigprocmask(SIG_UNBLOCK, &state->mask_term, NULL)) {
warn("sigprocmask");
daemon_terminate(state);
(void)madvise(NULL, 0, MADV_PROTECT);
if (pipe(state->pipe_fd)) {
err(1, "pipe");
}
kq = kqueuex(KQUEUE_CLOEXEC);
EV_SET(&event, state->pipe_fd[0], EVFILT_READ, EV_ADD|EV_CLEAR, 0, 0,
NULL);
if (kevent(kq, &event, 1, NULL, 0, NULL) == -1) {
err(EXIT_FAILURE, "failed to register kevent");
}
EV_SET(&event, SIGHUP, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
if (kevent(kq, &event, 1, NULL, 0, NULL) == -1) {
err(EXIT_FAILURE, "failed to register kevent");
}
EV_SET(&event, SIGTERM, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
if (kevent(kq, &event, 1, NULL, 0, NULL) == -1) {
err(EXIT_FAILURE, "failed to register kevent");
}
EV_SET(&event, SIGCHLD, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
if (kevent(kq, &event, 1, NULL, 0, NULL) == -1) {
err(EXIT_FAILURE, "failed to register kevent");
}
memset(&event, 0, sizeof(struct kevent));
/* Spawn a child to exec the command. */
state->pid = fork();
/* fork failed, this can only happen when supervision is enabled */
switch (state->pid) {
case -1:
warn("fork");
state->mode = MODE_NOCHILD;
return;
/* fork succeeded, this is child's branch */
case 0:
close(kq);
daemon_set_child_pipe(state);
daemon_exec(state);
break;
}
/* case: pid > 0; fork succeeded */
close(state->pipe_fd[1]);
state->pipe_fd[1] = -1;
setproctitle("%s[%d]", state->title, (int)state->pid);
setproctitle("%s[%d]", state->title, (int)pid);
for (;;) {
if (child_gone && state->child_eof) {
break;
}
if (terminate) {
daemon_terminate(state);
}
if (state->child_eof) {
if (sigprocmask(SIG_BLOCK, &state->mask_susp, NULL)) {
warn("sigprocmask");
daemon_terminate(state);
}
while (!terminate && !child_gone) {
sigsuspend(&state->mask_orig);
}
if (sigprocmask(SIG_UNBLOCK, &state->mask_susp, NULL)) {
warn("sigprocmask");
daemon_terminate(state);
}
while (state->mode != MODE_NOCHILD) {
ret = kevent(kq, NULL, 0, &event, 1, NULL);
switch (ret) {
case -1:
err(EXIT_FAILURE, "kevent wait");
case 0:
continue;
}
if (sigprocmask(SIG_BLOCK, &state->mask_read, NULL)) {
warn("sigprocmask");
daemon_terminate(state);
if (event.flags & EV_ERROR) {
errx(EXIT_FAILURE, "Event error: %s",
strerror(event.data));
}
state->child_eof = !listen_child(state->pipe_fd[0], state);
switch (event.filter) {
case EVFILT_SIGNAL:
if (sigprocmask(SIG_UNBLOCK, &state->mask_read, NULL)) {
warn("sigprocmask");
daemon_terminate(state);
switch (event.ident) {
case SIGCHLD:
if (daemon_is_child_dead(state)) {
/* child is dead, read all until EOF */
state->pid = -1;
state->mode = MODE_NOCHILD;
while (listen_child(state->pipe_fd[0],
state))
;
}
continue;
case SIGTERM:
if (state->mode != MODE_SUPERVISE) {
/* user is impatient */
/* TODO: warn about repeated SIGTERM? */
continue;
}
state->mode = MODE_TERMINATING;
state->restart_enabled = false;
if (state->pid > 0) {
kill(state->pid, SIGTERM);
}
/*
* TODO set kevent timer to exit
* unconditionally after some time
*/
continue;
case SIGHUP:
if (state->log_reopen && state->output_fd >= 0) {
reopen_log(state);
}
continue;
}
break;
case EVFILT_READ:
/*
* detecting EOF is no longer necessary
* if child closes the pipe daemon will stop getting
* EVFILT_READ events
*/
if (event.data > 0) {
(void)listen_child(state->pipe_fd[0], state);
}
continue;
default:
continue;
}
}
/*
* At the end of the loop the the child is already gone.
* Block SIGTERM to avoid racing until the child is spawned.
*/
if (sigprocmask(SIG_BLOCK, &state->mask_term, NULL)) {
warn("sigprocmask");
daemon_terminate(state);
}
/* sleep before exiting mainloop if restart is enabled */
if (state->restart_enabled && !terminate) {
daemon_sleep(state->restart_delay, 0);
}
close(kq);
close(state->pipe_fd[0]);
state->pipe_fd[0] = -1;
}
static void
daemon_sleep(time_t secs, long nsecs)
daemon_sleep(struct daemon_state *state)
{
struct timespec ts = { secs, nsecs };
struct timespec ts = { state->restart_delay, 0 };
while (!terminate && nanosleep(&ts, &ts) == -1) {
if (!state->restart_enabled) {
return;
}
while (nanosleep(&ts, &ts) == -1) {
if (errno != EINTR) {
err(1, "nanosleep");
}
}
}
/*
* Setup SIGTERM, SIGCHLD and SIGHUP handlers.
* To avoid racing SIGCHLD with SIGTERM corresponding
* signal handlers mask the other signal.
*/
static void
setup_signals(struct daemon_state *state)
{
struct sigaction act_term = { 0 };
struct sigaction act_chld = { 0 };
struct sigaction act_hup = { 0 };
/* Setup SIGTERM */
act_term.sa_handler = handle_term;
sigemptyset(&act_term.sa_mask);
sigaddset(&act_term.sa_mask, SIGCHLD);
if (sigaction(SIGTERM, &act_term, NULL) == -1) {
warn("sigaction");
daemon_terminate(state);
}
/* Setup SIGCHLD */
act_chld.sa_handler = handle_chld;
sigemptyset(&act_chld.sa_mask);
sigaddset(&act_chld.sa_mask, SIGTERM);
if (sigaction(SIGCHLD, &act_chld, NULL) == -1) {
warn("sigaction");
daemon_terminate(state);
}
/* Setup SIGHUP if configured */
if (!state->log_reopen || state->output_fd < 0) {
return;
}
act_hup.sa_handler = handle_hup;
sigemptyset(&act_hup.sa_mask);
if (sigaction(SIGHUP, &act_hup, NULL) == -1) {
warn("sigaction");
daemon_terminate(state);
}
}
static void
open_pid_files(struct daemon_state *state)
{
@ -612,6 +580,8 @@ restrict_process(const char *user)
*
* Return value of false is assumed to mean EOF or error, and true indicates to
* continue reading.
*
* TODO: simplify signature - state contains pipefd
*/
static bool
listen_child(int fd, struct daemon_state *state)
@ -623,9 +593,6 @@ listen_child(int fd, struct daemon_state *state)
assert(state != NULL);
assert(bytes_read < LBUF_SIZE - 1);
if (do_log_reopen) {
reopen_log(state);
}
rv = read(fd, buf + bytes_read, LBUF_SIZE - bytes_read - 1);
if (rv > 0) {
unsigned char *cp;
@ -697,42 +664,6 @@ do_output(const unsigned char *buf, size_t len, struct daemon_state *state)
}
}
/*
* We use the global PID acquired directly from fork. If there is no valid
* child pid, the handler should be blocked and/or child_gone == 1.
*/
static void
handle_term(int signo)
{
if (pid > 0 && !child_gone) {
kill(pid, signo);
}
terminate = 1;
}
static void
handle_chld(int signo __unused)
{
for (;;) {
int rv = waitpid(-1, NULL, WNOHANG);
if (pid == rv) {
child_gone = 1;
break;
} else if (rv == -1 && errno != EINTR) {
warn("waitpid");
return;
}
}
}
static void
handle_hup(int signo __unused)
{
do_log_reopen = 1;
}
static int
open_log(const char *outfn)
{
@ -745,7 +676,6 @@ reopen_log(struct daemon_state *state)
{
int outfd;
do_log_reopen = 0;
outfd = open_log(state->output_filename);
if (state->output_fd >= 0) {
close(state->output_fd);
@ -765,9 +695,9 @@ daemon_state_init(struct daemon_state *state)
.parent_pidfile = NULL,
.title = NULL,
.user = NULL,
.supervision_enabled = false,
.child_eof = false,
.mode = MODE_DAEMON,
.restart_enabled = false,
.pid = 0,
.keep_cur_workdir = 1,
.restart_delay = 1,
.stdmask = STDOUT_FILENO | STDERR_FILENO,
@ -780,25 +710,23 @@ daemon_state_init(struct daemon_state *state)
.output_fd = -1,
.output_filename = NULL,
};
sigemptyset(&state->mask_susp);
sigemptyset(&state->mask_read);
sigemptyset(&state->mask_term);
sigemptyset(&state->mask_orig);
sigaddset(&state->mask_susp, SIGTERM);
sigaddset(&state->mask_susp, SIGCHLD);
sigaddset(&state->mask_term, SIGTERM);
sigaddset(&state->mask_read, SIGCHLD);
}
static _Noreturn void
daemon_terminate(struct daemon_state *state)
{
assert(state != NULL);
close(state->output_fd);
close(state->pipe_fd[0]);
close(state->pipe_fd[1]);
if (state->output_fd >= 0) {
close(state->output_fd);
}
if (state->pipe_fd[0] >= 0) {
close(state->pipe_fd[0]);
}
if (state->pipe_fd[1] >= 0) {
close(state->pipe_fd[1]);
}
if (state->syslog_enabled) {
closelog();
}
@ -812,3 +740,45 @@ daemon_terminate(struct daemon_state *state)
*/
exit(1);
}
/*
* Returns true if SIGCHILD came from state->pid
* This function could hang if SIGCHILD was emittied for a reason other than
* child dying (e.g., ptrace attach).
*/
static bool
daemon_is_child_dead(struct daemon_state *state)
{
for (;;) {
int who = waitpid(-1, NULL, WNOHANG);
if (state->pid == who) {
return true;
}
if (who == -1 && errno != EINTR) {
warn("waitpid");
return false;
}
}
}
static void
daemon_set_child_pipe(struct daemon_state *state)
{
if (state->stdmask & STDERR_FILENO) {
if (dup2(state->pipe_fd[1], STDERR_FILENO) == -1) {
err(1, "dup2");
}
}
if (state->stdmask & STDOUT_FILENO) {
if (dup2(state->pipe_fd[1], STDOUT_FILENO) == -1) {
err(1, "dup2");
}
}
if (state->pipe_fd[1] != STDERR_FILENO &&
state->pipe_fd[1] != STDOUT_FILENO) {
close(state->pipe_fd[1]);
}
/* The child gets dup'd pipes. */
close(state->pipe_fd[0]);
}