diff --git a/sbin/hastd/Makefile b/sbin/hastd/Makefile index bfb62f4f11f6..6e3147dcd31e 100644 --- a/sbin/hastd/Makefile +++ b/sbin/hastd/Makefile @@ -5,7 +5,7 @@ PROG= hastd SRCS= activemap.c SRCS+= control.c -SRCS+= ebuf.c +SRCS+= ebuf.c event.c SRCS+= hast_proto.c hastd.c hooks.c SRCS+= metadata.c SRCS+= nv.c diff --git a/sbin/hastd/hast.h b/sbin/hastd/hast.h index 71379178e767..a42865f0d061 100644 --- a/sbin/hastd/hast.h +++ b/sbin/hastd/hast.h @@ -181,6 +181,8 @@ struct hast_resource { pid_t hr_workerpid; /* Control connection between parent and child. */ struct proto_conn *hr_ctrl; + /* Events from child to parent. */ + struct proto_conn *hr_event; /* Activemap structure. */ struct activemap *hr_amp; diff --git a/sbin/hastd/hastd.c b/sbin/hastd/hastd.c index ccc25cfe0f44..6ddcbd2f831e 100644 --- a/sbin/hastd/hastd.c +++ b/sbin/hastd/hastd.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include #include "control.h" +#include "event.h" #include "hast.h" #include "hast_proto.h" #include "hastd.h" @@ -158,6 +159,11 @@ child_exit(void) role2str(res->hr_role)); child_exit_log(pid, status); proto_close(res->hr_ctrl); + res->hr_ctrl = NULL; + if (res->hr_event != NULL) { + proto_close(res->hr_event); + res->hr_event = NULL; + } res->hr_workerpid = 0; if (res->hr_role == HAST_ROLE_PRIMARY) { /* @@ -624,9 +630,10 @@ listen_accept(void) static void main_loop(void) { - fd_set rfds; - int cfd, lfd, maxfd, ret; + struct hast_resource *res; struct timeval timeout; + int fd, maxfd, ret; + fd_set rfds; timeout.tv_sec = REPORT_INTERVAL; timeout.tv_usec = 0; @@ -646,14 +653,20 @@ main_loop(void) hastd_reload(); } - cfd = proto_descriptor(cfg->hc_controlconn); - lfd = proto_descriptor(cfg->hc_listenconn); - maxfd = cfd > lfd ? cfd : lfd; - /* Setup descriptors for select(2). */ FD_ZERO(&rfds); - FD_SET(cfd, &rfds); - FD_SET(lfd, &rfds); + maxfd = fd = proto_descriptor(cfg->hc_controlconn); + FD_SET(fd, &rfds); + fd = proto_descriptor(cfg->hc_listenconn); + FD_SET(fd, &rfds); + maxfd = fd > maxfd ? fd : maxfd; + TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { + if (res->hr_event == NULL) + continue; + fd = proto_descriptor(res->hr_event); + FD_SET(fd, &rfds); + maxfd = fd > maxfd ? fd : maxfd; + } ret = select(maxfd + 1, &rfds, NULL, NULL, &timeout); if (ret == 0) @@ -665,10 +678,21 @@ main_loop(void) pjdlog_exit(EX_OSERR, "select() failed"); } - if (FD_ISSET(cfd, &rfds)) + if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds)) control_handle(cfg); - if (FD_ISSET(lfd, &rfds)) + if (FD_ISSET(proto_descriptor(cfg->hc_listenconn), &rfds)) listen_accept(); + TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { + if (res->hr_event == NULL) + continue; + if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) { + if (event_recv(res) == 0) + continue; + /* The worker process exited? */ + proto_close(res->hr_event); + res->hr_event = NULL; + } + } } } diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c index e018e89a186f..a8a2d41f87a8 100644 --- a/sbin/hastd/primary.c +++ b/sbin/hastd/primary.c @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include #include "control.h" +#include "event.h" #include "hast.h" #include "hast_proto.h" #include "hastd.h" @@ -225,12 +226,6 @@ static void *ggate_send_thread(void *arg); static void *sync_thread(void *arg); static void *guard_thread(void *arg); -static void -dummy_sighandler(int sig __unused) -{ - /* Nothing to do. */ -} - static void cleanup(struct hast_resource *res) { @@ -429,8 +424,6 @@ init_environment(struct hast_resource *res __unused) /* * Turn on signals handling. */ - /* Because SIGCHLD is ignored by default, setup dummy handler for it. */ - PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR); PJDLOG_VERIFY(sigfillset(&mask) == 0); PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); } @@ -672,11 +665,11 @@ init_remote(struct hast_resource *res, struct proto_conn **inp, res->hr_remotein = in; res->hr_remoteout = out; } - hook_exec(res->hr_exec, "connect", res->hr_name, NULL); + event_send(res, EVENT_CONNECT); return (true); close: if (errmsg != NULL && strcmp(errmsg, "Split-brain condition!") == 0) - hook_exec(res->hr_exec, "split-brain", res->hr_name, NULL); + event_send(res, EVENT_SPLITBRAIN); proto_close(out); if (in != NULL) proto_close(in); @@ -774,6 +767,14 @@ hastd_primary(struct hast_resource *res) pjdlog_exit(EX_OSERR, "Unable to create control sockets between parent and child"); } + /* + * Create communication channel between child and parent. + */ + if (proto_client("socketpair://", &res->hr_event) < 0) { + KEEP_ERRNO((void)pidfile_remove(pfh)); + pjdlog_exit(EX_OSERR, + "Unable to create event sockets between child and parent"); + } pid = fork(); if (pid < 0) { @@ -783,6 +784,8 @@ hastd_primary(struct hast_resource *res) if (pid > 0) { /* This is parent. */ + /* Declare that we are receiver. */ + proto_recv(res->hr_event, NULL, 0); res->hr_workerpid = pid; return; } @@ -797,7 +800,9 @@ hastd_primary(struct hast_resource *res) signal(SIGHUP, SIG_DFL); signal(SIGCHLD, SIG_DFL); - hook_init(); + /* Declare that we are sender. */ + proto_send(res->hr_event, NULL, 0); + init_local(res); if (real_remote(res) && init_remote(res, NULL, NULL)) sync_start(); @@ -896,7 +901,7 @@ remote_close(struct hast_resource *res, int ncomp) */ sync_stop(); - hook_exec(res->hr_exec, "disconnect", res->hr_name, NULL); + event_send(res, EVENT_DISCONNECT); } /* @@ -1512,7 +1517,7 @@ sync_thread(void *arg __unused) pjdlog_info("Synchronization interrupted. " "%jd bytes synchronized so far.", (intmax_t)synced); - hook_exec(res->hr_exec, "syncintr", res->hr_name, NULL); + event_send(res, EVENT_SYNCINTR); } while (!sync_inprogress) { dorewind = true; @@ -1545,8 +1550,7 @@ sync_thread(void *arg __unused) pjdlog_info("Synchronization started. %ju bytes to go.", (uintmax_t)(res->hr_extentsize * activemap_ndirty(res->hr_amp))); - hook_exec(res->hr_exec, "syncstart", - res->hr_name, NULL); + event_send(res, EVENT_SYNCSTART); } } if (offset < 0) { @@ -1563,8 +1567,7 @@ sync_thread(void *arg __unused) pjdlog_info("Synchronization complete. " "%jd bytes synchronized.", (intmax_t)synced); - hook_exec(res->hr_exec, "syncdone", - res->hr_name, NULL); + event_send(res, EVENT_SYNCDONE); } mtx_lock(&metadata_lock); res->hr_syncsrc = HAST_SYNCSRC_UNDEF; @@ -1950,7 +1953,6 @@ guard_thread(void *arg) PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0); PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0); PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0); - PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0); timeout.tv_nsec = 0; signo = -1; @@ -1969,7 +1971,6 @@ guard_thread(void *arg) default: break; } - hook_check(signo == SIGCHLD); pjdlog_debug(2, "remote_guard: Checking connections."); now = time(NULL); diff --git a/sbin/hastd/secondary.c b/sbin/hastd/secondary.c index d1575e8a5a7e..14016f703db1 100644 --- a/sbin/hastd/secondary.c +++ b/sbin/hastd/secondary.c @@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include #include "control.h" +#include "event.h" #include "hast.h" #include "hast_proto.h" #include "hastd.h" @@ -325,7 +326,7 @@ init_remote(struct hast_resource *res, struct nv *nvin) if (res->hr_secondary_localcnt > res->hr_primary_remotecnt && res->hr_primary_localcnt > res->hr_secondary_remotecnt) { /* Exit on split-brain. */ - hook_exec(res->hr_exec, "split-brain", res->hr_name, NULL); + event_send(res, EVENT_SPLITBRAIN); exit(EX_CONFIG); } } @@ -345,6 +346,14 @@ hastd_secondary(struct hast_resource *res, struct nv *nvin) pjdlog_exit(EX_OSERR, "Unable to create control sockets between parent and child"); } + /* + * Create communication channel between child and parent. + */ + if (proto_client("socketpair://", &res->hr_event) < 0) { + KEEP_ERRNO((void)pidfile_remove(pfh)); + pjdlog_exit(EX_OSERR, + "Unable to create event sockets between child and parent"); + } pid = fork(); if (pid < 0) { @@ -358,6 +367,8 @@ hastd_secondary(struct hast_resource *res, struct nv *nvin) res->hr_remotein = NULL; proto_close(res->hr_remoteout); res->hr_remoteout = NULL; + /* Declare that we are receiver. */ + proto_recv(res->hr_event, NULL, 0); res->hr_workerpid = pid; return; } @@ -372,17 +383,19 @@ hastd_secondary(struct hast_resource *res, struct nv *nvin) signal(SIGHUP, SIG_DFL); signal(SIGCHLD, SIG_DFL); + /* Declare that we are sender. */ + proto_send(res->hr_event, NULL, 0); + /* Error in setting timeout is not critical, but why should it fail? */ if (proto_timeout(res->hr_remotein, 0) < 0) pjdlog_errno(LOG_WARNING, "Unable to set connection timeout"); if (proto_timeout(res->hr_remoteout, res->hr_timeout) < 0) pjdlog_errno(LOG_WARNING, "Unable to set connection timeout"); - hook_init(); init_local(res); init_remote(res, nvin); init_environment(); - hook_exec(res->hr_exec, "connect", res->hr_name, NULL); + event_send(res, EVENT_CONNECT); error = pthread_create(&td, NULL, recv_thread, res); assert(error == 0); @@ -515,7 +528,7 @@ secondary_exit(int exitcode, const char *fmt, ...) va_start(ap, fmt); pjdlogv_errno(LOG_ERR, fmt, ap); va_end(ap); - hook_exec(gres->hr_exec, "disconnect", gres->hr_name, NULL); + event_send(gres, EVENT_DISCONNECT); exit(exitcode); }