diff --git a/sbin/hastd/control.c b/sbin/hastd/control.c index 4faf05dc8ccb..5e186918fab7 100644 --- a/sbin/hastd/control.c +++ b/sbin/hastd/control.c @@ -62,6 +62,10 @@ child_cleanup(struct hast_resource *res) proto_close(res->hr_event); res->hr_event = NULL; } + if (res->hr_conn != NULL) { + proto_close(res->hr_conn); + res->hr_conn = NULL; + } res->hr_workerpid = 0; } diff --git a/sbin/hastd/hast.h b/sbin/hastd/hast.h index dbbe0da015fb..35a43effffee 100644 --- a/sbin/hastd/hast.h +++ b/sbin/hastd/hast.h @@ -182,10 +182,12 @@ struct hast_resource { int hr_previous_role; /* PID of child worker process. 0 - no child. */ pid_t hr_workerpid; - /* Control connection between parent and child. */ + /* Control commands from parent to child. */ struct proto_conn *hr_ctrl; /* Events from child to parent. */ struct proto_conn *hr_event; + /* Connection requests from child to parent. */ + struct proto_conn *hr_conn; /* Activemap structure. */ struct activemap *hr_amp; diff --git a/sbin/hastd/hastd.c b/sbin/hastd/hastd.c index 5bced2c24d7d..cbac4f469c35 100644 --- a/sbin/hastd/hastd.c +++ b/sbin/hastd/hastd.c @@ -214,6 +214,19 @@ descriptors_assert(const struct hast_resource *res, int pjdlogmode) fd, dtype2str(mode), dtype2str(S_IFSOCK)); break; } + } else if (fd == proto_descriptor(res->hr_conn)) { + if (!isopen) { + snprintf(msg, sizeof(msg), + "Descriptor %d (conn) is closed, but should be open.", + fd); + break; + } + if (!S_ISSOCK(mode)) { + snprintf(msg, sizeof(msg), + "Descriptor %d (conn) is %s, but should be %s.", + fd, dtype2str(mode), dtype2str(S_IFSOCK)); + break; + } } else if (res->hr_role == HAST_ROLE_SECONDARY && fd == proto_descriptor(res->hr_remotein)) { if (!isopen) { @@ -801,6 +814,41 @@ listen_accept(void) pjdlog_prefix_set("%s", ""); } +static void +connection_migrate(struct hast_resource *res) +{ + struct proto_conn *conn; + int16_t val = 0; + + if (proto_recv(res->hr_conn, &val, sizeof(val)) < 0) { + pjdlog_errno(LOG_WARNING, + "Unable to receive connection command"); + return; + } + if (proto_client(res->hr_remoteaddr, &conn) < 0) { + val = errno; + pjdlog_errno(LOG_WARNING, + "Unable to create outgoing connection to %s", + res->hr_remoteaddr); + goto out; + } + if (proto_connect(conn, -1) < 0) { + val = errno; + pjdlog_errno(LOG_WARNING, "Unable to connect to %s", + res->hr_remoteaddr); + proto_close(conn); + goto out; + } + val = 0; +out: + if (proto_send(res->hr_conn, &val, sizeof(val)) < 0) { + pjdlog_errno(LOG_WARNING, + "Unable to send reply to connection request"); + } + if (val == 0 && proto_connection_send(res->hr_conn, conn) < 0) + pjdlog_errno(LOG_WARNING, "Unable to send connection"); +} + static void main_loop(void) { @@ -858,10 +906,18 @@ main_loop(void) TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { if (res->hr_event == NULL) continue; + PJDLOG_ASSERT(res->hr_conn != NULL); fd = proto_descriptor(res->hr_event); PJDLOG_ASSERT(fd >= 0); FD_SET(fd, &rfds); maxfd = fd > maxfd ? fd : maxfd; + if (res->hr_role == HAST_ROLE_PRIMARY) { + /* Only primary workers asks for connections. */ + fd = proto_descriptor(res->hr_conn); + PJDLOG_ASSERT(fd >= 0); + FD_SET(fd, &rfds); + maxfd = fd > maxfd ? fd : maxfd; + } } PJDLOG_ASSERT(maxfd + 1 <= (int)FD_SETSIZE); @@ -882,12 +938,20 @@ main_loop(void) TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { if (res->hr_event == NULL) continue; + PJDLOG_ASSERT(res->hr_conn != NULL); if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) { if (event_recv(res) == 0) continue; /* The worker process exited? */ proto_close(res->hr_event); res->hr_event = NULL; + proto_close(res->hr_conn); + res->hr_conn = NULL; + continue; + } + if (res->hr_role == HAST_ROLE_PRIMARY && + FD_ISSET(proto_descriptor(res->hr_conn), &rfds)) { + connection_migrate(res); } } } diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c index 708109625feb..408edbcbee43 100644 --- a/sbin/hastd/primary.c +++ b/sbin/hastd/primary.c @@ -488,6 +488,46 @@ init_local(struct hast_resource *res) exit(EX_NOINPUT); } +static int +primary_connect(struct hast_resource *res, struct proto_conn **connp) +{ + struct proto_conn *conn; + int16_t val; + + val = 1; + if (proto_send(res->hr_conn, &val, sizeof(val)) < 0) { + primary_exit(EX_TEMPFAIL, + "Unable to send connection request to parent"); + } + if (proto_recv(res->hr_conn, &val, sizeof(val)) < 0) { + primary_exit(EX_TEMPFAIL, + "Unable to receive reply to connection request from parent"); + } + if (val != 0) { + errno = val; + pjdlog_errno(LOG_WARNING, "Unable to connect to %s", + res->hr_remoteaddr); + return (-1); + } + if (proto_connection_recv(res->hr_conn, true, &conn) < 0) { + primary_exit(EX_TEMPFAIL, + "Unable to receive connection from parent"); + } + if (proto_connect_wait(conn, HAST_TIMEOUT) < 0) { + pjdlog_errno(LOG_WARNING, "Unable to connect to %s", + res->hr_remoteaddr); + proto_close(conn); + return (-1); + } + /* Error in setting timeout is not critical, but why should it fail? */ + if (proto_timeout(conn, res->hr_timeout) < 0) + pjdlog_errno(LOG_WARNING, "Unable to set connection timeout"); + + *connp = conn; + + return (0); +} + static bool init_remote(struct hast_resource *res, struct proto_conn **inp, struct proto_conn **outp) @@ -508,21 +548,9 @@ init_remote(struct hast_resource *res, struct proto_conn **inp, in = out = NULL; errmsg = NULL; - /* Prepare outgoing connection with remote node. */ - if (proto_client(res->hr_remoteaddr, &out) < 0) { - primary_exit(EX_TEMPFAIL, - "Unable to create outgoing connection to %s", - res->hr_remoteaddr); - } - /* Try to connect, but accept failure. */ - if (proto_connect(out, HAST_TIMEOUT) < 0) { - pjdlog_errno(LOG_WARNING, "Unable to connect to %s", - res->hr_remoteaddr); - goto close; - } - /* Error in setting timeout is not critical, but why should it fail? */ - if (proto_timeout(out, res->hr_timeout) < 0) - pjdlog_errno(LOG_WARNING, "Unable to set connection timeout"); + if (primary_connect(res, &out) == -1) + return (false); + /* * First handshake step. * Setup outgoing connection with remote node. @@ -576,20 +604,9 @@ init_remote(struct hast_resource *res, struct proto_conn **inp, * Second handshake step. * Setup incoming connection with remote node. */ - if (proto_client(res->hr_remoteaddr, &in) < 0) { - primary_exit(EX_TEMPFAIL, - "Unable to create incoming connection to %s", - res->hr_remoteaddr); - } - /* Try to connect, but accept failure. */ - if (proto_connect(in, HAST_TIMEOUT) < 0) { - pjdlog_errno(LOG_WARNING, "Unable to connect to %s", - res->hr_remoteaddr); + if (primary_connect(res, &in) == -1) goto close; - } - /* Error in setting timeout is not critical, but why should it fail? */ - if (proto_timeout(in, res->hr_timeout) < 0) - pjdlog_errno(LOG_WARNING, "Unable to set connection timeout"); + nvout = nv_alloc(); nv_add_string(nvout, res->hr_name, "resource"); nv_add_uint8_array(nvout, res->hr_token, sizeof(res->hr_token), @@ -792,7 +809,8 @@ hastd_primary(struct hast_resource *res) int error, mode; /* - * Create communication channel between parent and child. + * Create communication channel for sending control commands from + * parent to child. */ if (proto_client("socketpair://", &res->hr_ctrl) < 0) { /* TODO: There's no need for this to be fatal error. */ @@ -801,7 +819,7 @@ hastd_primary(struct hast_resource *res) "Unable to create control sockets between parent and child"); } /* - * Create communication channel between child and parent. + * Create communication channel for sending events from child to parent. */ if (proto_client("socketpair://", &res->hr_event) < 0) { /* TODO: There's no need for this to be fatal error. */ @@ -809,6 +827,16 @@ hastd_primary(struct hast_resource *res) pjdlog_exit(EX_OSERR, "Unable to create event sockets between child and parent"); } + /* + * Create communication channel for sending connection requests from + * child to parent. + */ + if (proto_client("socketpair://", &res->hr_conn) < 0) { + /* TODO: There's no need for this to be fatal error. */ + KEEP_ERRNO((void)pidfile_remove(pfh)); + pjdlog_exit(EX_OSERR, + "Unable to create connection sockets between child and parent"); + } pid = fork(); if (pid < 0) { @@ -821,6 +849,7 @@ hastd_primary(struct hast_resource *res) /* This is parent. */ /* Declare that we are receiver. */ proto_recv(res->hr_event, NULL, 0); + proto_recv(res->hr_conn, NULL, 0); /* Declare that we are sender. */ proto_send(res->hr_ctrl, NULL, 0); res->hr_workerpid = pid; @@ -832,6 +861,7 @@ hastd_primary(struct hast_resource *res) /* Declare that we are sender. */ proto_send(res->hr_event, NULL, 0); + proto_send(res->hr_conn, NULL, 0); /* Declare that we are receiver. */ proto_recv(res->hr_ctrl, NULL, 0); descriptors_cleanup(res); diff --git a/sbin/hastd/secondary.c b/sbin/hastd/secondary.c index 12a68deecf8c..9c859e080cd7 100644 --- a/sbin/hastd/secondary.c +++ b/sbin/hastd/secondary.c @@ -364,6 +364,16 @@ hastd_secondary(struct hast_resource *res, struct nv *nvin) pjdlog_exit(EX_OSERR, "Unable to create event sockets between child and parent"); } + /* + * Create communication channel for sending connection requests from + * parent to child. + */ + if (proto_client("socketpair://", &res->hr_conn) < 0) { + /* TODO: There's no need for this to be fatal error. */ + KEEP_ERRNO((void)pidfile_remove(pfh)); + pjdlog_exit(EX_OSERR, + "Unable to create connection sockets between parent and child"); + } pid = fork(); if (pid < 0) { @@ -381,6 +391,7 @@ hastd_secondary(struct hast_resource *res, struct nv *nvin) proto_recv(res->hr_event, NULL, 0); /* Declare that we are sender. */ proto_send(res->hr_ctrl, NULL, 0); + proto_send(res->hr_conn, NULL, 0); res->hr_workerpid = pid; return; } @@ -392,6 +403,7 @@ hastd_secondary(struct hast_resource *res, struct nv *nvin) proto_send(res->hr_event, NULL, 0); /* Declare that we are receiver. */ proto_recv(res->hr_ctrl, NULL, 0); + proto_recv(res->hr_conn, NULL, 0); descriptors_cleanup(res); descriptors_assert(res, mode);