diff --git a/sbin/hastd/hastd.c b/sbin/hastd/hastd.c index 444dd633ff89..7f87dd6ad1d6 100644 --- a/sbin/hastd/hastd.c +++ b/sbin/hastd/hastd.c @@ -736,6 +736,13 @@ listen_accept(void) nv_add_stringf(nverr, "errmsg", "Remote node acts as %s for the resource and not as %s.", role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY)); + if (res->hr_role == HAST_ROLE_PRIMARY) { + /* + * If we act as primary request the other side to wait + * for us for a bit, as we might be finishing cleanups. + */ + nv_add_uint8(nverr, 1, "wait"); + } goto fail; } /* Does token (if exists) match? */ diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c index 751929e4e43d..3363fcbb051a 100644 --- a/sbin/hastd/primary.c +++ b/sbin/hastd/primary.c @@ -219,6 +219,7 @@ static pthread_cond_t range_regular_cond; static struct rangelocks *range_sync; static bool range_sync_wait; static pthread_cond_t range_sync_cond; +static bool fullystarted; static void *ggate_recv_thread(void *arg); static void *local_send_thread(void *arg); @@ -524,7 +525,7 @@ primary_connect(struct hast_resource *res, struct proto_conn **connp) return (0); } -static bool +static int init_remote(struct hast_resource *res, struct proto_conn **inp, struct proto_conn **outp) { @@ -537,6 +538,7 @@ init_remote(struct hast_resource *res, struct proto_conn **inp, int64_t datasize; uint32_t mapsize; size_t size; + int error; PJDLOG_ASSERT((inp == NULL && outp == NULL) || (inp != NULL && outp != NULL)); PJDLOG_ASSERT(real_remote(res)); @@ -545,7 +547,9 @@ init_remote(struct hast_resource *res, struct proto_conn **inp, errmsg = NULL; if (primary_connect(res, &out) == -1) - return (false); + return (ECONNREFUSED); + + error = ECONNABORTED; /* * First handshake step. 
@@ -577,6 +581,8 @@ init_remote(struct hast_resource *res, struct proto_conn **inp, errmsg = nv_get_string(nvin, "errmsg"); if (errmsg != NULL) { pjdlog_warning("%s", errmsg); + if (nv_exists(nvin, "wait")) + error = EBUSY; nv_free(nvin); goto close; } @@ -734,14 +740,14 @@ init_remote(struct hast_resource *res, struct proto_conn **inp, res->hr_remoteout = out; } event_send(res, EVENT_CONNECT); - return (true); + return (0); close: if (errmsg != NULL && strcmp(errmsg, "Split-brain condition!") == 0) event_send(res, EVENT_SPLITBRAIN); proto_close(out); if (in != NULL) proto_close(in); - return (false); + return (error); } static void @@ -920,8 +926,30 @@ hastd_primary(struct hast_resource *res) */ error = pthread_create(&td, NULL, ctrl_thread, res); PJDLOG_ASSERT(error == 0); - if (real_remote(res) && init_remote(res, NULL, NULL)) - sync_start(); + if (real_remote(res)) { + error = init_remote(res, NULL, NULL); + if (error == 0) { + sync_start(); + } else if (error == EBUSY) { + time_t start = time(NULL); + + pjdlog_warning("Waiting for remote node to become %s for %ds.", + role2str(HAST_ROLE_SECONDARY), + res->hr_timeout); + for (;;) { + sleep(1); + error = init_remote(res, NULL, NULL); + if (error != EBUSY) + break; + if (time(NULL) > start + res->hr_timeout) + break; + } + if (error == EBUSY) { + pjdlog_warning("Remote node is still %s, starting anyway.", + role2str(HAST_ROLE_PRIMARY)); + } + } + } error = pthread_create(&td, NULL, ggate_recv_thread, res); PJDLOG_ASSERT(error == 0); error = pthread_create(&td, NULL, local_send_thread, res); @@ -932,6 +960,7 @@ hastd_primary(struct hast_resource *res) PJDLOG_ASSERT(error == 0); error = pthread_create(&td, NULL, ggate_send_thread, res); PJDLOG_ASSERT(error == 0); + fullystarted = true; (void)sync_thread(res); } @@ -2095,7 +2124,7 @@ guard_one(struct hast_resource *res, unsigned int ncomp) pjdlog_debug(2, "remote_guard: Reconnecting to %s.", res->hr_remoteaddr); in = out = NULL; - if (init_remote(res, &in, &out)) { 
+ if (init_remote(res, &in, &out) == 0) { rw_wlock(&hio_remote_lock[ncomp]); PJDLOG_ASSERT(res->hr_remotein == NULL); PJDLOG_ASSERT(res->hr_remoteout == NULL); @@ -2153,12 +2182,19 @@ guard_thread(void *arg) break; } - pjdlog_debug(2, "remote_guard: Checking connections."); - now = time(NULL); - if (lastcheck + HAST_KEEPALIVE <= now) { - for (ii = 0; ii < ncomps; ii++) - guard_one(res, ii); - lastcheck = now; + /* + * Don't check connections until we have fully started, + * as we may still be looping, waiting for remote node + * to switch from primary to secondary. + */ + if (fullystarted) { + pjdlog_debug(2, "remote_guard: Checking connections."); + now = time(NULL); + if (lastcheck + HAST_KEEPALIVE <= now) { + for (ii = 0; ii < ncomps; ii++) + guard_one(res, ii); + lastcheck = now; + } } signo = sigtimedwait(&mask, NULL, &timeout); }