Scenario:
- We have two nodes connected and synchronized (local counters on both sides are 0). - We take the secondary down and recreate it. - The primary connects to it and starts synchronization (but local counters are still 0). - We switch the roles. - Synchronization restarts, but data is now synchronized from the new primary (because local counters are 0), which doesn't have the new data yet. To fix this issue, we bump the local counter on the primary when we discover that the connected secondary was recreated and has no data yet. Reported by: trociny Discussed with: trociny Tested by: trociny MFC after: 1 week
This commit is contained in:
parent
58ff0f42ba
commit
06cbf54941
@ -667,6 +667,25 @@ init_remote(struct hast_resource *res, struct proto_conn **inp,
|
||||
res->hr_secondary_localcnt = nv_get_uint64(nvin, "localcnt");
|
||||
res->hr_secondary_remotecnt = nv_get_uint64(nvin, "remotecnt");
|
||||
res->hr_syncsrc = nv_get_uint8(nvin, "syncsrc");
|
||||
if (nv_exists(nvin, "virgin")) {
|
||||
/*
|
||||
* Secondary was reinitialized, bump localcnt if it is 0 as
|
||||
* only we have the data.
|
||||
*/
|
||||
PJDLOG_ASSERT(res->hr_syncsrc == HAST_SYNCSRC_PRIMARY);
|
||||
PJDLOG_ASSERT(res->hr_secondary_localcnt == 0);
|
||||
|
||||
if (res->hr_primary_localcnt == 0) {
|
||||
PJDLOG_ASSERT(res->hr_secondary_remotecnt == 0);
|
||||
|
||||
mtx_lock(&metadata_lock);
|
||||
res->hr_primary_localcnt++;
|
||||
pjdlog_debug(1, "Increasing localcnt to %ju.",
|
||||
(uintmax_t)res->hr_primary_localcnt);
|
||||
(void)metadata_write(res);
|
||||
mtx_unlock(&metadata_lock);
|
||||
}
|
||||
}
|
||||
map = NULL;
|
||||
mapsize = nv_get_uint32(nvin, "mapsize");
|
||||
if (mapsize > 0) {
|
||||
|
@ -261,6 +261,7 @@ init_remote(struct hast_resource *res, struct nv *nvin)
|
||||
} else {
|
||||
memset(map, 0xff, mapsize);
|
||||
}
|
||||
nv_add_int8(nvout, 1, "virgin");
|
||||
nv_add_uint8(nvout, HAST_SYNCSRC_PRIMARY, "syncsrc");
|
||||
} else if (res->hr_resuid != resuid) {
|
||||
char errmsg[256];
|
||||
|
Loading…
x
Reference in New Issue
Block a user