nvme_rdma: Detect stale connection failures.

This is the first step in properly reconnecting after a hard power-off event.

Change-Id: I9739bffacd66ec6d9f8f1d376bf42291c84f90f2
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/473061
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
2019-11-01 15:31:32 -07:00 · 2019-11-01 15:31:32 -07:00 · 6b87dd8023
commit 6b87dd8023
parent 208fbb675c
1 changed files with 15 additions and 0 deletions
--- a/lib/nvme/nvme_rdma.c
+++ b/lib/nvme/nvme_rdma.c
@@ -362,6 +362,21 @@ nvme_rdma_validate_cm_event(enum rdma_cm_event_type expected_evt_type,
 		return 0;
 	}

+	switch (expected_evt_type) {
+	case RDMA_CM_EVENT_ESTABLISHED:
+		/*
+		 * There is an enum ib_cm_rej_reason in the kernel headers that sets 10 as
+		 * IB_CM_REJ_STALE_CONN. I can't find the corresponding userspace but we get
+		 * the same values here.
+		 */
+		if (reaped_evt->event == RDMA_CM_EVENT_REJECTED && reaped_evt->status == 10) {
+			rc = -ESTALE;
+		}
+		break;
+	default:
+		break;
+	}
+
 	SPDK_ERRLOG("Expected %s but received %s (%d) from CM event channel (status = %d)\n",
 		    nvme_rdma_cm_event_str_get(expected_evt_type),
 		    nvme_rdma_cm_event_str_get(reaped_evt->event), reaped_evt->event,