From 2f163613d85b72093ed4feb9449e3727266af113 Mon Sep 17 00:00:00 2001 From: np Date: Tue, 19 May 2020 16:28:20 +0000 Subject: [PATCH] cxgbe/iw_cxgbe: Add an async callback to notify iw_cxgbe in case of a fatal error. Submitted by: Krishnamraju Eraparaju @ Chelsio MFC after: 2 weeks Sponsored by: Chelsio Communications --- sys/dev/cxgbe/adapter.h | 1 + sys/dev/cxgbe/iw_cxgbe/cm.c | 65 ++++++++++++++++----------------- sys/dev/cxgbe/iw_cxgbe/device.c | 19 ++++++++++ sys/dev/cxgbe/offload.h | 1 + sys/dev/cxgbe/t4_main.c | 31 ++++++++++++++++ 5 files changed, 84 insertions(+), 33 deletions(-) diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index 3fa3abb125f5..c602f0747756 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -830,6 +830,7 @@ struct adapter { int sc_do_rxcopy; struct taskqueue *tq[MAX_NCHAN]; /* General purpose taskqueues */ + struct task async_event_task; struct port_info *port[MAX_NPORTS]; uint8_t chan_map[MAX_NCHAN]; /* channel -> port */ diff --git a/sys/dev/cxgbe/iw_cxgbe/cm.c b/sys/dev/cxgbe/iw_cxgbe/cm.c index 849ece6e4bec..a2328ab2fccc 100644 --- a/sys/dev/cxgbe/iw_cxgbe/cm.c +++ b/sys/dev/cxgbe/iw_cxgbe/cm.c @@ -1085,7 +1085,7 @@ c4iw_so_upcall(struct socket *so, void *arg, int waitflag) * Wake up any threads waiting in rdma_init()/rdma_fini(), * with locks held. */ - if (so->so_error) + if (so->so_error || (ep->com.dev->rdev.flags & T4_FATAL_ERROR)) c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); add_ep_to_req_list(ep, C4IW_EVENT_SOCKET); @@ -2700,6 +2700,11 @@ c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) CTR3(KTR_IW_CXGBE, "%s: cm_id %p, backlog %s", __func__, cm_id, backlog); + if (c4iw_fatal_error(&dev->rdev)) { + CTR2(KTR_IW_CXGBE, "%s: cm_id %p, fatal error", __func__, + cm_id); + return -EIO; + } lep = alloc_ep(sizeof(*lep), GFP_KERNEL); lep->com.cm_id = cm_id; ref_cm_id(&lep->com); @@ -2800,7 +2805,6 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) { int ret = 0; int close = 0; - int fatal = 0; struct c4iw_rdev *rdev; @@ -2809,12 +2813,14 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) rdev = &ep->com.dev->rdev; if (c4iw_fatal_error(rdev)) { - - CTR2(KTR_IW_CXGBE, "%s:ced1 %p", __func__, ep); - fatal = 1; + CTR3(KTR_IW_CXGBE, "%s:ced1 fatal error %p %s", __func__, ep, + states[ep->com.state]); + if (ep->com.state != DEAD) { + send_abort(ep); + ep->com.state = DEAD; + } close_complete_upcall(ep, -ECONNRESET); - send_abort(ep); - ep->com.state = DEAD; + return ECONNRESET; } CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep, states[ep->com.state]); @@ -2877,9 +2883,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) CTR2(KTR_IW_CXGBE, "%s:ced4 %p", __func__, ep); set_bit(EP_DISC_ABORT, &ep->com.history); close_complete_upcall(ep, -ECONNRESET); - ret = send_abort(ep); - if (ret) - fatal = 1; + send_abort(ep); } else { CTR2(KTR_IW_CXGBE, "%s:ced5 %p", __func__, ep); @@ -2889,33 +2893,28 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) ep->com.state = MORIBUND; CURVNET_SET(ep->com.so->so_vnet); - sodisconnect(ep->com.so); + ret = sodisconnect(ep->com.so); CURVNET_RESTORE(); - } - - } - - if (fatal) { - set_bit(EP_DISC_FAIL, &ep->com.history); - if (!abrupt) { - STOP_EP_TIMER(ep); - close_complete_upcall(ep, -EIO); - } - if (ep->com.qp) { - struct c4iw_qp_attributes attrs = {0}; - - attrs.next_state = C4IW_QP_STATE_ERROR; - ret = c4iw_modify_qp(ep->com.dev, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, - &attrs, 1); if (ret) { - CTR2(KTR_IW_CXGBE, "%s:ced7 %p", __func__, ep); - printf("%s - qp <- error failed!\n", __func__); + CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep); + STOP_EP_TIMER(ep); + send_abort(ep); + ep->com.state = DEAD; + close_complete_upcall(ep, -ECONNRESET); + set_bit(EP_DISC_FAIL, &ep->com.history); + if (ep->com.qp) { + struct c4iw_qp_attributes attrs = {0}; + + attrs.next_state = C4IW_QP_STATE_ERROR; + ret = c4iw_modify_qp( + ep->com.dev, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, + &attrs, 1); + CTR3(KTR_IW_CXGBE, "%s:ced7 %p ret %d", + __func__, ep, ret); + } } } - release_ep_resources(ep); - ep->com.state = DEAD; - CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep); } c4iw_put_ep(&ep->com); CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep); diff --git a/sys/dev/cxgbe/iw_cxgbe/device.c b/sys/dev/cxgbe/iw_cxgbe/device.c index 51b7bf0c4a08..7f5bf4b86da7 100644 --- a/sys/dev/cxgbe/iw_cxgbe/device.c +++ b/sys/dev/cxgbe/iw_cxgbe/device.c @@ -261,11 +261,13 @@ static int c4iw_mod_load(void); static int c4iw_mod_unload(void); static int c4iw_activate(struct adapter *); static int c4iw_deactivate(struct adapter *); +static void c4iw_async_event(struct adapter *); static struct uld_info c4iw_uld_info = { .uld_id = ULD_IWARP, .activate = c4iw_activate, .deactivate = c4iw_deactivate, + .async_event = c4iw_async_event, }; static int @@ -326,6 +328,23 @@ c4iw_deactivate(struct adapter *sc) return (0); } +static void +c4iw_async_event(struct adapter *sc) +{ + struct c4iw_dev *iwsc = sc->iwarp_softc; + + if (iwsc) { + struct ib_event event = {0}; + + device_printf(sc->dev, + "iWARP driver received FATAL ERROR event.\n"); + iwsc->rdev.flags |= T4_FATAL_ERROR; + event.event = IB_EVENT_DEVICE_FATAL; + event.device = &iwsc->ibdev; + ib_dispatch_event(&event); + } +} + static void c4iw_activate_all(struct adapter *sc, void *arg __unused) { diff --git a/sys/dev/cxgbe/offload.h b/sys/dev/cxgbe/offload.h index 811f9d349d43..e4150db002d8 100644 --- a/sys/dev/cxgbe/offload.h +++ b/sys/dev/cxgbe/offload.h @@ -228,6 +228,7 @@ struct uld_info { int uld_id; int (*activate)(struct adapter *); int (*deactivate)(struct adapter *); + void (*async_event)(struct adapter *); }; struct tom_tunables { diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index d0e40b4434a3..4adee8e0f393 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -758,6 +758,7 @@ static int read_i2c(struct adapter *, struct t4_i2c_data *); static int clear_stats(struct adapter *, u_int); #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *, int); +static void t4_async_event(void *, int); #endif static int mod_event(module_t, int, void *); static int notify_siblings(device_t, int); @@ -1063,6 +1064,10 @@ t4_attach(device_t dev) callout_init(&sc->ktls_tick, 1); +#ifdef TCP_OFFLOAD + TASK_INIT(&sc->async_event_task, 0, t4_async_event, sc); +#endif + rc = t4_map_bars_0_and_4(sc); if (rc != 0) goto done; /* error message displayed already */ @@ -1567,6 +1572,10 @@ t4_detach_common(device_t dev) } } +#ifdef TCP_OFFLOAD + taskqueue_drain(taskqueue_thread, &sc->async_event_task); +#endif + for (i = 0; i < sc->intr_count; i++) t4_free_irq(sc, &sc->irq[i]); @@ -2788,6 +2797,9 @@ t4_fatal_err(struct adapter *sc, bool fw_error) sc->flags |= ADAP_ERR; ADAPTER_UNLOCK(sc); } +#ifdef TCP_OFFLOAD + taskqueue_enqueue(taskqueue_thread, &sc->async_event_task); +#endif if (t4_panic_on_fatal_err) { log(LOG_ALERT, "%s: panic on fatal error after 30s", @@ -10859,6 +10871,25 @@ t4_deactivate_uld(struct adapter *sc, int id) return (rc); } +static void +t4_async_event(void *arg, int n) +{ + struct uld_info *ui; + struct adapter *sc = (struct adapter *)arg; + + if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4async") != 0) + return; + sx_slock(&t4_uld_list_lock); + SLIST_FOREACH(ui, &t4_uld_list, link) { + if (ui->uld_id == ULD_IWARP) { + ui->async_event(sc); + break; + } + } + sx_sunlock(&t4_uld_list_lock); + end_synchronized_op(sc, 0); +} + int uld_active(struct adapter *sc, int uld_id) {