From ddd65a514c77c96be1f2cfae80c23ba752823ff4 Mon Sep 17 00:00:00 2001 From: np Date: Tue, 7 Oct 2014 21:26:22 +0000 Subject: [PATCH] cxgbe/tom: don't leak resources tied to an active open request that cannot be sent to the chip because a prerequisite L2 resolution failed. Submitted by: Hariprasad at chelsio dot com (original version) MFC after: 2 weeks. --- sys/dev/cxgbe/common/t4_msg.h | 1 + sys/dev/cxgbe/tom/t4_connect.c | 40 +++++++++++++++----------- sys/dev/cxgbe/tom/t4_tom.c | 52 ++++++++++++++++++++++++++++++++++ sys/dev/cxgbe/tom/t4_tom.h | 6 ++++ sys/dev/cxgbe/tom/t4_tom_l2t.c | 28 +++++++----------- 5 files changed, 93 insertions(+), 34 deletions(-) diff --git a/sys/dev/cxgbe/common/t4_msg.h b/sys/dev/cxgbe/common/t4_msg.h index ef8fbe68478f..c30b6f1f4871 100644 --- a/sys/dev/cxgbe/common/t4_msg.h +++ b/sys/dev/cxgbe/common/t4_msg.h @@ -273,6 +273,7 @@ union opcode_tid { /* extract the TID from a CPL command */ #define GET_TID(cmd) (G_TID(ntohl(OPCODE_TID(cmd)))) +#define GET_OPCODE(cmd) ((cmd)->ot.opcode) /* partitioning of TID fields that also carry a queue id */ #define S_TID_TID 0 diff --git a/sys/dev/cxgbe/tom/t4_connect.c b/sys/dev/cxgbe/tom/t4_connect.c index 9973fa57ed66..425c56358e7e 100644 --- a/sys/dev/cxgbe/tom/t4_connect.c +++ b/sys/dev/cxgbe/tom/t4_connect.c @@ -115,8 +115,8 @@ do_act_establish(struct sge_iq *iq, const struct rss_header *rss, { struct adapter *sc = iq->adapter; const struct cpl_act_establish *cpl = (const void *)(rss + 1); - unsigned int tid = GET_TID(cpl); - unsigned int atid = G_TID_TID(ntohl(cpl->tos_atid)); + u_int tid = GET_TID(cpl); + u_int atid = G_TID_TID(ntohl(cpl->tos_atid)); struct toepcb *toep = lookup_atid(sc, atid); struct inpcb *inp = toep->inp; @@ -178,17 +178,34 @@ act_open_rpl_status_to_errno(int status) } } +void +act_open_failure_cleanup(struct adapter *sc, u_int atid, u_int status) +{ + struct toepcb *toep = lookup_atid(sc, atid); + struct inpcb *inp = toep->inp; + struct toedev *tod = &toep->td->tod; + + free_atid(sc, atid); + toep->tid = -1; + + if (status != EAGAIN) + INP_INFO_WLOCK(&V_tcbinfo); + INP_WLOCK(inp); + toe_connect_failed(tod, inp, status); + final_cpl_received(toep); /* unlocks inp */ + if (status != EAGAIN) + INP_INFO_WUNLOCK(&V_tcbinfo); +} + static int do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); - unsigned int atid = G_TID_TID(G_AOPEN_ATID(be32toh(cpl->atid_status))); - unsigned int status = G_AOPEN_STATUS(be32toh(cpl->atid_status)); + u_int atid = G_TID_TID(G_AOPEN_ATID(be32toh(cpl->atid_status))); + u_int status = G_AOPEN_STATUS(be32toh(cpl->atid_status)); struct toepcb *toep = lookup_atid(sc, atid); - struct inpcb *inp = toep->inp; - struct toedev *tod = &toep->td->tod; int rc; KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); @@ -200,20 +217,11 @@ do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss, if (negative_advice(status)) return (0); - free_atid(sc, atid); - toep->tid = -1; - if (status && act_open_has_tid(status)) release_tid(sc, GET_TID(cpl), toep->ctrlq); rc = act_open_rpl_status_to_errno(status); - if (rc != EAGAIN) - INP_INFO_WLOCK(&V_tcbinfo); - INP_WLOCK(inp); - toe_connect_failed(tod, inp, rc); - final_cpl_received(toep); /* unlocks inp */ - if (rc != EAGAIN) - INP_INFO_WUNLOCK(&V_tcbinfo); + act_open_failure_cleanup(sc, atid, rc); return (0); } diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c index 109543a6ec55..71ea1df58857 100644 --- a/sys/dev/cxgbe/tom/t4_tom.c +++ b/sys/dev/cxgbe/tom/t4_tom.c @@ -98,6 +98,7 @@ static void t4_clip_task(void *, int); static void update_clip_table(struct adapter *, struct tom_data *); static void destroy_clip_table(struct adapter *, struct tom_data *); static void free_tom_data(struct adapter *, struct tom_data *); +static void reclaim_wr_resources(void *, int); static int in6_ifaddr_gen; static eventhandler_tag ifaddr_evhandler; @@ -903,6 +904,8 @@ free_tom_data(struct adapter *sc, struct tom_data *td) if (td->listen_mask != 0) hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask); + if (mtx_initialized(&td->unsent_wr_lock)) + mtx_destroy(&td->unsent_wr_lock); if (mtx_initialized(&td->lctx_hash_lock)) mtx_destroy(&td->lctx_hash_lock); if (mtx_initialized(&td->toep_list_lock)) @@ -912,6 +915,44 @@ free_tom_data(struct adapter *sc, struct tom_data *td) free(td, M_CXGBE); } +static void +reclaim_wr_resources(void *arg, int count) +{ + struct tom_data *td = arg; + STAILQ_HEAD(, wrqe) twr_list = STAILQ_HEAD_INITIALIZER(twr_list); + struct cpl_act_open_req *cpl; + u_int opcode, atid; + struct wrqe *wr; + struct adapter *sc; + + mtx_lock(&td->unsent_wr_lock); + STAILQ_SWAP(&td->unsent_wr_list, &twr_list, wrqe); + mtx_unlock(&td->unsent_wr_lock); + + while ((wr = STAILQ_FIRST(&twr_list)) != NULL) { + STAILQ_REMOVE_HEAD(&twr_list, link); + + cpl = wrtod(wr); + opcode = GET_OPCODE(cpl); + + switch (opcode) { + case CPL_ACT_OPEN_REQ: + case CPL_ACT_OPEN_REQ6: + atid = G_TID_TID(be32toh(OPCODE_TID(cpl))); + sc = td_adapter(td); + + CTR2(KTR_CXGBE, "%s: atid %u ", __func__, atid); + act_open_failure_cleanup(sc, atid, EHOSTUNREACH); + free(wr, M_CXGBE); + break; + default: + log(LOG_ERR, "%s: leaked work request %p, wr_len %d, " + "opcode %x\n", __func__, wr, wr->wr_len, opcode); + /* WR not freed here; go look at it with a debugger. */ + } + } +} + /* * Ground control to Major TOM * Commencing countdown, engines on @@ -939,6 +980,11 @@ t4_tom_activate(struct adapter *sc) td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE, &td->listen_mask, HASH_NOWAIT); + /* List of WRs for which L2 resolution failed */ + mtx_init(&td->unsent_wr_lock, "Unsent WR list lock", NULL, MTX_DEF); + STAILQ_INIT(&td->unsent_wr_list); + TASK_INIT(&td->reclaim_wr_resources, 0, reclaim_wr_resources, td); + /* TID tables */ rc = alloc_tid_tabs(&sc->tids); if (rc != 0) @@ -1012,6 +1058,12 @@ t4_tom_deactivate(struct adapter *sc) rc = EBUSY; mtx_unlock(&td->lctx_hash_lock); + taskqueue_drain(taskqueue_thread, &td->reclaim_wr_resources); + mtx_lock(&td->unsent_wr_lock); + if (!STAILQ_EMPTY(&td->unsent_wr_list)) + rc = EBUSY; + mtx_unlock(&td->unsent_wr_lock); + if (rc == 0) { unregister_toedev(sc->tom_softc); free_tom_data(sc, td); diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h index 1d883fed89e8..5b68074b2dfd 100644 --- a/sys/dev/cxgbe/tom/t4_tom.h +++ b/sys/dev/cxgbe/tom/t4_tom.h @@ -210,6 +210,11 @@ struct tom_data { struct mtx clip_table_lock; struct clip_head clip_table; int clip_gen; + + /* WRs that will not be sent to the chip because L2 resolution failed */ + struct mtx unsent_wr_lock; + STAILQ_HEAD(, wrqe) unsent_wr_list; + struct task reclaim_wr_resources; }; static inline struct tom_data * @@ -252,6 +257,7 @@ void release_lip(struct tom_data *, struct clip_entry *); void t4_init_connect_cpl_handlers(struct adapter *); int t4_connect(struct toedev *, struct socket *, struct rtentry *, struct sockaddr *); +void act_open_failure_cleanup(struct adapter *, u_int, u_int); /* t4_listen.c */ void t4_init_listen_cpl_handlers(struct adapter *); diff --git a/sys/dev/cxgbe/tom/t4_tom_l2t.c b/sys/dev/cxgbe/tom/t4_tom_l2t.c index 7a75394566d4..65f7d2389473 100644 --- a/sys/dev/cxgbe/tom/t4_tom_l2t.c +++ b/sys/dev/cxgbe/tom/t4_tom_l2t.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -161,25 +162,17 @@ send_pending(struct adapter *sc, struct l2t_entry *e) } static void -resolution_failed_for_wr(struct wrqe *wr) +resolution_failed(struct adapter *sc, struct l2t_entry *e) { - log(LOG_ERR, "%s: leaked work request %p, wr_len %d\n", __func__, wr, - wr->wr_len); - - /* free(wr, M_CXGBE); */ -} - -static void -resolution_failed(struct l2t_entry *e) -{ - struct wrqe *wr; + struct tom_data *td = sc->tom_softc; mtx_assert(&e->lock, MA_OWNED); - while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) { - STAILQ_REMOVE_HEAD(&e->wr_list, link); - resolution_failed_for_wr(wr); - } + mtx_lock(&td->unsent_wr_lock); + STAILQ_CONCAT(&td->unsent_wr_list, &e->wr_list); + mtx_unlock(&td->unsent_wr_lock); + + taskqueue_enqueue(taskqueue_thread, &td->reclaim_wr_resources); } static void @@ -203,7 +196,7 @@ update_entry(struct adapter *sc, struct l2t_entry *e, uint8_t *lladdr, * need to wlock the table). */ e->state = L2T_STATE_FAILED; - resolution_failed(e); + resolution_failed(sc, e); return; } else if (lladdr == NULL) { @@ -305,12 +298,11 @@ t4_l2t_send_slow(struct adapter *sc, struct wrqe *wr, struct l2t_entry *e) if (e->state == L2T_STATE_VALID && !STAILQ_EMPTY(&e->wr_list)) send_pending(sc, e); if (e->state == L2T_STATE_FAILED) - resolution_failed(e); + resolution_failed(sc, e); mtx_unlock(&e->lock); break; case L2T_STATE_FAILED: - resolution_failed_for_wr(wr); return (EHOSTUNREACH); }