cxgbei: Support iSCSI offload on T6.

T6 makes several changes relative to T5 for receive of iSCSI PDUs.

First, earlier adapters issue either 2 or 3 messages to the host for
each PDU received: CPL_ISCSI_HDR contains the BHS of the PDU,
CPL_ISCSI_DATA (when DDP is not used for zero-copy receive) contains
the PDU data as buffers on the freelist, and CPL_RX_ISCSI_DDP contains
the status of the PDU, such as the result of CRC checks.  In T6, a new
CPL_RX_ISCSI_CMP combines CPL_ISCSI_HDR and CPL_RX_ISCSI_DDP.  Data
PDUs which are directly placed via DDP only report a single
CPL_RX_ISCSI_CMP message.  Data PDUs received on the free lists are
reported as CPL_ISCSI_DATA followed by CPL_RX_ISCSI_CMP.  Control PDUs
such as R2T are still reported via CPL_ISCSI_HDR and CPL_RX_ISCSI_DDP.

Supporting this requires changing the CPL_ISCSI_DATA handler to
allocate a PDU structure if it is not preceded by a CPL_ISCSI_HDR, as
well as adding a handler for the new CPL_RX_ISCSI_CMP.

Second, when using DDP for zero-copy receive, T6 will only issue a
CPL_RX_ISCSI_CMP after a burst of PDUs has been received (indicated
by the F flag in the BHS).  In this case, the CPL_RX_ISCSI_CMP can
reflect the completion of multiple PDUs and the BHS and TCP sequence
number included in the message are from the last PDU received in the
burst.  Notably, the message does not include any information about
earlier PDUs received as part of the burst.  Instead, the driver must
track the amount of data already received for a given transfer and use
this to compute the amount of data received in a burst.  In addition,
the iSCSI layer currently has no way to permit receiving a logical PDU
which spans multiple PDUs.  Instead, the driver presents each burst as
a single, "large" PDU to the iSCSI target and initiators.  This is
done by rewriting the buffer offset and data length fields in the BHS
of the final PDU as well as rewriting the DataSN so that the received
PDUs appear to be in order.

To track all this, cxgbei maintains a hash table of 'cxgbei_cmp'
structures indexed by transfer tags for each offloaded iSCSI
connection.  When a SCSI_DATA_IN message is received, the ITT from the
received BHS is used to find the necessary state in the hash table,
whereas SCSI_DATA_OUT replies use the TTT as the key.  The structure
tracks the expected starting offset and DataSN of the next burst as
well as the rewritten DataSN value used for the previously received
PDU.

Discussed with:	np
Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D30458
This commit is contained in:
John Baldwin 2021-05-28 16:45:29 -07:00
parent 55baee8cdb
commit 67360f7bb0
3 changed files with 395 additions and 55 deletions

View File

@ -222,27 +222,47 @@ do_rx_iscsi_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m
u_int tid = GET_TID(cpl);
struct toepcb *toep = lookup_tid(sc, tid);
struct icl_cxgbei_pdu *icp = toep->ulpcb2;
struct icl_pdu *ip;
M_ASSERTPKTHDR(m);
MPASS(m->m_pkthdr.len == be16toh(cpl->len) + sizeof(*cpl));
/* Must already have received the header (but not the data). */
MPASS(icp != NULL);
if (icp == NULL) {
/*
* T6 completion enabled, start of a new pdu. Header
* will come in completion CPL.
*/
ip = icl_cxgbei_new_pdu(M_NOWAIT);
if (ip == NULL)
CXGBE_UNIMPLEMENTED("PDU allocation failure");
icp = ip_to_icp(ip);
} else {
/* T5 mode, header is already received. */
MPASS(icp->icp_flags == ICPF_RX_HDR);
MPASS(icp->ip.ip_data_mbuf == NULL);
MPASS(icp->ip.ip_data_len == m->m_pkthdr.len - sizeof(*cpl));
}
/* Trim the cpl header from mbuf. */
m_adj(m, sizeof(*cpl));
MPASS(icp->ip.ip_data_len == m->m_pkthdr.len);
icp->icp_flags |= ICPF_RX_FLBUF;
icp->ip.ip_data_mbuf = m;
toep->ofld_rxq->rx_iscsi_fl_pdus++;
toep->ofld_rxq->rx_iscsi_fl_octets += m->m_pkthdr.len;
/*
* For T6, save the icp for further processing in the
* completion handler.
*/
if (icp->icp_flags == ICPF_RX_FLBUF) {
MPASS(toep->ulpcb2 == NULL);
toep->ulpcb2 = icp;
}
#if 0
CTR3(KTR_CXGBE, "%s: tid %u, cpl->len %u", __func__, tid,
be16toh(cpl->len));
CTR4(KTR_CXGBE, "%s: tid %u, cpl->len %u, icp %p", __func__, tid,
be16toh(cpl->len), icp);
#endif
return (0);
@ -304,15 +324,17 @@ do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
__func__, tid, pdu_len, inp->inp_flags);
INP_WUNLOCK(inp);
icl_cxgbei_conn_pdu_free(NULL, ip);
#ifdef INVARIANTS
toep->ulpcb2 = NULL;
#endif
return (0);
}
/*
* T6+ does not report data PDUs received via DDP without F
* set. This can result in gaps in the TCP sequence space.
*/
tp = intotcpcb(inp);
MPASS(icp->icp_seq == tp->rcv_nxt);
tp->rcv_nxt += pdu_len;
MPASS(chip_id(sc) >= CHELSIO_T6 || icp->icp_seq == tp->rcv_nxt);
tp->rcv_nxt = icp->icp_seq + pdu_len;
tp->t_rcvtime = ticks;
/*
@ -342,9 +364,7 @@ do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
CURVNET_RESTORE();
icl_cxgbei_conn_pdu_free(NULL, ip);
#ifdef INVARIANTS
toep->ulpcb2 = NULL;
#endif
return (0);
}
MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
@ -399,9 +419,237 @@ do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
SOCKBUF_UNLOCK(sb);
INP_WUNLOCK(inp);
#ifdef INVARIANTS
toep->ulpcb2 = NULL;
return (0);
}
/*
 * Handler for CPL_RX_ISCSI_CMP.
 *
 * Copies the BHS carried in the message into a PDU (either the one left in
 * toep->ulpcb2 by an earlier handler or a freshly allocated one), records
 * the error/status bits from the message, advances tp->rcv_nxt past the
 * PDU, and appends the PDU to icc->rcvd_pdus, waking the worker thread if
 * the connection is not already on its queue.
 *
 * For a PDU placed directly via DDP (F_DDP_PDU set and no data mbuf), the
 * BHS is rewritten so that the whole burst appears as one PDU: the buffer
 * offset, data segment length and DataSN are patched using the per-transfer
 * state found with cxgbei_find_cmp().
 *
 * The mbuf 'm' is always freed and toep->ulpcb2 is always cleared before
 * returning.  Always returns 0.
 */
static int
do_rx_iscsi_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct epoch_tracker et;
	struct adapter *sc = iq->adapter;
	struct cpl_rx_iscsi_cmp *cpl = mtod(m, struct cpl_rx_iscsi_cmp *);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;
	struct cxgbei_cmp *cmp;
	struct inpcb *inp = toep->inp;
#ifdef INVARIANTS
	uint16_t len = be16toh(cpl->len);
#endif
	struct socket *so;
	struct sockbuf *sb;
	struct tcpcb *tp;
	struct icl_cxgbei_conn *icc;
	struct icl_conn *ic;
	struct iscsi_bhs_data_out *bhsdo;
	u_int val = be32toh(cpl->ddpvld);
	u_int npdus, pdu_len, data_digest_len, hdr_digest_len;
	uint32_t prev_seg_len;

	M_ASSERTPKTHDR(m);
	MPASS(m->m_pkthdr.len == len + sizeof(*cpl));

	if (icp == NULL) {
		/*
		 * T6 completion enabled, start of a new PDU.  Only a
		 * PDU flagged as DDP may arrive without a PDU already
		 * pending in ulpcb2.
		 */
		MPASS((val & F_DDP_PDU) != 0);
		ip = icl_cxgbei_new_pdu(M_NOWAIT);
		if (ip == NULL)
			CXGBE_UNIMPLEMENTED("PDU allocation failure");
		icp = ip_to_icp(ip);
	} else {
		/*
		 * A PDU is already pending.  Always derive 'ip' from it
		 * so that 'ip' is never used uninitialized, including
		 * when F_DDP_PDU is set alongside a pending PDU.
		 */
		MPASS((val & F_DDP_PDU) != 0 ||
		    (icp->icp_flags & ICPF_RX_STATUS) == 0);
		ip = &icp->ip;
	}
	pdu_len = G_ISCSI_PDU_LEN(be16toh(cpl->pdu_len_ddp));

#if 0
	CTR5(KTR_CXGBE,
	    "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp %p",
	    __func__, tid, pdu_len, val, icp);
#endif

	/* Copy header */
	m_copydata(m, sizeof(*cpl), ISCSI_BHS_SIZE, (caddr_t)ip->ip_bhs);
	bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;

	/* The data segment length is a 24-bit big-endian field. */
	ip->ip_data_len = bhsdo->bhsdo_data_segment_len[0] << 16 |
	    bhsdo->bhsdo_data_segment_len[1] << 8 |
	    bhsdo->bhsdo_data_segment_len[2];
	icp->icp_seq = ntohl(cpl->seq);
	icp->icp_flags |= ICPF_RX_HDR;
	icp->icp_flags |= ICPF_RX_STATUS;

	if (val & F_DDP_PADDING_ERR)
		icp->icp_flags |= ICPF_PAD_ERR;
	if (val & F_DDP_HDRCRC_ERR)
		icp->icp_flags |= ICPF_HCRC_ERR;
	if (val & F_DDP_DATACRC_ERR)
		icp->icp_flags |= ICPF_DCRC_ERR;

	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, pdu_len, inp->inp_flags);
		INP_WUNLOCK(inp);
		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	/*
	 * If icc is NULL, the connection is being closed in
	 * icl_cxgbei_conn_close(), just drop this data.
	 */
	icc = toep->ulpcb;
	if (__predict_false(icc == NULL)) {
		CTR4(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes), icc %p",
		    __func__, tid, pdu_len, icc);

		/*
		 * Update rcv_nxt so the sequence number of the FIN
		 * doesn't appear wrong.
		 */
		tp->rcv_nxt = icp->icp_seq + pdu_len;
		tp->t_rcvtime = ticks;
		INP_WUNLOCK(inp);

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}

	data_digest_len = (icc->ulp_submode & ULP_CRC_DATA) ?
	    ISCSI_DATA_DIGEST_SIZE : 0;
	/*
	 * NOTE(review): hdr_digest_len is computed but not used by the
	 * check below -- confirm whether the assertion should account
	 * for it.
	 */
	hdr_digest_len = (icc->ulp_submode & ULP_CRC_HEADER) ?
	    ISCSI_HEADER_DIGEST_SIZE : 0;
	MPASS(roundup2(ip->ip_data_len, 4) == pdu_len - len - data_digest_len);

	if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
		MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
		MPASS(ip->ip_data_len > 0);
		icp->icp_flags |= ICPF_RX_DDP;

		/* Data-In is keyed by the ITT, Data-Out by the TTT. */
		switch (ip->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) {
		case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
			cmp = cxgbei_find_cmp(icc,
			    be32toh(bhsdo->bhsdo_initiator_task_tag));
			break;
		case ISCSI_BHS_OPCODE_SCSI_DATA_OUT:
			cmp = cxgbei_find_cmp(icc,
			    be32toh(bhsdo->bhsdo_target_transfer_tag));
			break;
		default:
			__assert_unreachable();
		}
		MPASS(cmp != NULL);

		/* Must be the final PDU. */
		MPASS(bhsdo->bhsdo_flags & BHSDO_FLAGS_F);

		/*
		 * The difference between the end of the last burst
		 * and the offset of the last PDU in this burst is
		 * the additional data received via DDP.
		 */
		prev_seg_len = be32toh(bhsdo->bhsdo_buffer_offset) -
		    cmp->next_buffer_offset;

		if (prev_seg_len != 0) {
			/*
			 * Since cfiscsi doesn't know about previous
			 * headers, pretend that the entire r2t data
			 * length was received in this single segment.
			 */
			ip->ip_data_len += prev_seg_len;
			bhsdo->bhsdo_data_segment_len[2] = ip->ip_data_len;
			bhsdo->bhsdo_data_segment_len[1] = ip->ip_data_len >> 8;
			bhsdo->bhsdo_data_segment_len[0] = ip->ip_data_len >> 16;
			bhsdo->bhsdo_buffer_offset =
			    htobe32(cmp->next_buffer_offset);

			/* PDUs covered: DataSN distance from the last one seen. */
			npdus = be32toh(bhsdo->bhsdo_datasn) - cmp->last_datasn;
		} else {
			MPASS(be32toh(bhsdo->bhsdo_datasn) ==
			    cmp->last_datasn + 1);
			npdus = 1;
		}

		/*
		 * Remember where the next burst should start and the
		 * on-wire DataSN just seen, then present the PDU with
		 * the next consecutive rewritten DataSN.
		 */
		cmp->next_buffer_offset += ip->ip_data_len;
		cmp->last_datasn = be32toh(bhsdo->bhsdo_datasn);
		bhsdo->bhsdo_datasn = htobe32(cmp->next_datasn);
		cmp->next_datasn++;
		toep->ofld_rxq->rx_iscsi_ddp_pdus += npdus;
		toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
	} else {
		MPASS(icp->icp_flags & (ICPF_RX_FLBUF));
		MPASS(ip->ip_data_len == ip->ip_data_mbuf->m_pkthdr.len);
		MPASS(icp->icp_seq == tp->rcv_nxt);
	}

	tp->rcv_nxt = icp->icp_seq + pdu_len;
	tp->t_rcvtime = ticks;

	/*
	 * Don't update the window size or return credits since RX
	 * flow control is disabled.
	 */

	so = inp->inp_socket;
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		CTR5(KTR_CXGBE,
		    "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
		    __func__, tid, pdu_len, icc, sb->sb_state);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		CURVNET_SET(so->so_vnet);
		NET_EPOCH_ENTER(et);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		/* The inp lock is released here only if tcp_drop() returned non-NULL. */
		if (tp != NULL)
			INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}
	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ic = &icc->ic;
	icl_cxgbei_new_pdu_set_conn(ip, ic);

	/* Enqueue the PDU to the received pdus queue. */
	STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
	if ((icc->rx_flags & RXF_ACTIVE) == 0) {
		struct cxgbei_worker_thread_softc *cwt = &cwt_softc[icc->cwt];

		mtx_lock(&cwt->cwt_lock);
		icc->rx_flags |= RXF_ACTIVE;
		TAILQ_INSERT_TAIL(&cwt->rx_head, icc, rx_link);
		if (cwt->cwt_state == CWT_SLEEPING) {
			cwt->cwt_state = CWT_RUNNING;
			cv_signal(&cwt->cwt_cv);
		}
		mtx_unlock(&cwt->cwt_lock);
	}
	SOCKBUF_UNLOCK(sb);
	INP_WUNLOCK(inp);

	toep->ulpcb2 = NULL;
	m_freem(m);

	return (0);
}
@ -669,6 +917,7 @@ cxgbei_mod_load(void)
t4_register_cpl_handler(CPL_ISCSI_HDR, do_rx_iscsi_hdr);
t4_register_cpl_handler(CPL_ISCSI_DATA, do_rx_iscsi_data);
t4_register_cpl_handler(CPL_RX_ISCSI_DDP, do_rx_iscsi_ddp);
t4_register_cpl_handler(CPL_RX_ISCSI_CMP, do_rx_iscsi_cmp);
rc = start_worker_threads();
if (rc != 0)
@ -699,6 +948,7 @@ cxgbei_mod_unload(void)
t4_register_cpl_handler(CPL_ISCSI_HDR, NULL);
t4_register_cpl_handler(CPL_ISCSI_DATA, NULL);
t4_register_cpl_handler(CPL_RX_ISCSI_DDP, NULL);
t4_register_cpl_handler(CPL_RX_ISCSI_CMP, NULL);
return (0);
}

View File

@ -53,6 +53,17 @@ enum {
RXF_ACTIVE = 1 << 0, /* In the worker thread's queue */
};
/*
 * Per-transfer state consulted by the CPL_RX_ISCSI_CMP handler to present
 * a burst of DDP-placed PDUs as a single PDU.  Entries live in the hash
 * table of an icl_cxgbei_conn and are looked up by transfer tag.
 */
struct cxgbei_cmp {
/* Linkage within one hash bucket of the connection's cmp_table. */
LIST_ENTRY(cxgbei_cmp) link;
uint32_t tt; /* Transfer tag. */
/* DataSN written into the next PDU presented; incremented after each use. */
uint32_t next_datasn;
/* Buffer offset at which the next burst is expected to begin. */
uint32_t next_buffer_offset;
/* DataSN as received in the last completed PDU; set to -1 by the setup code. */
uint32_t last_datasn;
};
LIST_HEAD(cxgbei_cmp_head, cxgbei_cmp);
struct icl_cxgbei_conn {
struct icl_conn ic;
@ -67,6 +78,10 @@ struct icl_cxgbei_conn {
u_int cwt;
STAILQ_HEAD(, icl_pdu) rcvd_pdus; /* protected by so_rcv lock */
TAILQ_ENTRY(icl_cxgbei_conn) rx_link; /* protected by cwt lock */
struct cxgbei_cmp_head *cmp_table; /* protected by cmp_lock */
struct mtx cmp_lock;
unsigned long cmp_hash_mask;
};
static inline struct icl_cxgbei_conn *
@ -128,5 +143,6 @@ int icl_cxgbei_mod_unload(void);
struct icl_pdu *icl_cxgbei_new_pdu(int);
void icl_cxgbei_new_pdu_set_conn(struct icl_pdu *, struct icl_conn *);
void icl_cxgbei_conn_pdu_free(struct icl_conn *, struct icl_pdu *);
struct cxgbei_cmp *cxgbei_find_cmp(struct icl_cxgbei_conn *, uint32_t);
#endif

View File

@ -60,7 +60,6 @@ __FBSDID("$FreeBSD$");
#include <sys/sx.h>
#include <sys/uio.h>
#include <machine/bus.h>
#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <netinet/in.h>
@ -100,6 +99,16 @@ __FBSDID("$FreeBSD$");
#include "tom/t4_tom.h"
#include "cxgbei.h"
/*
 * Use the page pod tag for the TT hash.
 *
 * Maps a transfer tag 'tt' to a bucket index in the cmp_table of the
 * connection 'icc' by masking the G_PPOD_TAG() field of the tag with the
 * connection's cmp_hash_mask.
 */
#define TT_HASH(icc, tt) (G_PPOD_TAG(tt) & (icc)->cmp_hash_mask)
/*
 * DDP state allocated per I/O: the page pod reservation together with the
 * completion-tracking entry that is inserted into the connection's hash
 * table.
 *
 * 'prsv' must remain the first member: icl_cxgbei_conn_task_setup() hands
 * out &ddp->prsv as its opaque argument and icl_cxgbei_conn_task_done()
 * treats that same pointer as a struct cxgbei_ddp_state.
 */
struct cxgbei_ddp_state {
struct ppod_reservation prsv;
struct cxgbei_cmp cmp;
};
static MALLOC_DEFINE(M_CXGBEI, "cxgbei", "cxgbei(4)");
SYSCTL_NODE(_kern_icl, OID_AUTO, cxgbei, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
@ -117,7 +126,6 @@ static int recvspace = 1048576;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, recvspace, CTLFLAG_RWTUN,
&recvspace, 0, "Default receive socket buffer size");
static uma_zone_t prsv_zone;
static volatile u_int icl_cxgbei_ncons;
#define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock)
@ -555,6 +563,9 @@ icl_cxgbei_new_conn(const char *name, struct mtx *lock)
icc->icc_signature = CXGBEI_CONN_SIGNATURE;
STAILQ_INIT(&icc->rcvd_pdus);
icc->cmp_table = hashinit(64, M_CXGBEI, &icc->cmp_hash_mask);
mtx_init(&icc->cmp_lock, "cxgbei_cmp", NULL, MTX_DEF);
ic = &icc->ic;
ic->ic_lock = lock;
@ -586,6 +597,8 @@ icl_cxgbei_conn_free(struct icl_conn *ic)
cv_destroy(&ic->ic_send_cv);
cv_destroy(&ic->ic_receive_cv);
mtx_destroy(&icc->cmp_lock);
hashdestroy(icc->cmp_table, M_CXGBEI, icc->cmp_hash_mask);
kobj_delete((struct kobj *)icc, M_CXGBE);
refcount_release(&icl_cxgbei_ncons);
}
@ -904,6 +917,61 @@ icl_cxgbei_conn_close(struct icl_conn *ic)
soclose(so);
}
/*
 * Record 'tt' as the key of 'cmp' and link 'cmp' at the head of the
 * matching hash bucket of 'icc'.  With INVARIANTS, assert first that no
 * entry with the same tag is already present in that bucket.
 */
static void
cxgbei_insert_cmp(struct icl_cxgbei_conn *icc, struct cxgbei_cmp *cmp,
    uint32_t tt)
{
	struct cxgbei_cmp_head *bucket;
#ifdef INVARIANTS
	struct cxgbei_cmp *other;
#endif

	cmp->tt = tt;

	mtx_lock(&icc->cmp_lock);
	bucket = &icc->cmp_table[TT_HASH(icc, tt)];
#ifdef INVARIANTS
	LIST_FOREACH(other, bucket, link)
		KASSERT(other->tt != tt, ("%s: duplicate cmp", __func__));
#endif
	LIST_INSERT_HEAD(bucket, cmp, link);
	mtx_unlock(&icc->cmp_lock);
}
/*
 * Look up the entry keyed by transfer tag 'tt' in the hash table of 'icc'.
 * Returns the matching entry, or NULL if the bucket holds none.  The table
 * lock is held only for the duration of the search.
 */
struct cxgbei_cmp *
cxgbei_find_cmp(struct icl_cxgbei_conn *icc, uint32_t tt)
{
	struct cxgbei_cmp *match;

	mtx_lock(&icc->cmp_lock);
	match = LIST_FIRST(&icc->cmp_table[TT_HASH(icc, tt)]);
	while (match != NULL && match->tt != tt)
		match = LIST_NEXT(match, link);
	mtx_unlock(&icc->cmp_lock);

	return (match);
}
/*
 * Unlink 'cmp' from the hash table of 'icc'.  With INVARIANTS, panic if
 * 'cmp' is not found in the bucket selected by its transfer tag.
 */
static void
cxgbei_rm_cmp(struct icl_cxgbei_conn *icc, struct cxgbei_cmp *cmp)
{
#ifdef INVARIANTS
	struct cxgbei_cmp *scan;
#endif

	mtx_lock(&icc->cmp_lock);
#ifdef INVARIANTS
	/* LIST_FOREACH leaves 'scan' NULL when the loop runs to completion. */
	LIST_FOREACH(scan, &icc->cmp_table[TT_HASH(icc, cmp->tt)], link) {
		if (scan == cmp)
			break;
	}
	if (scan == NULL)
		panic("%s: could not find cmp", __func__);
#endif
	LIST_REMOVE(cmp, link);
	mtx_unlock(&icc->cmp_lock);
}
int
icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
struct ccb_scsiio *csio, uint32_t *ittp, void **arg)
@ -913,6 +981,7 @@ icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
struct adapter *sc = icc->sc;
struct cxgbei_data *ci = sc->iscsi_ulp_softc;
struct ppod_region *pr = &ci->pr;
struct cxgbei_ddp_state *ddp;
struct ppod_reservation *prsv;
uint32_t itt;
int rc = 0;
@ -943,30 +1012,32 @@ no_ddp:
* Reserve resources for DDP, update the itt that should be used in the
* PDU, and save DDP specific state for this I/O in *arg.
*/
prsv = uma_zalloc(prsv_zone, M_NOWAIT);
if (prsv == NULL) {
ddp = malloc(sizeof(*ddp), M_CXGBEI, M_NOWAIT | M_ZERO);
if (ddp == NULL) {
rc = ENOMEM;
goto no_ddp;
}
prsv = &ddp->prsv;
/* XXX add support for all CAM_DATA_ types */
MPASS((csio->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_VADDR);
rc = t4_alloc_page_pods_for_buf(pr, (vm_offset_t)csio->data_ptr,
csio->dxfer_len, prsv);
if (rc != 0) {
uma_zfree(prsv_zone, prsv);
free(ddp, M_CXGBEI);
goto no_ddp;
}
rc = t4_write_page_pods_for_buf(sc, toep, prsv,
(vm_offset_t)csio->data_ptr, csio->dxfer_len);
if (rc != 0) {
if (__predict_false(rc != 0)) {
t4_free_page_pods(prsv);
uma_zfree(prsv_zone, prsv);
free(ddp, M_CXGBEI);
goto no_ddp;
}
ddp->cmp.last_datasn = -1;
cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
*ittp = htobe32(prsv->prsv_tag);
*arg = prsv;
counter_u64_add(toep->ofld_rxq->rx_iscsi_ddp_setup_ok, 1);
@ -978,10 +1049,11 @@ icl_cxgbei_conn_task_done(struct icl_conn *ic, void *arg)
{
if (arg != NULL) {
struct ppod_reservation *prsv = arg;
struct cxgbei_ddp_state *ddp = arg;
t4_free_page_pods(prsv);
uma_zfree(prsv_zone, prsv);
cxgbei_rm_cmp(ic_to_icc(ic), &ddp->cmp);
t4_free_page_pods(&ddp->prsv);
free(ddp, M_CXGBEI);
}
}
@ -1009,7 +1081,7 @@ ddp_sgl_check(struct ctl_sg_entry *sg, int entries, int xferlen)
/* XXXNP: PDU should be passed in as parameter, like on the initiator. */
#define io_to_request_pdu(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr)
#define io_to_ppod_reservation(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr)
#define io_to_ddp_state(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr)
int
icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
@ -1021,6 +1093,7 @@ icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
struct adapter *sc = icc->sc;
struct cxgbei_data *ci = sc->iscsi_ulp_softc;
struct ppod_region *pr = &ci->pr;
struct cxgbei_ddp_state *ddp;
struct ppod_reservation *prsv;
struct ctl_sg_entry *sgl, sg_entry;
int sg_entries = ctsio->kern_sg_entries;
@ -1064,7 +1137,7 @@ no_ddp:
ttt = *tttp & M_PPOD_TAG;
ttt = V_PPOD_TAG(ttt) | pr->pr_invalid_bit;
*tttp = htobe32(ttt);
MPASS(io_to_ppod_reservation(io) == NULL);
MPASS(io_to_ddp_state(io) == NULL);
if (rc != 0)
counter_u64_add(
toep->ofld_rxq->rx_iscsi_ddp_setup_error, 1);
@ -1086,17 +1159,17 @@ no_ddp:
* Reserve resources for DDP, update the ttt that should be used
* in the PDU, and save DDP specific state for this I/O.
*/
MPASS(io_to_ppod_reservation(io) == NULL);
prsv = uma_zalloc(prsv_zone, M_NOWAIT);
if (prsv == NULL) {
MPASS(io_to_ddp_state(io) == NULL);
ddp = malloc(sizeof(*ddp), M_CXGBEI, M_NOWAIT | M_ZERO);
if (ddp == NULL) {
rc = ENOMEM;
goto no_ddp;
}
prsv = &ddp->prsv;
rc = t4_alloc_page_pods_for_sgl(pr, sgl, sg_entries, prsv);
if (rc != 0) {
uma_zfree(prsv_zone, prsv);
free(ddp, M_CXGBEI);
goto no_ddp;
}
@ -1104,12 +1177,16 @@ no_ddp:
xferlen);
if (__predict_false(rc != 0)) {
t4_free_page_pods(prsv);
uma_zfree(prsv_zone, prsv);
free(ddp, M_CXGBEI);
goto no_ddp;
}
ddp->cmp.next_buffer_offset = ctsio->kern_rel_offset +
first_burst;
ddp->cmp.last_datasn = -1;
cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
*tttp = htobe32(prsv->prsv_tag);
io_to_ppod_reservation(io) = prsv;
io_to_ddp_state(io) = ddp;
*arg = ctsio;
counter_u64_add(toep->ofld_rxq->rx_iscsi_ddp_setup_ok, 1);
return (0);
@ -1119,16 +1196,19 @@ no_ddp:
* In the middle of an I/O. A non-NULL page pod reservation indicates
* that a DDP buffer is being used for the I/O.
*/
prsv = io_to_ppod_reservation(ctsio);
if (prsv == NULL)
ddp = io_to_ddp_state(ctsio);
if (ddp == NULL)
goto no_ddp;
prsv = &ddp->prsv;
alias = (prsv->prsv_tag & pr->pr_alias_mask) >> pr->pr_alias_shift;
alias++;
prsv->prsv_tag &= ~pr->pr_alias_mask;
prsv->prsv_tag |= alias << pr->pr_alias_shift & pr->pr_alias_mask;
ddp->cmp.next_datasn = 0;
ddp->cmp.last_datasn = -1;
cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
*tttp = htobe32(prsv->prsv_tag);
*arg = ctsio;
@ -1140,16 +1220,19 @@ icl_cxgbei_conn_transfer_done(struct icl_conn *ic, void *arg)
{
struct ctl_scsiio *ctsio = arg;
if (ctsio != NULL && (ctsio->kern_data_len == ctsio->ext_data_filled ||
ic->ic_disconnecting)) {
struct ppod_reservation *prsv;
if (ctsio != NULL) {
struct cxgbei_ddp_state *ddp;
prsv = io_to_ppod_reservation(ctsio);
MPASS(prsv != NULL);
ddp = io_to_ddp_state(ctsio);
MPASS(ddp != NULL);
t4_free_page_pods(prsv);
uma_zfree(prsv_zone, prsv);
io_to_ppod_reservation(ctsio) = NULL;
cxgbei_rm_cmp(ic_to_icc(ic), &ddp->cmp);
if (ctsio->kern_data_len == ctsio->ext_data_filled ||
ic->ic_disconnecting) {
t4_free_page_pods(&ddp->prsv);
free(ddp, M_CXGBEI);
io_to_ddp_state(ctsio) = NULL;
}
}
}
@ -1208,13 +1291,6 @@ icl_cxgbei_mod_load(void)
{
int rc;
/*
* Space to track pagepod reservations.
*/
prsv_zone = uma_zcreate("Pagepod reservations",
sizeof(struct ppod_reservation), NULL, NULL, NULL, NULL,
UMA_ALIGN_CACHE, 0);
refcount_init(&icl_cxgbei_ncons, 0);
rc = icl_register("cxgbei", false, -100, icl_cxgbei_limits,
@ -1232,8 +1308,6 @@ icl_cxgbei_mod_unload(void)
icl_unregister("cxgbei", false);
uma_zdestroy(prsv_zone);
return (0);
}
#endif