cxgbei: Support for ISO (iSCSI segmentation offload).

ISO can be disabled before establishing a connection by setting
dev.tNnex.N.toe.iso to 0.

Sponsored by:	Chelsio Communications
Differential Revision:	https://reviews.freebsd.org/D31223
This commit is contained in:
John Baldwin 2021-08-06 14:21:37 -07:00
parent f0594f52f6
commit 5b27e4b27c
8 changed files with 254 additions and 58 deletions

View File

@ -739,6 +739,7 @@ struct sge_ofld_txq {
struct sge_wrq wrq;
counter_u64_t tx_iscsi_pdus;
counter_u64_t tx_iscsi_octets;
counter_u64_t tx_iscsi_iso_wrs;
counter_u64_t tx_toe_tls_records;
counter_u64_t tx_toe_tls_octets;
} __aligned(CACHE_LINE_SIZE);

View File

@ -134,6 +134,8 @@ struct cxgbei_data {
struct sysctl_ctx_list ctx; /* from uld_activate to deactivate */
};
/*
 * Ceiling used when sizing iSCSI segmentation offload requests.
 * icl_cxgbei_conn_handoff() divides this value by the connection's
 * max_tx_pdu_len to obtain how many PDUs a single ISO request may cover.
 * NOTE(review): presumably a byte count bounded by a 16-bit length
 * field in the hardware -- confirm against the adapter documentation.
 */
#define CXGBEI_MAX_ISO_PAYLOAD 65535
/* cxgbei.c */
u_int cxgbei_select_worker_thread(struct icl_cxgbei_conn *);

View File

@ -335,13 +335,14 @@ finalize_pdu(struct icl_cxgbei_conn *icc, struct icl_cxgbei_pdu *icp)
uint8_t ulp_submode, padding;
struct mbuf *m, *last;
struct iscsi_bhs *bhs;
int data_len;
/*
* Fix up the data segment mbuf first.
*/
m = ip->ip_data_mbuf;
ulp_submode = icc->ulp_submode;
if (m) {
if (m != NULL) {
last = m_last(m);
/*
@ -349,7 +350,8 @@ finalize_pdu(struct icl_cxgbei_conn *icc, struct icl_cxgbei_pdu *icp)
* necessary. There will definitely be room in the mbuf.
*/
padding = roundup2(ip->ip_data_len, 4) - ip->ip_data_len;
if (padding) {
if (padding != 0) {
MPASS(padding <= M_TRAILINGSPACE(last));
bzero(mtod(last, uint8_t *) + last->m_len, padding);
last->m_len += padding;
}
@ -367,9 +369,41 @@ finalize_pdu(struct icl_cxgbei_conn *icc, struct icl_cxgbei_pdu *icp)
MPASS(m->m_len == sizeof(struct iscsi_bhs));
bhs = ip->ip_bhs;
bhs->bhs_data_segment_len[2] = ip->ip_data_len;
bhs->bhs_data_segment_len[1] = ip->ip_data_len >> 8;
bhs->bhs_data_segment_len[0] = ip->ip_data_len >> 16;
data_len = ip->ip_data_len;
if (data_len > icc->ic.ic_max_send_data_segment_length) {
struct iscsi_bhs_data_in *bhsdi;
int flags;
KASSERT(padding == 0, ("%s: ISO with padding %d for icp %p",
__func__, padding, icp));
switch (bhs->bhs_opcode) {
case ISCSI_BHS_OPCODE_SCSI_DATA_OUT:
flags = 1;
break;
case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
flags = 2;
break;
default:
panic("invalid opcode %#x for ISO", bhs->bhs_opcode);
}
data_len = icc->ic.ic_max_send_data_segment_length;
bhsdi = (struct iscsi_bhs_data_in *)bhs;
if (bhsdi->bhsdi_flags & BHSDI_FLAGS_F) {
/*
* Firmware will set F on the final PDU in the
* burst.
*/
flags |= CXGBE_ISO_F;
bhsdi->bhsdi_flags &= ~BHSDI_FLAGS_F;
}
set_mbuf_iscsi_iso(m, true);
set_mbuf_iscsi_iso_flags(m, flags);
set_mbuf_iscsi_iso_mss(m, data_len);
}
bhs->bhs_data_segment_len[2] = data_len;
bhs->bhs_data_segment_len[1] = data_len >> 8;
bhs->bhs_data_segment_len[0] = data_len >> 16;
/*
* Extract mbuf chain from PDU.
@ -477,7 +511,8 @@ icl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *ip,
}
MPASS(len == 0);
}
MPASS(ip->ip_data_len <= ic->ic_max_send_data_segment_length);
MPASS(ip->ip_data_len <= max(ic->ic_max_send_data_segment_length,
ic->ic_hw_isomax));
return (0);
}
@ -748,7 +783,7 @@ icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd)
struct tcpcb *tp;
struct toepcb *toep;
cap_rights_t rights;
int error;
int error, max_iso_pdus;
MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
ICL_CONN_LOCK_ASSERT_NOT(ic);
@ -815,12 +850,21 @@ icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd)
icc->ulp_submode |= ULP_CRC_HEADER;
if (ic->ic_data_crc32c)
icc->ulp_submode |= ULP_CRC_DATA;
if (icc->sc->tt.iso && chip_id(icc->sc) >= CHELSIO_T5) {
max_iso_pdus = CXGBEI_MAX_ISO_PAYLOAD /
ci->max_tx_pdu_len;
ic->ic_hw_isomax = max_iso_pdus *
ic->ic_max_send_data_segment_length;
} else
max_iso_pdus = 1;
so->so_options |= SO_NO_DDP;
toep->params.ulp_mode = ULP_MODE_ISCSI;
toep->ulpcb = icc;
send_iscsi_flowc_wr(icc->sc, toep, roundup(ci->max_tx_pdu_len,
tp->t_maxseg));
send_iscsi_flowc_wr(icc->sc, toep,
roundup(max_iso_pdus * ci->max_tx_pdu_len, tp->t_maxseg));
set_ulp_mode_iscsi(icc->sc, toep, icc->ulp_submode);
error = 0;
}

View File

@ -233,6 +233,7 @@ struct tom_tunables {
int cop_managed_offloading;
int autorcvbuf_inc;
int update_hc_on_pmtu_change;
int iso;
};
/* iWARP driver tunables */

View File

@ -7590,6 +7590,10 @@ t4_sysctls(struct adapter *sc)
&sc->tt.update_hc_on_pmtu_change, 0,
"Update hostcache entry if the PMTU changes");
sc->tt.iso = 1;
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "iso", CTLFLAG_RW,
&sc->tt.iso, 0, "Enable iSCSI segmentation offload");
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick",
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
sysctl_tp_tick, "A", "TP timer tick (us)");
@ -11896,6 +11900,7 @@ clear_stats(struct adapter *sc, u_int port_id)
ofld_txq->wrq.tx_wrs_copied = 0;
counter_u64_zero(ofld_txq->tx_iscsi_pdus);
counter_u64_zero(ofld_txq->tx_iscsi_octets);
counter_u64_zero(ofld_txq->tx_iscsi_iso_wrs);
counter_u64_zero(ofld_txq->tx_toe_tls_records);
counter_u64_zero(ofld_txq->tx_toe_tls_octets);
}

View File

@ -4787,6 +4787,7 @@ alloc_ofld_txq(struct vi_info *vi, struct sge_ofld_txq *ofld_txq, int idx)
ofld_txq->tx_iscsi_pdus = counter_u64_alloc(M_WAITOK);
ofld_txq->tx_iscsi_octets = counter_u64_alloc(M_WAITOK);
ofld_txq->tx_iscsi_iso_wrs = counter_u64_alloc(M_WAITOK);
ofld_txq->tx_toe_tls_records = counter_u64_alloc(M_WAITOK);
ofld_txq->tx_toe_tls_octets = counter_u64_alloc(M_WAITOK);
add_ofld_txq_sysctls(&vi->ctx, oid, ofld_txq);
@ -4824,6 +4825,7 @@ free_ofld_txq(struct vi_info *vi, struct sge_ofld_txq *ofld_txq)
MPASS(!(eq->flags & EQ_HW_ALLOCATED));
counter_u64_free(ofld_txq->tx_iscsi_pdus);
counter_u64_free(ofld_txq->tx_iscsi_octets);
counter_u64_free(ofld_txq->tx_iscsi_iso_wrs);
counter_u64_free(ofld_txq->tx_toe_tls_records);
counter_u64_free(ofld_txq->tx_toe_tls_octets);
free_wrq(sc, &ofld_txq->wrq);
@ -4848,6 +4850,9 @@ add_ofld_txq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid,
SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_iscsi_octets",
CTLFLAG_RD, &ofld_txq->tx_iscsi_octets,
"# of payload octets in transmitted iSCSI PDUs");
SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_iscsi_iso_wrs",
CTLFLAG_RD, &ofld_txq->tx_iscsi_iso_wrs,
"# of iSCSI segmentation offload work requests");
SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "tx_toe_tls_records",
CTLFLAG_RD, &ofld_txq->tx_toe_tls_records,
"# of TOE TLS records transmitted");

View File

@ -67,6 +67,8 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <dev/iscsi/iscsi_proto.h>
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
@ -516,38 +518,44 @@ t4_close_conn(struct adapter *sc, struct toepcb *toep)
#define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16)
#define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16))
/*
 * Number of 16-byte credits taken up by the struct cpl_tx_data_iso that
 * is placed in a work request when ISO is in use.
 */
#define MIN_ISO_TX_CREDITS (howmany(sizeof(struct cpl_tx_data_iso), 16))
/*
 * Smallest credit count with which a data work request can be built:
 * the plain offload minimum, plus the ISO CPL when 'iso' is non-zero.
 */
#define MIN_TX_CREDITS(iso) \
(MIN_OFLD_TX_CREDITS + ((iso) ? MIN_ISO_TX_CREDITS : 0))
/*
 * Maximum amount of immediate data we could stuff in a WR.
 *
 * tx_credits is the number of 16-byte credits available; iso is non-zero
 * when the WR must also hold a struct cpl_tx_data_iso.  Returns 0 if
 * fewer than MIN_TX_CREDITS(iso) credits are available.  Otherwise
 * returns the bytes left for payload after the fw_ofld_tx_data_wr header
 * (and the ISO CPL when requested), limited to a single EQ_ESIZE
 * descriptor.
 *
 * NOTE(review): this listing shows the removed and the added lines of
 * the change side by side (both the one-argument and the two-argument
 * forms appear below); only the two-argument form is the new code.
 */
static inline int
max_imm_payload(int tx_credits)
max_imm_payload(int tx_credits, int iso)
{
const int iso_cpl_size = iso ? sizeof(struct cpl_tx_data_iso) : 0;
const int n = 1; /* Use no more than one desc for imm. data WR */
KASSERT(tx_credits >= 0 &&
tx_credits <= MAX_OFLD_TX_CREDITS,
("%s: %d credits", __func__, tx_credits));
if (tx_credits < MIN_OFLD_TX_CREDITS)
if (tx_credits < MIN_TX_CREDITS(iso))
return (0);
if (tx_credits >= (n * EQ_ESIZE) / 16)
return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr));
return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr) -
iso_cpl_size);
else
return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr));
return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr) -
iso_cpl_size);
}
/* Maximum number of SGL entries we could stuff in a WR */
static inline int
max_dsgl_nsegs(int tx_credits)
max_dsgl_nsegs(int tx_credits, int iso)
{
int nseg = 1; /* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */
int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS;
int sge_pair_credits = tx_credits - MIN_TX_CREDITS(iso);
KASSERT(tx_credits >= 0 &&
tx_credits <= MAX_OFLD_TX_CREDITS,
("%s: %d credits", __func__, tx_credits));
if (tx_credits < MIN_OFLD_TX_CREDITS)
if (tx_credits < MIN_TX_CREDITS(iso))
return (0);
nseg += 2 * (sge_pair_credits * 16 / 24);
@ -558,12 +566,13 @@ max_dsgl_nsegs(int tx_credits)
}
static inline void
write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen,
unsigned int plen, uint8_t credits, int shove, int ulp_submode)
write_tx_wr(void *dst, struct toepcb *toep, int fw_wr_opcode,
unsigned int immdlen, unsigned int plen, uint8_t credits, int shove,
int ulp_submode)
{
struct fw_ofld_tx_data_wr *txwr = dst;
txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) |
txwr->op_to_immdlen = htobe32(V_WR_OP(fw_wr_opcode) |
V_FW_WR_IMMDLEN(immdlen));
txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) |
V_FW_WR_LEN16(credits));
@ -707,8 +716,8 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
txsd = &toep->txsd[toep->txsd_pidx];
do {
tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
max_imm = max_imm_payload(tx_credits);
max_nsegs = max_dsgl_nsegs(tx_credits);
max_imm = max_imm_payload(tx_credits, 0);
max_nsegs = max_dsgl_nsegs(tx_credits, 0);
SOCKBUF_LOCK(sb);
sowwakeup = drop;
@ -832,7 +841,8 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
}
txwr = wrtod(wr);
credits = howmany(wr->wr_len, 16);
write_tx_wr(txwr, toep, plen, plen, credits, shove, 0);
write_tx_wr(txwr, toep, FW_OFLD_TX_DATA_WR, plen, plen,
credits, shove, 0);
m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
nsegs = 0;
} else {
@ -851,7 +861,8 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
}
txwr = wrtod(wr);
credits = howmany(wr_len, 16);
write_tx_wr(txwr, toep, 0, plen, credits, shove, 0);
write_tx_wr(txwr, toep, FW_OFLD_TX_DATA_WR, 0, plen,
credits, shove, 0);
write_tx_sgl(txwr + 1, sndptr, m, nsegs,
max_nsegs_1mbuf);
if (wr_len & 0xf) {
@ -927,18 +938,71 @@ rqdrop_locked(struct mbufq *q, int plen)
}
}
/*
* Not a bit in the TCB, but is a bit in the ulp_submode field of the
* CPL_TX_DATA flags field in FW_ISCSI_TX_DATA_WR.
*/
#define ULP_ISO G_TX_ULP_SUBMODE(F_FW_ISCSI_TX_DATA_WR_ULPSUBMODE_ISO)
/*
 * Build the CPL_TX_DATA_ISO header for one segmentation-offload request
 * in the buffer at 'dst'.
 *
 * 'ulp_submode' supplies the ULP_CRC_HEADER / ULP_CRC_DATA digest bits,
 * 'flags' carries the CXGBE_ISO_* bits recorded for the PDU, 'mss' is
 * the payload size of each PDU to be generated, and 'len' is the total
 * length handed to the firmware (template BHS included).  'npdu' is
 * accepted for the caller's convenience but is not consulted here.
 */
static void
write_tx_data_iso(void *dst, u_int ulp_submode, uint8_t flags, uint16_t mss,
    int len, int npdu)
{
	struct cpl_tx_data_iso *iso_cpl = (struct cpl_tx_data_iso *)dst;
	unsigned int burst_bytes, final_slice;
	int want_hdr_crc, want_data_crc;

	/*
	 * Firmware sets 'F' on the last generated PDU if either this
	 * request is flagged as the "last" slice, or the payload sent
	 * reaches burst_size.  We never want the second rule to trigger
	 * by accident, so the burst size is deliberately overstated (len
	 * also counts the template BHS) and "last" is asserted only when
	 * the original PDU carried 'F'.
	 */
	burst_bytes = len;
	final_slice = !!(flags & CXGBE_ISO_F);

	want_hdr_crc = !!(ulp_submode & ULP_CRC_HEADER);
	want_data_crc = !!(ulp_submode & ULP_CRC_DATA);

	/* Fields that are always zero for our use of ISO. */
	iso_cpl->ahs_len = 0;
	iso_cpl->reserved2_seglen_offset = htonl(0);
	iso_cpl->datasn_offset = htonl(0);
	iso_cpl->buffer_offset = htonl(0);
	iso_cpl->reserved3 = 0;

	/* Sizes: per-PDU and burst sizes are given in 4-byte units. */
	iso_cpl->mpdu = htons(DIV_ROUND_UP(mss, 4));
	iso_cpl->burst_size = htonl(DIV_ROUND_UP(burst_bytes, 4));
	iso_cpl->len = htonl(len);

	iso_cpl->op_to_scsi = htonl(
	    V_CPL_TX_DATA_ISO_OP(CPL_TX_DATA_ISO) |
	    V_CPL_TX_DATA_ISO_FIRST(1) |
	    V_CPL_TX_DATA_ISO_LAST(final_slice) |
	    V_CPL_TX_DATA_ISO_CPLHDRLEN(0) |
	    V_CPL_TX_DATA_ISO_HDRCRC(want_hdr_crc) |
	    V_CPL_TX_DATA_ISO_PLDCRC(want_data_crc) |
	    V_CPL_TX_DATA_ISO_IMMEDIATE(0) |
	    V_CPL_TX_DATA_ISO_SCSI(CXGBE_ISO_TYPE(flags)));
}
static struct wrqe *
write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr)
{
struct mbuf *m;
struct fw_ofld_tx_data_wr *txwr;
struct cpl_tx_data_iso *cpl_iso;
void *p;
struct wrqe *wr;
u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
u_int adjusted_plen, ulp_submode;
u_int adjusted_plen, imm_data, ulp_submode;
struct inpcb *inp = toep->inp;
struct tcpcb *tp = intotcpcb(inp);
int tx_credits, shove;
int tx_credits, shove, npdu, wr_len;
uint16_t iso_mss;
static const u_int ulp_extra_len[] = {0, 4, 4, 8};
bool iso;
M_ASSERTPKTHDR(sndptr);
@ -958,8 +1022,10 @@ write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr)
return (wr);
}
max_imm = max_imm_payload(tx_credits);
max_nsegs = max_dsgl_nsegs(tx_credits);
iso = mbuf_iscsi_iso(sndptr);
max_imm = max_imm_payload(tx_credits, iso);
max_nsegs = max_dsgl_nsegs(tx_credits, iso);
iso_mss = mbuf_iscsi_iso_mss(sndptr);
plen = 0;
nsegs = 0;
@ -993,8 +1059,6 @@ write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr)
MPASS(sndptr->m_pkthdr.len == plen);
shove = !(tp->t_flags & TF_MORETOCOME);
ulp_submode = mbuf_ulp_submode(sndptr);
MPASS(ulp_submode < nitems(ulp_extra_len));
/*
* plen doesn't include header and data digests, which are
@ -1002,51 +1066,73 @@ write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr)
* they do occupy TCP sequence space and need to be accounted
* for.
*/
adjusted_plen = plen + ulp_extra_len[ulp_submode];
ulp_submode = mbuf_ulp_submode(sndptr);
MPASS(ulp_submode < nitems(ulp_extra_len));
npdu = iso ? howmany(plen - ISCSI_BHS_SIZE, iso_mss) : 1;
adjusted_plen = plen + ulp_extra_len[ulp_submode] * npdu;
if (iso)
adjusted_plen += ISCSI_BHS_SIZE * (npdu - 1);
wr_len = sizeof(*txwr);
if (iso)
wr_len += sizeof(struct cpl_tx_data_iso);
if (plen <= max_imm) {
/* Immediate data tx */
wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
&toep->ofld_txq->wrq);
if (wr == NULL) {
/* XXX: how will we recover from this? */
return (NULL);
}
txwr = wrtod(wr);
credits = howmany(wr->wr_len, 16);
write_tx_wr(txwr, toep, plen, adjusted_plen, credits,
shove, ulp_submode);
m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
imm_data = plen;
wr_len += plen;
nsegs = 0;
} else {
int wr_len;
/* DSGL tx */
wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
imm_data = 0;
wr_len += sizeof(struct ulptx_sgl) +
((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
wr = alloc_wrqe(roundup2(wr_len, 16),
&toep->ofld_txq->wrq);
if (wr == NULL) {
/* XXX: how will we recover from this? */
return (NULL);
}
txwr = wrtod(wr);
credits = howmany(wr_len, 16);
write_tx_wr(txwr, toep, 0, adjusted_plen, credits,
shove, ulp_submode);
write_tx_sgl(txwr + 1, sndptr, m, nsegs, max_nsegs_1mbuf);
}
wr = alloc_wrqe(roundup2(wr_len, 16), &toep->ofld_txq->wrq);
if (wr == NULL) {
/* XXX: how will we recover from this? */
return (NULL);
}
txwr = wrtod(wr);
credits = howmany(wr->wr_len, 16);
if (iso) {
write_tx_wr(txwr, toep, FW_ISCSI_TX_DATA_WR,
imm_data + sizeof(struct cpl_tx_data_iso),
adjusted_plen, credits, shove, ulp_submode | ULP_ISO);
cpl_iso = (struct cpl_tx_data_iso *)(txwr + 1);
MPASS(plen == sndptr->m_pkthdr.len);
write_tx_data_iso(cpl_iso, ulp_submode,
mbuf_iscsi_iso_flags(sndptr), iso_mss, plen, npdu);
p = cpl_iso + 1;
} else {
write_tx_wr(txwr, toep, FW_OFLD_TX_DATA_WR, imm_data,
adjusted_plen, credits, shove, ulp_submode);
p = txwr + 1;
}
if (imm_data != 0) {
m_copydata(sndptr, 0, plen, p);
} else {
write_tx_sgl(p, sndptr, m, nsegs, max_nsegs_1mbuf);
if (wr_len & 0xf) {
uint64_t *pad = (uint64_t *)((uintptr_t)txwr + wr_len);
*pad = 0;
}
}
KASSERT(toep->tx_credits >= credits,
("%s: not enough credits: credits %u "
"toep->tx_credits %u tx_credits %u nsegs %u "
"max_nsegs %u iso %d", __func__, credits,
toep->tx_credits, tx_credits, nsegs, max_nsegs, iso));
tp->snd_nxt += adjusted_plen;
tp->snd_max += adjusted_plen;
counter_u64_add(toep->ofld_txq->tx_iscsi_pdus, 1);
counter_u64_add(toep->ofld_txq->tx_iscsi_pdus, npdu);
counter_u64_add(toep->ofld_txq->tx_iscsi_octets, plen);
if (iso)
counter_u64_add(toep->ofld_txq->tx_iscsi_iso_wrs, 1);
return (wr);
}

View File

@ -362,6 +362,58 @@ mbuf_ulp_submode(struct mbuf *m)
return (m->m_pkthdr.PH_per.eight[0]);
}
/*
 * Mark (or unmark) a packet-header mbuf as an iSCSI segmentation
 * offload request.  The marker is kept in PH_per.eight[1].
 */
static inline void
set_mbuf_iscsi_iso(struct mbuf *m, bool iso)
{

	M_ASSERTPKTHDR(m);
	m->m_pkthdr.PH_per.eight[1] = iso ? 1 : 0;
}
/*
 * Report whether a packet-header mbuf was marked as an iSCSI
 * segmentation offload request (non-zero PH_per.eight[1]).
 */
static inline bool
mbuf_iscsi_iso(struct mbuf *m)
{

	M_ASSERTPKTHDR(m);
	return (m->m_pkthdr.PH_per.eight[1] != 0);
}
/* Flags for iSCSI segmentation offload. */
#define CXGBE_ISO_TYPE(flags) ((flags) & 0x3)
#define CXGBE_ISO_F 0x4
/*
 * Save the CXGBE_ISO_* flag byte for this PDU in the packet header
 * (PH_per.eight[2]).
 */
static inline void
set_mbuf_iscsi_iso_flags(struct mbuf *m, uint8_t flags)
{
	uint8_t *slot;

	M_ASSERTPKTHDR(m);
	slot = &m->m_pkthdr.PH_per.eight[2];
	*slot = flags;
}
/*
 * Fetch the CXGBE_ISO_* flag byte previously stored in the packet
 * header (PH_per.eight[2]).
 */
static inline uint8_t
mbuf_iscsi_iso_flags(struct mbuf *m)
{
	uint8_t stored;

	M_ASSERTPKTHDR(m);
	stored = m->m_pkthdr.PH_per.eight[2];
	return (stored);
}
/*
 * Store the per-PDU segment size to use for segmentation offload in
 * the packet header (PH_per.sixteen[2]).
 */
static inline void
set_mbuf_iscsi_iso_mss(struct mbuf *m, uint16_t mss)
{
	uint16_t *slot;

	M_ASSERTPKTHDR(m);
	slot = &m->m_pkthdr.PH_per.sixteen[2];
	*slot = mss;
}
/*
 * Return the per-PDU segment size previously stored in the packet
 * header (PH_per.sixteen[2]).
 */
static inline uint16_t
mbuf_iscsi_iso_mss(struct mbuf *m)
{
	uint16_t stored;

	M_ASSERTPKTHDR(m);
	stored = m->m_pkthdr.PH_per.sixteen[2];
	return (stored);
}
/* t4_tom.c */
struct toepcb *alloc_toepcb(struct vi_info *, int);
int init_toepcb(struct vi_info *, struct toepcb *);