net/sfc: support TSO for EF100 native datapath

Riverhead boards support TSO version 3.

Signed-off-by: Ivan Malov <ivan.malov@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <arybchenko@solarflare.com>
Ivan Malov 2020-10-13 14:45:40 +01:00 committed by Ferruh Yigit
parent f71965f9df
commit 4f936666d7
5 changed files with 258 additions and 41 deletions


@@ -329,7 +329,7 @@ boolean parameters value.
is even faster than **ef10** but does not support multi-segment
mbufs, disallows multiple mempools and neglects mbuf reference counters.
**ef100** chooses EF100 native datapath which supports multi-segment
mbufs, inner/outer IPv4 and TCP/UDP checksum offloads.
mbufs, inner/outer IPv4 and TCP/UDP checksum and TCP segmentation offloads.
- ``perf_profile`` [auto|throughput|low-latency] (default **throughput**)


@@ -205,7 +205,7 @@ sfc_estimate_resource_limits(struct sfc_adapter *sa)
MIN(encp->enc_txq_limit,
limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count);
if (sa->tso)
if (sa->tso && encp->enc_fw_assisted_tso_v2_enabled)
limits.edl_max_txq_count =
MIN(limits.edl_max_txq_count,
encp->enc_fw_assisted_tso_v2_n_contexts /
@@ -795,7 +795,8 @@ sfc_attach(struct sfc_adapter *sa)
encp->enc_tunnel_encapsulations_supported;
if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & DEV_TX_OFFLOAD_TCP_TSO) {
sa->tso = encp->enc_fw_assisted_tso_v2_enabled;
sa->tso = encp->enc_fw_assisted_tso_v2_enabled ||
encp->enc_tso_v3_enabled;
if (!sa->tso)
sfc_info(sa, "TSO support isn't available on this adapter");
}
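For context, a minimal application-side sketch (not part of this patch) of how the DEV_TX_OFFLOAD_TCP_TSO capability now advertised by the ef100 datapath would typically be discovered and requested; the app_enable_tso() helper name is hypothetical.

#include <errno.h>
#include <rte_ethdev.h>

/* Hypothetical helper: request TCP TSO if the port advertises it. */
static int
app_enable_tso(uint16_t port_id, struct rte_eth_conf *port_conf)
{
	struct rte_eth_dev_info dev_info;
	int rc;

	rc = rte_eth_dev_info_get(port_id, &dev_info);
	if (rc != 0)
		return rc;

	if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) == 0)
		return -ENOTSUP;

	/* To be passed to rte_eth_dev_configure() afterwards. */
	port_conf->txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO;
	return 0;
}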


@@ -70,6 +70,16 @@ struct sfc_dp_tx_qcreate_info {
* the hardware to apply TSO packet edits.
*/
uint16_t tso_tcp_header_offset_limit;
/** Maximum number of header DMA descriptors per TSOv3 transaction */
uint16_t tso_max_nb_header_descs;
/** Maximum header length acceptable by TSOv3 transaction */
uint16_t tso_max_header_len;
/** Maximum number of payload DMA descriptors per TSOv3 transaction */
uint16_t tso_max_nb_payload_descs;
/** Maximum payload length per TSOv3 transaction */
uint32_t tso_max_payload_len;
/** Maximum number of frames to be generated per TSOv3 transaction */
uint32_t tso_max_nb_outgoing_frames;
};
/**


@@ -77,6 +77,13 @@ struct sfc_ef100_txq {
unsigned int evq_phase_bit_shift;
volatile efx_qword_t *evq_hw_ring;
uint16_t tso_tcp_header_offset_limit;
uint16_t tso_max_nb_header_descs;
uint16_t tso_max_header_len;
uint16_t tso_max_nb_payload_descs;
uint32_t tso_max_payload_len;
uint32_t tso_max_nb_outgoing_frames;
/* Datapath transmit queue anchor */
struct sfc_dp_txq dp;
};
@@ -87,6 +94,42 @@ sfc_ef100_txq_by_dp_txq(struct sfc_dp_txq *dp_txq)
return container_of(dp_txq, struct sfc_ef100_txq, dp);
}
static int
sfc_ef100_tx_prepare_pkt_tso(struct sfc_ef100_txq * const txq,
struct rte_mbuf *m)
{
size_t header_len = m->l2_len + m->l3_len + m->l4_len;
size_t payload_len = m->pkt_len - header_len;
unsigned long mss_conformant_max_payload_len;
unsigned int nb_payload_descs;
mss_conformant_max_payload_len =
m->tso_segsz * txq->tso_max_nb_outgoing_frames;
/*
* Don't really want to know the exact number of payload segments.
* Just use the total number of segments as an upper limit. In
* practice the maximum number of payload segments is significantly
* bigger than the maximum number of header segments, so the header
* segments included in the total can be neglected when estimating
* the number of payload segments required.
*/
nb_payload_descs = m->nb_segs;
/*
* Carry out multiple independent checks using bitwise OR
* to avoid unnecessary conditional branching.
*/
if (unlikely((header_len > txq->tso_max_header_len) |
(nb_payload_descs > txq->tso_max_nb_payload_descs) |
(payload_len > txq->tso_max_payload_len) |
(payload_len > mss_conformant_max_payload_len) |
(m->pkt_len == header_len)))
return EINVAL;
return 0;
}
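For reference, a minimal sketch (not part of this patch) of the mbuf metadata an application is expected to fill in before calling rte_eth_tx_prepare() and rte_eth_tx_burst(), which is what the limits above are checked against; the Ethernet/IPv4/TCP layout and the MSS value are illustrative assumptions.

#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_tcp.h>

/* Hypothetical application-side setup for a non-tunnelled TCP packet. */
static void
app_mark_mbuf_for_tso(struct rte_mbuf *m)
{
	m->l2_len = sizeof(struct rte_ether_hdr);	/* 14 bytes, no VLAN */
	m->l3_len = sizeof(struct rte_ipv4_hdr);	/* 20 bytes, no options */
	m->l4_len = sizeof(struct rte_tcp_hdr);		/* 20 bytes, no options */
	m->tso_segsz = 1448;				/* example MSS */
	m->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_IP_CKSUM |
		       PKT_TX_TCP_CKSUM;
}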
static uint16_t
sfc_ef100_tx_prepare_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts)
@@ -110,16 +153,25 @@ sfc_ef100_tx_prepare_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
(m->ol_flags & PKT_TX_L4_MASK)) {
calc_phdr_cksum = true;
max_nb_header_segs = 1;
} else if (m->ol_flags & PKT_TX_TCP_SEG) {
max_nb_header_segs = txq->tso_max_nb_header_descs;
}
ret = sfc_dp_tx_prepare_pkt(m, max_nb_header_segs, 0,
0, txq->max_fill_level, 0, 0);
txq->tso_tcp_header_offset_limit,
txq->max_fill_level, 1, 0);
if (unlikely(ret != 0)) {
rte_errno = ret;
break;
}
if (m->nb_segs > EFX_MASK32(ESF_GZ_TX_SEND_NUM_SEGS)) {
if (m->ol_flags & PKT_TX_TCP_SEG) {
ret = sfc_ef100_tx_prepare_pkt_tso(txq, m);
if (unlikely(ret != 0)) {
rte_errno = ret;
break;
}
} else if (m->nb_segs > EFX_MASK32(ESF_GZ_TX_SEND_NUM_SEGS)) {
rte_errno = EINVAL;
break;
}
@@ -326,6 +378,48 @@ sfc_ef100_tx_qdesc_seg_create(rte_iova_t addr, uint16_t len,
ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_SEG);
}
static void
sfc_ef100_tx_qdesc_tso_create(const struct rte_mbuf *m,
uint16_t nb_header_descs,
uint16_t nb_payload_descs,
size_t header_len, size_t payload_len,
size_t iph_off, size_t tcph_off,
efx_oword_t *tx_desc)
{
efx_oword_t tx_desc_extra_fields;
/*
* If no tunnel encapsulation is present, then the ED_INNER
* fields should be used.
*/
int ed_inner_ip_id = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16;
EFX_POPULATE_OWORD_7(*tx_desc,
ESF_GZ_TX_TSO_MSS, m->tso_segsz,
ESF_GZ_TX_TSO_HDR_NUM_SEGS, nb_header_descs,
ESF_GZ_TX_TSO_PAYLOAD_NUM_SEGS, nb_payload_descs,
ESF_GZ_TX_TSO_ED_INNER_IP4_ID, ed_inner_ip_id,
ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1,
ESF_GZ_TX_TSO_HDR_LEN_W, header_len >> 1,
ESF_GZ_TX_TSO_PAYLOAD_LEN, payload_len);
EFX_POPULATE_OWORD_5(tx_desc_extra_fields,
/*
* Inner offsets are required for inner IPv4 ID
* and IP length edits.
*/
ESF_GZ_TX_TSO_INNER_L3_OFF_W, iph_off >> 1,
ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcph_off >> 1,
/*
* Use outer full checksum offloads which do
* not require any extra information.
*/
ESF_GZ_TX_TSO_CSO_OUTER_L3, 1,
ESF_GZ_TX_TSO_CSO_OUTER_L4, 1,
ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_TSO);
EFX_OR_OWORD(*tx_desc, tx_desc_extra_fields);
}
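The _W-suffixed descriptor fields appear to be expressed in 16-bit words, hence the >> 1 conversions of the header length and of the inner L3/L4 offsets; this implicitly relies on those values being even, which holds for Ethernet (with or without VLAN), IPv4 and TCP headers.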
static inline void
sfc_ef100_tx_qpush(struct sfc_ef100_txq *txq, unsigned int added)
{
@@ -351,30 +445,115 @@ sfc_ef100_tx_qpush(struct sfc_ef100_txq *txq, unsigned int added)
static unsigned int
sfc_ef100_tx_pkt_descs_max(const struct rte_mbuf *m)
{
unsigned int extra_descs = 0;
/** Maximum length of an mbuf segment data */
#define SFC_MBUF_SEG_LEN_MAX UINT16_MAX
RTE_BUILD_BUG_ON(sizeof(m->data_len) != 2);
/*
* mbuf segment cannot be bigger than maximum segment length and
* maximum packet length since TSO is not supported yet.
* Make sure that the first segment does not need fragmentation
* (split into many Tx descriptors).
*/
RTE_BUILD_BUG_ON(SFC_EF100_TX_SEND_DESC_LEN_MAX <
RTE_MIN((unsigned int)EFX_MAC_PDU_MAX, SFC_MBUF_SEG_LEN_MAX));
if (m->ol_flags & PKT_TX_TCP_SEG) {
/* Tx TSO descriptor */
extra_descs++;
/*
* An extra Tx segment descriptor may be required if the header
* ends in the middle of a segment.
*/
extra_descs++;
} else {
/*
* mbuf segment cannot be bigger than maximum segment length
* and maximum packet length since TSO is not supported yet.
* Make sure that the first segment does not need fragmentation
* (split into many Tx descriptors).
*/
RTE_BUILD_BUG_ON(SFC_EF100_TX_SEND_DESC_LEN_MAX <
RTE_MIN((unsigned int)EFX_MAC_PDU_MAX,
SFC_MBUF_SEG_LEN_MAX));
}
/*
* Any segment of scattered packet cannot be bigger than maximum
* segment length and maximum packet length since TSO is not
* supported yet.
* Make sure that subsequent segments do not need fragmentation (split
* into many Tx descriptors).
* segment length. Make sure that subsequent segments do not need
* fragmentation (split into many Tx descriptors).
*/
RTE_BUILD_BUG_ON(SFC_EF100_TX_SEG_DESC_LEN_MAX <
RTE_MIN((unsigned int)EFX_MAC_PDU_MAX, SFC_MBUF_SEG_LEN_MAX));
RTE_BUILD_BUG_ON(SFC_EF100_TX_SEG_DESC_LEN_MAX < SFC_MBUF_SEG_LEN_MAX);
return m->nb_segs;
return m->nb_segs + extra_descs;
}
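As a worked example, a hypothetical TSO packet spread over three mbuf segments is bounded by 3 + 2 = 5 descriptors: one per segment, one TSO descriptor, and one extra in case the header ends in the middle of a segment and that segment has to be emitted as two descriptors.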
static struct rte_mbuf *
sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq,
struct rte_mbuf *m, unsigned int *added)
{
struct rte_mbuf *m_seg = m;
unsigned int nb_hdr_descs;
unsigned int nb_pld_descs;
unsigned int seg_split = 0;
unsigned int tso_desc_id;
unsigned int id;
size_t iph_off;
size_t tcph_off;
size_t header_len;
size_t remaining_hdr_len;
iph_off = m->l2_len;
tcph_off = iph_off + m->l3_len;
header_len = tcph_off + m->l4_len;
/*
* Remember ID of the TX_TSO descriptor to be filled in.
* We can't fill it in right now since we need to calculate
* number of header and payload segments first and don't want
* to traverse the mbuf chain twice here.
*/
tso_desc_id = (*added)++ & txq->ptr_mask;
remaining_hdr_len = header_len;
do {
id = (*added)++ & txq->ptr_mask;
if (rte_pktmbuf_data_len(m_seg) <= remaining_hdr_len) {
/* The segment is entirely a header segment */
sfc_ef100_tx_qdesc_seg_create(
rte_mbuf_data_iova(m_seg),
rte_pktmbuf_data_len(m_seg),
&txq->txq_hw_ring[id]);
remaining_hdr_len -= rte_pktmbuf_data_len(m_seg);
} else {
/*
* The segment must be split into header and
* payload segments
*/
sfc_ef100_tx_qdesc_seg_create(
rte_mbuf_data_iova(m_seg),
remaining_hdr_len,
&txq->txq_hw_ring[id]);
SFC_ASSERT(txq->sw_ring[id].mbuf == NULL);
id = (*added)++ & txq->ptr_mask;
sfc_ef100_tx_qdesc_seg_create(
rte_mbuf_data_iova(m_seg) + remaining_hdr_len,
rte_pktmbuf_data_len(m_seg) - remaining_hdr_len,
&txq->txq_hw_ring[id]);
remaining_hdr_len = 0;
seg_split = 1;
}
txq->sw_ring[id].mbuf = m_seg;
m_seg = m_seg->next;
} while (remaining_hdr_len > 0);
/*
* If a segment is split into header and payload descriptors, the
* count derived from the 'added' index includes it twice and must be corrected.
*/
nb_hdr_descs = ((id - tso_desc_id) & txq->ptr_mask) - seg_split;
nb_pld_descs = m->nb_segs - nb_hdr_descs + seg_split;
sfc_ef100_tx_qdesc_tso_create(m, nb_hdr_descs, nb_pld_descs, header_len,
rte_pktmbuf_pkt_len(m) - header_len,
iph_off, tcph_off,
&txq->txq_hw_ring[tso_desc_id]);
return m_seg;
}
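As a worked example, assume a 54-byte Ethernet/IPv4/TCP header that fits entirely inside a larger first segment: the loop emits one 54-byte header descriptor plus one payload descriptor for the rest of that segment (seg_split = 1), so nb_hdr_descs = 2 - 1 = 1 and nb_pld_descs = m->nb_segs - 1 + 1 = m->nb_segs, i.e. every segment contributes exactly one payload descriptor.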
static uint16_t
@@ -428,27 +607,33 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
break;
}
id = added++ & txq->ptr_mask;
sfc_ef100_tx_qdesc_send_create(m_seg, &txq->txq_hw_ring[id]);
if (m_seg->ol_flags & PKT_TX_TCP_SEG) {
m_seg = sfc_ef100_xmit_tso_pkt(txq, m_seg, &added);
} else {
id = added++ & txq->ptr_mask;
sfc_ef100_tx_qdesc_send_create(m_seg,
&txq->txq_hw_ring[id]);
/*
* rte_pktmbuf_free() is commonly used in DPDK for
* recycling packets - the function checks every
* segment's reference counter and returns the
* buffer to its pool whenever possible;
* nevertheless, freeing mbuf segments one by one
* may entail some performance decline;
* from this point, sfc_efx_tx_reap() does the same job
* on its own and frees buffers in bulks (all mbufs
* within a bulk belong to the same pool);
* from this perspective, individual segment pointers
* must be associated with the corresponding SW
* descriptors independently so that only one loop
* is sufficient on reap to inspect all the buffers
*/
txq->sw_ring[id].mbuf = m_seg;
/*
* rte_pktmbuf_free() is commonly used in DPDK for
* recycling packets - the function checks every
* segment's reference counter and returns the
* buffer to its pool whenever possible;
* nevertheless, freeing mbuf segments one by one
* may entail some performance decline;
* from this point, sfc_efx_tx_reap() does the same job
* on its own and frees buffers in bulks (all mbufs
* within a bulk belong to the same pool);
* from this perspective, individual segment pointers
* must be associated with the corresponding SW
* descriptors independently so that only one loop
* is sufficient on reap to inspect all the buffers
*/
txq->sw_ring[id].mbuf = m_seg;
m_seg = m_seg->next;
}
while ((m_seg = m_seg->next) != NULL) {
while (m_seg != NULL) {
RTE_BUILD_BUG_ON(SFC_MBUF_SEG_LEN_MAX >
SFC_EF100_TX_SEG_DESC_LEN_MAX);
@@ -457,6 +642,7 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
rte_pktmbuf_data_len(m_seg),
&txq->txq_hw_ring[id]);
txq->sw_ring[id].mbuf = m_seg;
m_seg = m_seg->next;
}
dma_desc_space -= (added - pkt_start);
@@ -552,6 +738,13 @@ sfc_ef100_tx_qcreate(uint16_t port_id, uint16_t queue_id,
(info->hw_index << info->vi_window_shift);
txq->evq_hw_ring = info->evq_hw_ring;
txq->tso_tcp_header_offset_limit = info->tso_tcp_header_offset_limit;
txq->tso_max_nb_header_descs = info->tso_max_nb_header_descs;
txq->tso_max_header_len = info->tso_max_header_len;
txq->tso_max_nb_payload_descs = info->tso_max_nb_payload_descs;
txq->tso_max_payload_len = info->tso_max_payload_len;
txq->tso_max_nb_outgoing_frames = info->tso_max_nb_outgoing_frames;
sfc_ef100_tx_debug(txq, "TxQ doorbell is %p", txq->doorbell);
*dp_txqp = &txq->dp;
@@ -690,7 +883,8 @@ struct sfc_dp_tx sfc_ef100_tx = {
DEV_TX_OFFLOAD_OUTER_UDP_CKSUM |
DEV_TX_OFFLOAD_UDP_CKSUM |
DEV_TX_OFFLOAD_TCP_CKSUM |
DEV_TX_OFFLOAD_MULTI_SEGS,
DEV_TX_OFFLOAD_MULTI_SEGS |
DEV_TX_OFFLOAD_TCP_TSO,
.get_dev_info = sfc_ef100_get_dev_info,
.qsize_up_rings = sfc_ef100_tx_qsize_up_rings,
.qcreate = sfc_ef100_tx_qcreate,


@@ -188,6 +188,17 @@ sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
info.vi_window_shift = encp->enc_vi_window_shift;
info.tso_tcp_header_offset_limit =
encp->enc_tx_tso_tcp_header_offset_limit;
info.tso_max_nb_header_descs =
RTE_MIN(encp->enc_tx_tso_max_header_ndescs,
(uint32_t)UINT16_MAX);
info.tso_max_header_len =
RTE_MIN(encp->enc_tx_tso_max_header_length,
(uint32_t)UINT16_MAX);
info.tso_max_nb_payload_descs =
RTE_MIN(encp->enc_tx_tso_max_payload_ndescs,
(uint32_t)UINT16_MAX);
info.tso_max_payload_len = encp->enc_tx_tso_max_payload_length;
info.tso_max_nb_outgoing_frames = encp->enc_tx_tso_max_nframes;
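The RTE_MIN() clamps reflect the narrower types on the datapath side: libefx reports these limits as 32-bit values, while the header-related fields added to sfc_dp_tx_qcreate_info and struct sfc_ef100_txq above are uint16_t.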
rc = sa->priv.dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index,
&RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
@@ -592,7 +603,8 @@ sfc_tx_start(struct sfc_adapter *sa)
sfc_log_init(sa, "txq_count = %u", sas->txq_count);
if (sa->tso) {
if (!encp->enc_fw_assisted_tso_v2_enabled) {
if (!encp->enc_fw_assisted_tso_v2_enabled &&
!encp->enc_tso_v3_enabled) {
sfc_warn(sa, "TSO support was unable to be restored");
sa->tso = B_FALSE;
sa->tso_encap = B_FALSE;