diff --git a/doc/guides/nics/sfc_efx.rst b/doc/guides/nics/sfc_efx.rst index f3135fdd70..104ab38aa9 100644 --- a/doc/guides/nics/sfc_efx.rst +++ b/doc/guides/nics/sfc_efx.rst @@ -329,7 +329,7 @@ boolean parameters value. is even more faster then **ef10** but does not support multi-segment mbufs, disallows multiple mempools and neglects mbuf reference counters. **ef100** chooses EF100 native datapath which supports multi-segment - mbufs, inner/outer IPv4 and TCP/UDP checksum offloads. + mbufs, inner/outer IPv4 and TCP/UDP checksum and TCP segmentation offloads. - ``perf_profile`` [auto|throughput|low-latency] (default **throughput**) diff --git a/drivers/net/sfc/sfc.c b/drivers/net/sfc/sfc.c index cfba485ad2..b41db65003 100644 --- a/drivers/net/sfc/sfc.c +++ b/drivers/net/sfc/sfc.c @@ -205,7 +205,7 @@ sfc_estimate_resource_limits(struct sfc_adapter *sa) MIN(encp->enc_txq_limit, limits.edl_max_evq_count - 1 - limits.edl_max_rxq_count); - if (sa->tso) + if (sa->tso && encp->enc_fw_assisted_tso_v2_enabled) limits.edl_max_txq_count = MIN(limits.edl_max_txq_count, encp->enc_fw_assisted_tso_v2_n_contexts / @@ -795,7 +795,8 @@ sfc_attach(struct sfc_adapter *sa) encp->enc_tunnel_encapsulations_supported; if (sfc_dp_tx_offload_capa(sa->priv.dp_tx) & DEV_TX_OFFLOAD_TCP_TSO) { - sa->tso = encp->enc_fw_assisted_tso_v2_enabled; + sa->tso = encp->enc_fw_assisted_tso_v2_enabled || + encp->enc_tso_v3_enabled; if (!sa->tso) sfc_info(sa, "TSO support isn't available on this adapter"); } diff --git a/drivers/net/sfc/sfc_dp_tx.h b/drivers/net/sfc/sfc_dp_tx.h index bed8ce84aa..3ecdfcdd28 100644 --- a/drivers/net/sfc/sfc_dp_tx.h +++ b/drivers/net/sfc/sfc_dp_tx.h @@ -70,6 +70,16 @@ struct sfc_dp_tx_qcreate_info { * the hardware to apply TSO packet edits. */ uint16_t tso_tcp_header_offset_limit; + /** Maximum number of header DMA descriptors per TSOv3 transaction */ + uint16_t tso_max_nb_header_descs; + /** Maximum header length acceptable by TSOv3 transaction */ + uint16_t tso_max_header_len; + /** Maximum number of payload DMA descriptors per TSOv3 transaction */ + uint16_t tso_max_nb_payload_descs; + /** Maximum payload length per TSOv3 transaction */ + uint32_t tso_max_payload_len; + /** Maximum number of frames to be generated per TSOv3 transaction */ + uint32_t tso_max_nb_outgoing_frames; }; /** diff --git a/drivers/net/sfc/sfc_ef100_tx.c b/drivers/net/sfc/sfc_ef100_tx.c index 3cda0227b4..e30d5369d7 100644 --- a/drivers/net/sfc/sfc_ef100_tx.c +++ b/drivers/net/sfc/sfc_ef100_tx.c @@ -77,6 +77,13 @@ struct sfc_ef100_txq { unsigned int evq_phase_bit_shift; volatile efx_qword_t *evq_hw_ring; + uint16_t tso_tcp_header_offset_limit; + uint16_t tso_max_nb_header_descs; + uint16_t tso_max_header_len; + uint16_t tso_max_nb_payload_descs; + uint32_t tso_max_payload_len; + uint32_t tso_max_nb_outgoing_frames; + /* Datapath transmit queue anchor */ struct sfc_dp_txq dp; }; @@ -87,6 +94,42 @@ sfc_ef100_txq_by_dp_txq(struct sfc_dp_txq *dp_txq) return container_of(dp_txq, struct sfc_ef100_txq, dp); } +static int +sfc_ef100_tx_prepare_pkt_tso(struct sfc_ef100_txq * const txq, + struct rte_mbuf *m) +{ + size_t header_len = m->l2_len + m->l3_len + m->l4_len; + size_t payload_len = m->pkt_len - header_len; + unsigned long mss_conformant_max_payload_len; + unsigned int nb_payload_descs; + + mss_conformant_max_payload_len = + m->tso_segsz * txq->tso_max_nb_outgoing_frames; + + /* + * Don't really want to know exact number of payload segments. + * Just use total number of segments as upper limit. 
Practically + * maximum number of payload segments is significantly bigger + * than maximum number header segments, so we can neglect header + * segments excluded total number of segments to estimate number + * of payload segments required. + */ + nb_payload_descs = m->nb_segs; + + /* + * Carry out multiple independent checks using bitwise OR + * to avoid unnecessary conditional branching. + */ + if (unlikely((header_len > txq->tso_max_header_len) | + (nb_payload_descs > txq->tso_max_nb_payload_descs) | + (payload_len > txq->tso_max_payload_len) | + (payload_len > mss_conformant_max_payload_len) | + (m->pkt_len == header_len))) + return EINVAL; + + return 0; +} + static uint16_t sfc_ef100_tx_prepare_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) @@ -110,16 +153,25 @@ sfc_ef100_tx_prepare_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, (m->ol_flags & PKT_TX_L4_MASK)) { calc_phdr_cksum = true; max_nb_header_segs = 1; + } else if (m->ol_flags & PKT_TX_TCP_SEG) { + max_nb_header_segs = txq->tso_max_nb_header_descs; } ret = sfc_dp_tx_prepare_pkt(m, max_nb_header_segs, 0, - 0, txq->max_fill_level, 0, 0); + txq->tso_tcp_header_offset_limit, + txq->max_fill_level, 1, 0); if (unlikely(ret != 0)) { rte_errno = ret; break; } - if (m->nb_segs > EFX_MASK32(ESF_GZ_TX_SEND_NUM_SEGS)) { + if (m->ol_flags & PKT_TX_TCP_SEG) { + ret = sfc_ef100_tx_prepare_pkt_tso(txq, m); + if (unlikely(ret != 0)) { + rte_errno = ret; + break; + } + } else if (m->nb_segs > EFX_MASK32(ESF_GZ_TX_SEND_NUM_SEGS)) { rte_errno = EINVAL; break; } @@ -326,6 +378,48 @@ sfc_ef100_tx_qdesc_seg_create(rte_iova_t addr, uint16_t len, ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_SEG); } +static void +sfc_ef100_tx_qdesc_tso_create(const struct rte_mbuf *m, + uint16_t nb_header_descs, + uint16_t nb_payload_descs, + size_t header_len, size_t payload_len, + size_t iph_off, size_t tcph_off, + efx_oword_t *tx_desc) +{ + efx_oword_t tx_desc_extra_fields; + /* + * If no tunnel encapsulation is present, then the ED_INNER + * fields should be used. + */ + int ed_inner_ip_id = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; + + EFX_POPULATE_OWORD_7(*tx_desc, + ESF_GZ_TX_TSO_MSS, m->tso_segsz, + ESF_GZ_TX_TSO_HDR_NUM_SEGS, nb_header_descs, + ESF_GZ_TX_TSO_PAYLOAD_NUM_SEGS, nb_payload_descs, + ESF_GZ_TX_TSO_ED_INNER_IP4_ID, ed_inner_ip_id, + ESF_GZ_TX_TSO_ED_INNER_IP_LEN, 1, + ESF_GZ_TX_TSO_HDR_LEN_W, header_len >> 1, + ESF_GZ_TX_TSO_PAYLOAD_LEN, payload_len); + + EFX_POPULATE_OWORD_5(tx_desc_extra_fields, + /* + * Inner offsets are required for inner IPv4 ID + * and IP length edits. + */ + ESF_GZ_TX_TSO_INNER_L3_OFF_W, iph_off >> 1, + ESF_GZ_TX_TSO_INNER_L4_OFF_W, tcph_off >> 1, + /* + * Use outer full checksum offloads which do + * not require any extra information. + */ + ESF_GZ_TX_TSO_CSO_OUTER_L3, 1, + ESF_GZ_TX_TSO_CSO_OUTER_L4, 1, + ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_TSO); + + EFX_OR_OWORD(*tx_desc, tx_desc_extra_fields); +} + static inline void sfc_ef100_tx_qpush(struct sfc_ef100_txq *txq, unsigned int added) { @@ -351,30 +445,115 @@ sfc_ef100_tx_qpush(struct sfc_ef100_txq *txq, unsigned int added) static unsigned int sfc_ef100_tx_pkt_descs_max(const struct rte_mbuf *m) { + unsigned int extra_descs = 0; + /** Maximum length of an mbuf segment data */ #define SFC_MBUF_SEG_LEN_MAX UINT16_MAX RTE_BUILD_BUG_ON(sizeof(m->data_len) != 2); - /* - * mbuf segment cannot be bigger than maximum segment length and - * maximum packet length since TSO is not supported yet. 
- * Make sure that the first segment does not need fragmentation - * (split into many Tx descriptors). - */ - RTE_BUILD_BUG_ON(SFC_EF100_TX_SEND_DESC_LEN_MAX < - RTE_MIN((unsigned int)EFX_MAC_PDU_MAX, SFC_MBUF_SEG_LEN_MAX)); + if (m->ol_flags & PKT_TX_TCP_SEG) { + /* Tx TSO descriptor */ + extra_descs++; + /* + * Extra Tx segment descriptor may be required if header + * ends in the middle of segment. + */ + extra_descs++; + } else { + /* + * mbuf segment cannot be bigger than maximum segment length + * and maximum packet length since TSO is not supported yet. + * Make sure that the first segment does not need fragmentation + * (split into many Tx descriptors). + */ + RTE_BUILD_BUG_ON(SFC_EF100_TX_SEND_DESC_LEN_MAX < + RTE_MIN((unsigned int)EFX_MAC_PDU_MAX, + SFC_MBUF_SEG_LEN_MAX)); + } /* * Any segment of scattered packet cannot be bigger than maximum - * segment length and maximum packet length since TSO is not - * supported yet. - * Make sure that subsequent segments do not need fragmentation (split - * into many Tx descriptors). + * segment length. Make sure that subsequent segments do not need + * fragmentation (split into many Tx descriptors). */ - RTE_BUILD_BUG_ON(SFC_EF100_TX_SEG_DESC_LEN_MAX < - RTE_MIN((unsigned int)EFX_MAC_PDU_MAX, SFC_MBUF_SEG_LEN_MAX)); + RTE_BUILD_BUG_ON(SFC_EF100_TX_SEG_DESC_LEN_MAX < SFC_MBUF_SEG_LEN_MAX); - return m->nb_segs; + return m->nb_segs + extra_descs; +} + +static struct rte_mbuf * +sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq, + struct rte_mbuf *m, unsigned int *added) +{ + struct rte_mbuf *m_seg = m; + unsigned int nb_hdr_descs; + unsigned int nb_pld_descs; + unsigned int seg_split = 0; + unsigned int tso_desc_id; + unsigned int id; + size_t iph_off; + size_t tcph_off; + size_t header_len; + size_t remaining_hdr_len; + + iph_off = m->l2_len; + tcph_off = iph_off + m->l3_len; + header_len = tcph_off + m->l4_len; + + /* + * Remember ID of the TX_TSO descriptor to be filled in. + * We can't fill it in right now since we need to calculate + * number of header and payload segments first and don't want + * to traverse it twice here. + */ + tso_desc_id = (*added)++ & txq->ptr_mask; + + remaining_hdr_len = header_len; + do { + id = (*added)++ & txq->ptr_mask; + if (rte_pktmbuf_data_len(m_seg) <= remaining_hdr_len) { + /* The segment is fully header segment */ + sfc_ef100_tx_qdesc_seg_create( + rte_mbuf_data_iova(m_seg), + rte_pktmbuf_data_len(m_seg), + &txq->txq_hw_ring[id]); + remaining_hdr_len -= rte_pktmbuf_data_len(m_seg); + } else { + /* + * The segment must be split into header and + * payload segments + */ + sfc_ef100_tx_qdesc_seg_create( + rte_mbuf_data_iova(m_seg), + remaining_hdr_len, + &txq->txq_hw_ring[id]); + SFC_ASSERT(txq->sw_ring[id].mbuf == NULL); + + id = (*added)++ & txq->ptr_mask; + sfc_ef100_tx_qdesc_seg_create( + rte_mbuf_data_iova(m_seg) + remaining_hdr_len, + rte_pktmbuf_data_len(m_seg) - remaining_hdr_len, + &txq->txq_hw_ring[id]); + remaining_hdr_len = 0; + seg_split = 1; + } + txq->sw_ring[id].mbuf = m_seg; + m_seg = m_seg->next; + } while (remaining_hdr_len > 0); + + /* + * If a segment is split into header and payload segments, added + * pointer counts it twice and we should correct it. 
+ */ + nb_hdr_descs = ((id - tso_desc_id) & txq->ptr_mask) - seg_split; + nb_pld_descs = m->nb_segs - nb_hdr_descs + seg_split; + + sfc_ef100_tx_qdesc_tso_create(m, nb_hdr_descs, nb_pld_descs, header_len, + rte_pktmbuf_pkt_len(m) - header_len, + iph_off, tcph_off, + &txq->txq_hw_ring[tso_desc_id]); + + return m_seg; } static uint16_t @@ -428,27 +607,33 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) break; } - id = added++ & txq->ptr_mask; - sfc_ef100_tx_qdesc_send_create(m_seg, &txq->txq_hw_ring[id]); + if (m_seg->ol_flags & PKT_TX_TCP_SEG) { + m_seg = sfc_ef100_xmit_tso_pkt(txq, m_seg, &added); + } else { + id = added++ & txq->ptr_mask; + sfc_ef100_tx_qdesc_send_create(m_seg, + &txq->txq_hw_ring[id]); - /* - * rte_pktmbuf_free() is commonly used in DPDK for - * recycling packets - the function checks every - * segment's reference counter and returns the - * buffer to its pool whenever possible; - * nevertheless, freeing mbuf segments one by one - * may entail some performance decline; - * from this point, sfc_efx_tx_reap() does the same job - * on its own and frees buffers in bulks (all mbufs - * within a bulk belong to the same pool); - * from this perspective, individual segment pointers - * must be associated with the corresponding SW - * descriptors independently so that only one loop - * is sufficient on reap to inspect all the buffers - */ - txq->sw_ring[id].mbuf = m_seg; + /* + * rte_pktmbuf_free() is commonly used in DPDK for + * recycling packets - the function checks every + * segment's reference counter and returns the + * buffer to its pool whenever possible; + * nevertheless, freeing mbuf segments one by one + * may entail some performance decline; + * from this point, sfc_efx_tx_reap() does the same job + * on its own and frees buffers in bulks (all mbufs + * within a bulk belong to the same pool); + * from this perspective, individual segment pointers + * must be associated with the corresponding SW + * descriptors independently so that only one loop + * is sufficient on reap to inspect all the buffers + */ + txq->sw_ring[id].mbuf = m_seg; + m_seg = m_seg->next; + } - while ((m_seg = m_seg->next) != NULL) { + while (m_seg != NULL) { RTE_BUILD_BUG_ON(SFC_MBUF_SEG_LEN_MAX > SFC_EF100_TX_SEG_DESC_LEN_MAX); @@ -457,6 +642,7 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) rte_pktmbuf_data_len(m_seg), &txq->txq_hw_ring[id]); txq->sw_ring[id].mbuf = m_seg; + m_seg = m_seg->next; } dma_desc_space -= (added - pkt_start); @@ -552,6 +738,13 @@ sfc_ef100_tx_qcreate(uint16_t port_id, uint16_t queue_id, (info->hw_index << info->vi_window_shift); txq->evq_hw_ring = info->evq_hw_ring; + txq->tso_tcp_header_offset_limit = info->tso_tcp_header_offset_limit; + txq->tso_max_nb_header_descs = info->tso_max_nb_header_descs; + txq->tso_max_header_len = info->tso_max_header_len; + txq->tso_max_nb_payload_descs = info->tso_max_nb_payload_descs; + txq->tso_max_payload_len = info->tso_max_payload_len; + txq->tso_max_nb_outgoing_frames = info->tso_max_nb_outgoing_frames; + sfc_ef100_tx_debug(txq, "TxQ doorbell is %p", txq->doorbell); *dp_txqp = &txq->dp; @@ -690,7 +883,8 @@ struct sfc_dp_tx sfc_ef100_tx = { DEV_TX_OFFLOAD_OUTER_UDP_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM | DEV_TX_OFFLOAD_TCP_CKSUM | - DEV_TX_OFFLOAD_MULTI_SEGS, + DEV_TX_OFFLOAD_MULTI_SEGS | + DEV_TX_OFFLOAD_TCP_TSO, .get_dev_info = sfc_ef100_get_dev_info, .qsize_up_rings = sfc_ef100_tx_qsize_up_rings, .qcreate = sfc_ef100_tx_qcreate, diff --git 
a/drivers/net/sfc/sfc_tx.c b/drivers/net/sfc/sfc_tx.c index d50d49ca56..7a8495efc7 100644 --- a/drivers/net/sfc/sfc_tx.c +++ b/drivers/net/sfc/sfc_tx.c @@ -188,6 +188,17 @@ sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index, info.vi_window_shift = encp->enc_vi_window_shift; info.tso_tcp_header_offset_limit = encp->enc_tx_tso_tcp_header_offset_limit; + info.tso_max_nb_header_descs = + RTE_MIN(encp->enc_tx_tso_max_header_ndescs, + (uint32_t)UINT16_MAX); + info.tso_max_header_len = + RTE_MIN(encp->enc_tx_tso_max_header_length, + (uint32_t)UINT16_MAX); + info.tso_max_nb_payload_descs = + RTE_MIN(encp->enc_tx_tso_max_payload_ndescs, + (uint32_t)UINT16_MAX); + info.tso_max_payload_len = encp->enc_tx_tso_max_payload_length; + info.tso_max_nb_outgoing_frames = encp->enc_tx_tso_max_nframes; rc = sa->priv.dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index, &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr, @@ -592,7 +603,8 @@ sfc_tx_start(struct sfc_adapter *sa) sfc_log_init(sa, "txq_count = %u", sas->txq_count); if (sa->tso) { - if (!encp->enc_fw_assisted_tso_v2_enabled) { + if (!encp->enc_fw_assisted_tso_v2_enabled && + !encp->enc_tso_v3_enabled) { sfc_warn(sa, "TSO support was unable to be restored"); sa->tso = B_FALSE; sa->tso_encap = B_FALSE;
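
Reviewer note, not part of the patch: below is a minimal standalone sketch of the prepare-time TSO sanity checks that sfc_ef100_tx_prepare_pkt_tso() performs above, reduced to plain integers so they can be exercised in isolation. The struct tso_limits type, the tso_prepare_check() helper and the limit values are made-up placeholders for illustration only; in the driver the real limits come from the enc_tx_tso_max_* capabilities that sfc_tx_qinit() copies into the queue-creation info. As in the patch, the total segment count is used as an upper bound on the number of payload descriptors.

/*
 * Standalone sketch: prepare-time TSO limit checks with hypothetical
 * limit values (not real EF100 firmware numbers).
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct tso_limits {
	uint16_t max_header_len;
	uint16_t max_nb_payload_descs;
	uint32_t max_payload_len;
	uint32_t max_nb_outgoing_frames;
};

static int
tso_prepare_check(const struct tso_limits *lim, uint32_t pkt_len,
		  uint32_t header_len, uint16_t nb_segs, uint16_t tso_segsz)
{
	uint32_t payload_len = pkt_len - header_len;
	/* A transaction may emit at most max_nb_outgoing_frames frames */
	uint64_t mss_conformant_max = (uint64_t)tso_segsz *
				      lim->max_nb_outgoing_frames;

	/*
	 * Same independent checks as in the patch, OR-ed together to
	 * avoid extra conditional branches; nb_segs stands in for the
	 * payload descriptor upper bound.
	 */
	if ((header_len > lim->max_header_len) |
	    (nb_segs > lim->max_nb_payload_descs) |
	    (payload_len > lim->max_payload_len) |
	    (payload_len > mss_conformant_max) |
	    (pkt_len == header_len))
		return EINVAL;

	return 0;
}

int main(void)
{
	/* Hypothetical limits, for illustration only */
	const struct tso_limits lim = {
		.max_header_len = 192,
		.max_nb_payload_descs = 0x3fff,
		.max_payload_len = 0x3fffff,
		.max_nb_outgoing_frames = 32768,
	};

	/* 64 KB TCP payload in 40 segments, 66-byte headers, MSS 1448 */
	printf("ok=%d\n", tso_prepare_check(&lim, 65536 + 66, 66, 40, 1448));
	/* Header-only packet is rejected */
	printf("rejected=%d\n", tso_prepare_check(&lim, 66, 66, 1, 1448));
	return 0;
}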
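
Reviewer note, not part of the patch: the trickiest arithmetic above is in sfc_ef100_xmit_tso_pkt(), where the header and payload descriptor counts are derived from ring-index differences plus the seg_split correction for a segment that straddles the header/payload boundary. The sketch below reproduces that accounting on a plain linked list, assuming a simplified struct seg in place of rte_mbuf and a hypothetical count_tso_descs() helper, so the split-segment case can be checked on its own.

/*
 * Standalone sketch: header/payload descriptor accounting for a TSO
 * packet whose header ends in the middle of an mbuf segment.
 */
#include <stddef.h>
#include <stdio.h>

struct seg {
	size_t data_len;
	struct seg *next;
};

/*
 * Walk the segment list the way the transmit path does: a segment that
 * is entirely header takes one SEG descriptor; the segment where the
 * header ends takes two (header bytes, then the payload remainder),
 * which is why seg_split is subtracted from the header count and added
 * back to the payload count.
 */
static void
count_tso_descs(const struct seg *first, unsigned int nb_segs,
		size_t header_len,
		unsigned int *nb_hdr_descs, unsigned int *nb_pld_descs)
{
	const struct seg *s = first;
	size_t remaining_hdr_len = header_len;
	unsigned int descs = 0;
	unsigned int seg_split = 0;

	do {
		descs++;
		if (s->data_len <= remaining_hdr_len) {
			/* Segment is entirely header */
			remaining_hdr_len -= s->data_len;
		} else {
			/* Header ends inside this segment: one extra descriptor */
			descs++;
			remaining_hdr_len = 0;
			seg_split = 1;
		}
		s = s->next;
	} while (remaining_hdr_len > 0);

	*nb_hdr_descs = descs - seg_split;
	*nb_pld_descs = nb_segs - *nb_hdr_descs + seg_split;
}

int main(void)
{
	/*
	 * 2-segment packet: 100-byte first segment, 1400-byte second,
	 * with a 66-byte Ethernet + IPv4 + TCP header.
	 */
	struct seg s1 = { 1400, NULL };
	struct seg s0 = { 100, &s1 };
	unsigned int hdr, pld;

	count_tso_descs(&s0, 2, 66, &hdr, &pld);
	/* Expect hdr=1, pld=2: the first segment is split in two */
	printf("hdr=%u pld=%u\n", hdr, pld);
	return 0;
}

In this example the loop writes two descriptors for the first segment but only one counts as a header descriptor, and the payload descriptor count comes out as 2, matching nb_pld_descs = m->nb_segs - nb_hdr_descs + seg_split in the patch.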