From 48a580c5df78b3addac6f57969167bb86c7428f5 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Fri, 18 Dec 2020 15:28:37 -0500 Subject: [PATCH] net/bnxt: set correct checksum status in mbuf The setting of the mbuf ol_flags field for tunneled packets should be different depending upon whether DEV_RX_OFFLOAD_OUTER_* offloads are enabled. Initialize ol_flags mappings based on the receive offload configuration when the receive ring is initialized. Cc: stable@dpdk.org Signed-off-by: Lance Richardson Reviewed-by: Kalesh AP Acked-by: Ajit Khaparde --- drivers/net/bnxt/bnxt_rxr.c | 85 +++++++++++++++++++++------ drivers/net/bnxt/bnxt_rxr.h | 4 +- drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 6 +- drivers/net/bnxt/bnxt_rxtx_vec_sse.c | 6 +- 4 files changed, 80 insertions(+), 21 deletions(-) diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c index 1edc8dac43..14901f1b99 100644 --- a/drivers/net/bnxt/bnxt_rxr.c +++ b/drivers/net/bnxt/bnxt_rxr.c @@ -416,43 +416,91 @@ bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1) } static void __rte_cold -bnxt_init_ol_flags_tables(struct bnxt_rx_ring_info *rxr) +bnxt_init_ol_flags_tables(struct bnxt_rx_queue *rxq) { + struct bnxt_rx_ring_info *rxr = rxq->rx_ring; + struct rte_eth_conf *dev_conf; + bool outer_cksum_enabled; + uint64_t offloads; uint32_t *pt; int i; + dev_conf = &rxq->bp->eth_dev->data->dev_conf; + offloads = dev_conf->rxmode.offloads; + + outer_cksum_enabled = !!(offloads & (DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | + DEV_RX_OFFLOAD_OUTER_UDP_CKSUM)); + /* Initialize ol_flags table. */ pt = rxr->ol_flags_table; for (i = 0; i < BNXT_OL_FLAGS_TBL_DIM; i++) { pt[i] = 0; + if (i & RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) pt[i] |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; - if (i & RX_PKT_CMPL_FLAGS2_IP_CS_CALC) - pt[i] |= PKT_RX_IP_CKSUM_GOOD; + if (i & (RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC << 3)) { + /* Tunnel case. */ + if (outer_cksum_enabled) { + if (i & RX_PKT_CMPL_FLAGS2_IP_CS_CALC) + pt[i] |= PKT_RX_IP_CKSUM_GOOD; - if (i & RX_PKT_CMPL_FLAGS2_L4_CS_CALC) - pt[i] |= PKT_RX_L4_CKSUM_GOOD; + if (i & RX_PKT_CMPL_FLAGS2_L4_CS_CALC) + pt[i] |= PKT_RX_L4_CKSUM_GOOD; - if (i & RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC) - pt[i] |= PKT_RX_OUTER_L4_CKSUM_GOOD; + if (i & RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC) + pt[i] |= PKT_RX_OUTER_L4_CKSUM_GOOD; + } else { + if (i & RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC) + pt[i] |= PKT_RX_IP_CKSUM_GOOD; + + if (i & RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC) + pt[i] |= PKT_RX_L4_CKSUM_GOOD; + } + } else { + /* Non-tunnel case. */ + if (i & RX_PKT_CMPL_FLAGS2_IP_CS_CALC) + pt[i] |= PKT_RX_IP_CKSUM_GOOD; + + if (i & RX_PKT_CMPL_FLAGS2_L4_CS_CALC) + pt[i] |= PKT_RX_L4_CKSUM_GOOD; + } } /* Initialize checksum error table. */ pt = rxr->ol_flags_err_table; for (i = 0; i < BNXT_OL_FLAGS_ERR_TBL_DIM; i++) { pt[i] = 0; - if (i & (RX_PKT_CMPL_ERRORS_IP_CS_ERROR >> 4)) - pt[i] |= PKT_RX_IP_CKSUM_BAD; - if (i & (RX_PKT_CMPL_ERRORS_L4_CS_ERROR >> 4)) - pt[i] |= PKT_RX_L4_CKSUM_BAD; + if (i & (RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC << 2)) { + /* Tunnel case. */ + if (outer_cksum_enabled) { + if (i & (RX_PKT_CMPL_ERRORS_IP_CS_ERROR >> 4)) + pt[i] |= PKT_RX_IP_CKSUM_BAD; - if (i & (RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR >> 4)) - pt[i] |= PKT_RX_EIP_CKSUM_BAD; + if (i & (RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR >> 4)) + pt[i] |= PKT_RX_EIP_CKSUM_BAD; - if (i & (RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR >> 4)) - pt[i] |= PKT_RX_OUTER_L4_CKSUM_BAD; + if (i & (RX_PKT_CMPL_ERRORS_L4_CS_ERROR >> 4)) + pt[i] |= PKT_RX_L4_CKSUM_BAD; + + if (i & (RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR >> 4)) + pt[i] |= PKT_RX_OUTER_L4_CKSUM_BAD; + } else { + if (i & (RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR >> 4)) + pt[i] |= PKT_RX_IP_CKSUM_BAD; + + if (i & (RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR >> 4)) + pt[i] |= PKT_RX_L4_CKSUM_BAD; + } + } else { + /* Non-tunnel case. */ + if (i & (RX_PKT_CMPL_ERRORS_IP_CS_ERROR >> 4)) + pt[i] |= PKT_RX_IP_CKSUM_BAD; + + if (i & (RX_PKT_CMPL_ERRORS_L4_CS_ERROR >> 4)) + pt[i] |= PKT_RX_L4_CKSUM_BAD; + } } } @@ -472,6 +520,7 @@ bnxt_set_ol_flags(struct bnxt_rx_ring_info *rxr, struct rx_pkt_cmpl *rxcmp, RX_PKT_CMPL_FLAGS2_T_L4_CS_CALC | RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN); + flags |= (flags & RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC) << 3; errors = rte_le_to_cpu_16(rxcmp1->errors_v2) & (RX_PKT_CMPL_ERRORS_IP_CS_ERROR | RX_PKT_CMPL_ERRORS_L4_CS_ERROR | @@ -481,8 +530,10 @@ bnxt_set_ol_flags(struct bnxt_rx_ring_info *rxr, struct rx_pkt_cmpl *rxcmp, ol_flags = rxr->ol_flags_table[flags & ~errors]; - if (errors) + if (unlikely(errors)) { + errors |= (flags & RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC) << 2; ol_flags |= rxr->ol_flags_err_table[errors]; + } if (flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID) { mbuf->hash.rss = rte_le_to_cpu_32(rxcmp->rss_hash); @@ -1125,7 +1176,7 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq) bnxt_init_rxbds(ring, type, size); /* Initialize offload flags parsing table. */ - bnxt_init_ol_flags_tables(rxr); + bnxt_init_ol_flags_tables(rxq); raw_prod = rxr->rx_raw_prod; for (i = 0; i < ring->ring_size; i++) { diff --git a/drivers/net/bnxt/bnxt_rxr.h b/drivers/net/bnxt/bnxt_rxr.h index 4db1e8761e..b2942030ab 100644 --- a/drivers/net/bnxt/bnxt_rxr.h +++ b/drivers/net/bnxt/bnxt_rxr.h @@ -42,8 +42,8 @@ static inline uint16_t bnxt_tpa_start_agg_id(struct bnxt *bp, /* Number of descriptors to process per inner loop in vector mode. */ #define RTE_BNXT_DESCS_PER_LOOP 4U -#define BNXT_OL_FLAGS_TBL_DIM 32 -#define BNXT_OL_FLAGS_ERR_TBL_DIM 16 +#define BNXT_OL_FLAGS_TBL_DIM 64 +#define BNXT_OL_FLAGS_ERR_TBL_DIM 32 struct bnxt_tpa_info { struct rte_mbuf *mbuf; diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c index d9ac822be8..4839e2a38d 100644 --- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c +++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c @@ -80,7 +80,7 @@ descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4], const uint32x4_t flags2_index_mask = vdupq_n_u32(0x1F); const uint32x4_t flags2_error_mask = vdupq_n_u32(0x0F); uint32x4_t flags_type, flags2, index, errors, rss_flags; - uint32x4_t tmp, ptype_idx; + uint32x4_t tmp, ptype_idx, is_tunnel; uint64x2_t t0, t1; uint32_t ol_flags; @@ -117,10 +117,14 @@ descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4], vget_low_u64(t1))); /* Compute ol_flags and checksum error indexes for four packets. */ + is_tunnel = vandq_u32(flags2, vdupq_n_u32(4)); + is_tunnel = vshlq_n_u32(is_tunnel, 3); errors = vandq_u32(vshrq_n_u32(errors, 4), flags2_error_mask); errors = vandq_u32(errors, flags2); index = vbicq_u32(flags2, errors); + errors = vorrq_u32(errors, vshrq_n_u32(is_tunnel, 1)); + index = vorrq_u32(index, is_tunnel); /* Update mbuf rearm_data for four packets. */ GET_OL_FLAGS(rss_flags, index, errors, 0, ol_flags); diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c index 7f5825d333..c2523040e8 100644 --- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c +++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c @@ -73,7 +73,7 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], const __m128i rss_mask = _mm_set1_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID); __m128i t0, t1, flags_type, flags2, index, errors, rss_flags; - __m128i ptype_idx; + __m128i ptype_idx, is_tunnel; uint32_t ol_flags; /* Compute packet type table indexes for four packets */ @@ -100,6 +100,8 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], t1 = _mm_unpackhi_epi32(mm_rxcmp1[2], mm_rxcmp1[3]); /* Compute ol_flags and checksum error indexes for four packets. */ + is_tunnel = _mm_and_si128(flags2, _mm_set1_epi32(4)); + is_tunnel = _mm_slli_epi32(is_tunnel, 3); flags2 = _mm_and_si128(flags2, _mm_set1_epi32(0x1F)); errors = _mm_srli_epi32(_mm_unpacklo_epi64(t0, t1), 4); @@ -107,6 +109,8 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], errors = _mm_and_si128(errors, flags2); index = _mm_andnot_si128(errors, flags2); + errors = _mm_or_si128(errors, _mm_srli_epi32(is_tunnel, 1)); + index = _mm_or_si128(index, is_tunnel); /* Update mbuf rearm_data for four packets. */ GET_OL_FLAGS(rss_flags, index, errors, 0, ol_flags);