From b140e9701c81bf3f2699e6e49e28e369780f443a Mon Sep 17 00:00:00 2001 From: Pavan Nikhilesh Date: Tue, 25 Oct 2022 21:35:37 +0530 Subject: [PATCH] examples/l3fwd: use LPM vector path for event vector Use lpm vector path to process event vector. Signed-off-by: Pavan Nikhilesh Acked-by: Shijith Thotton --- examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++ examples/l3fwd/l3fwd_event.h | 71 ++++++++++++++++++++++++++++++++++ examples/l3fwd/l3fwd_lpm.c | 39 +++++++++++-------- examples/l3fwd/l3fwd_neon.h | 48 +++++++++++++++++++++++ examples/l3fwd/l3fwd_sse.h | 44 +++++++++++++++++++++ 5 files changed, 215 insertions(+), 16 deletions(-) diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h index 87018f5dbe..e45e138e59 100644 --- a/examples/l3fwd/l3fwd_altivec.h +++ b/examples/l3fwd/l3fwd_altivec.h @@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0, res; + + while (nb_elem > 7) { + __vector unsigned short dp1; + __vector unsigned short dp; + + dp = (__vector unsigned short)vec_splats((short)dst_ports[0]); + dp1 = *((__vector unsigned short *)&dst_ports[i]); + res = vec_all_eq(dp1, dp); + if (!res) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_ALTIVEC_H_ */ diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h index b93841a16f..3fe38aada0 100644 --- a/examples/l3fwd/l3fwd_event.h +++ b/examples/l3fwd/l3fwd_event.h @@ -82,6 +82,27 @@ struct l3fwd_event_resources { uint64_t vector_tmo_ns; }; +#if defined(RTE_ARCH_X86) +#include "l3fwd_sse.h" +#elif defined __ARM_NEON +#include "l3fwd_neon.h" +#elif defined(RTE_ARCH_PPC_64) +#include "l3fwd_altivec.h" +#else +static inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + int i; + + for (i = 0; i < nb_elem; i++) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + } + + return dst_ports[0]; +} +#endif + static inline void event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf) { @@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq) } } +static inline uint16_t +filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port, + uint16_t nb_pkts) +{ + uint16_t *des_pos, free = 0; + struct rte_mbuf **pos; + int i; + /* Filter out and free bad packets */ + for (i = 0; i < nb_pkts; i++) { + if (dst_port[i] == BAD_PORT) { + rte_pktmbuf_free(mbufs[i]); + if (!free) { + pos = &mbufs[i]; + des_pos = &dst_port[i]; + } + free++; + continue; + } + + if (free) { + *pos = mbufs[i]; + pos++; + *des_pos = dst_port[i]; + des_pos++; + } + } + + return nb_pkts - free; +} + +static inline void +process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port) +{ + uint16_t port, i; + + vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem); + /* Verify destination array */ + port = process_dst_port(dst_port, vec->nb_elem); + if (port == BAD_PORT) { + vec->attr_valid = 0; + for (i = 0; i < vec->nb_elem; i++) { + vec->mbufs[i]->port = dst_port[i]; + rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0); + } + } else { + vec->attr_valid = 1; + vec->port = port; + vec->queue = 0; + } +} struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void); void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf); diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c index 22d7f61a42..5172979c72 100644 --- a/examples/l3fwd/l3fwd_lpm.c +++ b/examples/l3fwd/l3fwd_lpm.c @@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy) } static __rte_always_inline void -lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf) +lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf, + uint16_t *dst_port) { struct rte_mbuf **mbufs = vec->mbufs; int i; - /* Process first packet to init vector attributes */ - lpm_process_event_pkt(lconf, mbufs[0]); +#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined RTE_ARCH_PPC_64 if (vec->attr_valid) { - if (mbufs[0]->port != BAD_PORT) - vec->port = mbufs[0]->port; - else - vec->attr_valid = 0; + l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port, + dst_port, lconf, 1); + } else { + for (i = 0; i < vec->nb_elem; i++) + l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port, + &dst_port[i], lconf, 1); } +#else + for (i = 0; i < vec->nb_elem; i++) + dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]); +#endif - for (i = 1; i < vec->nb_elem; i++) { - lpm_process_event_pkt(lconf, mbufs[i]); - event_vector_attr_validate(vec, mbufs[i]); - } + process_event_vector(vec, dst_port); } /* Same eventdev loop for single and burst of vector */ @@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, struct rte_event events[MAX_PKT_BURST]; int i, nb_enq = 0, nb_deq = 0; struct lcore_conf *lconf; + uint16_t *dst_port_list; unsigned int lcore_id; if (event_p_id < 0) @@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, lcore_id = rte_lcore_id(); lconf = &lcore_conf[lcore_id]; - + dst_port_list = + rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size, + RTE_CACHE_LINE_SIZE); + if (dst_port_list == NULL) + return; RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id); while (!force_quit) { @@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, events[i].op = RTE_EVENT_OP_FORWARD; } - lpm_process_event_vector(events[i].vec, lconf); - - if (flags & L3FWD_EVENT_TX_DIRECT) - event_vector_txq_set(events[i].vec, 0); + lpm_process_event_vector(events[i].vec, lconf, + dst_port_list); } if (flags & L3FWD_EVENT_TX_ENQ) { @@ -510,6 +516,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc, l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq, nb_deq, 1); + rte_free(dst_port_list); } int __rte_noinline diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h index ce515e0bc4..40807d5965 100644 --- a/examples/l3fwd/l3fwd_neon.h +++ b/examples/l3fwd/l3fwd_neon.h @@ -194,4 +194,52 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0; + +#if defined(RTE_ARCH_ARM64) + uint64_t res; + + while (nb_elem > 7) { + uint16x8_t dp = vdupq_n_u16(dst_ports[0]); + uint16x8_t dp1; + + dp1 = vld1q_u16(&dst_ports[i]); + dp1 = vceqq_u16(dp1, dp); + res = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(dp1, 4)), + 0); + if (res != ~0ULL) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem > 3) { + uint16x4_t dp = vdup_n_u16(dst_ports[0]); + uint16x4_t dp1; + + dp1 = vld1_u16(&dst_ports[i]); + dp1 = vceq_u16(dp1, dp); + res = vget_lane_u64(vreinterpret_u64_u16(dp1), 0); + if (res != ~0ULL) + return BAD_PORT; + + nb_elem -= 4; + i += 4; + } +#endif + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_NEON_H_ */ diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h index 0f0d0323a2..083729cdef 100644 --- a/examples/l3fwd/l3fwd_sse.h +++ b/examples/l3fwd/l3fwd_sse.h @@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst, } } +static __rte_always_inline uint16_t +process_dst_port(uint16_t *dst_ports, uint16_t nb_elem) +{ + uint16_t i = 0, res; + + while (nb_elem > 7) { + __m128i dp = _mm_set1_epi16(dst_ports[0]); + __m128i dp1; + + dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]); + dp1 = _mm_cmpeq_epi16(dp1, dp); + res = _mm_movemask_epi8(dp1); + if (res != 0xFFFF) + return BAD_PORT; + + nb_elem -= 8; + i += 8; + } + + while (nb_elem > 3) { + __m128i dp = _mm_set1_epi16(dst_ports[0]); + __m128i dp1; + + dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]); + dp1 = _mm_cmpeq_epi16(dp1, dp); + dp1 = _mm_unpacklo_epi16(dp1, dp1); + res = _mm_movemask_ps((__m128)dp1); + if (res != 0xF) + return BAD_PORT; + + nb_elem -= 4; + i += 4; + } + + while (nb_elem) { + if (dst_ports[i] != dst_ports[0]) + return BAD_PORT; + nb_elem--; + i++; + } + + return dst_ports[0]; +} + #endif /* _L3FWD_SSE_H_ */