numam-dpdk/lib/node/ip4_lookup_neon.h

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(C) 2020 Marvell International Ltd.
*/
#ifndef __INCLUDE_IP4_LOOKUP_NEON_H__
#define __INCLUDE_IP4_LOOKUP_NEON_H__
/* ARM64 NEON */
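/*
 * NEON (arm64) variant of the ip4_lookup node's process function. Destination
 * addresses are gathered four at a time and resolved with rte_lpm_lookupx4(),
 * while packets are speculatively enqueued to the rewrite next node and only
 * re-routed when a lookup disagrees. This header is expected to be included
 * from ip4_lookup.c, which provides the node context macros and the
 * node_mbuf_priv1() helper used below.
 */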
static uint16_t
ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
			void **objs, uint16_t nb_objs)
{
	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
	struct rte_lpm *lpm = IP4_LOOKUP_NODE_LPM(node->ctx);
	const int dyn = IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx);
	struct rte_ipv4_hdr *ipv4_hdr;
	void **to_next, **from;
	uint16_t last_spec = 0;
	rte_edge_t next_index;
	uint16_t n_left_from;
	uint16_t held = 0;
	uint32_t drop_nh;
	rte_xmm_t result;
	rte_xmm_t priv01;
	rte_xmm_t priv23;
	int32x4_t dip;
	int rc, i;

	/* Speculative next */
	next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE;
	/* Drop node */
	drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16;
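	/* Lookup results pack the next edge in the upper 16 bits and the next
	 * hop id in the lower 16 bits, so the drop edge is shifted into the
	 * upper half for use as the miss value.
	 */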
	pkts = (struct rte_mbuf **)objs;
	from = objs;
	n_left_from = nb_objs;
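	/* Prefetch the rest of the burst's object pointers, one cache line
	 * (OBJS_PER_CLINE pointers) at a time.
	 */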
	for (i = OBJS_PER_CLINE; i < RTE_GRAPH_BURST_SIZE; i += OBJS_PER_CLINE)
		rte_prefetch0(&objs[i]);
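	/* Prefetch the IPv4 headers of the first four packets */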
	for (i = 0; i < 4 && i < n_left_from; i++)
		rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[i], void *,
					sizeof(struct rte_ether_hdr)));

	dip = vdupq_n_s32(0);

	/* Get stream for the speculated next node */
	to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
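	/* The stream obtained above has room for nb_objs packets on the
	 * speculated edge; packets whose lookup matches the speculation are
	 * batched into it, everything else is enqueued individually below.
	 */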
	while (n_left_from >= 4) {
#if RTE_GRAPH_BURST_SIZE > 64
		/* Prefetch next-next mbufs */
		if (likely(n_left_from > 11)) {
			rte_prefetch0(pkts[8]);
			rte_prefetch0(pkts[9]);
			rte_prefetch0(pkts[10]);
			rte_prefetch0(pkts[11]);
		}
#endif
		/* Prefetch next mbuf data */
		if (likely(n_left_from > 7)) {
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *,
					sizeof(struct rte_ether_hdr)));
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *,
					sizeof(struct rte_ether_hdr)));
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *,
					sizeof(struct rte_ether_hdr)));
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *,
					sizeof(struct rte_ether_hdr)));
		}

		mbuf0 = pkts[0];
		mbuf1 = pkts[1];
		mbuf2 = pkts[2];
		mbuf3 = pkts[3];

		pkts += 4;
		n_left_from -= 4;

		/* Extract DIP of mbuf0 */
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *,
				sizeof(struct rte_ether_hdr));
		dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 0);
		/* Extract cksum, ttl as ipv4 hdr is in cache */
		priv01.u16[1] = ipv4_hdr->time_to_live;
		priv01.u32[1] = ipv4_hdr->hdr_checksum;

		/* Extract DIP of mbuf1 */
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf1, struct rte_ipv4_hdr *,
				sizeof(struct rte_ether_hdr));
		dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 1);
		/* Extract cksum, ttl as ipv4 hdr is in cache */
		priv01.u16[5] = ipv4_hdr->time_to_live;
		priv01.u32[3] = ipv4_hdr->hdr_checksum;

		/* Extract DIP of mbuf2 */
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf2, struct rte_ipv4_hdr *,
				sizeof(struct rte_ether_hdr));
		dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 2);
		/* Extract cksum, ttl as ipv4 hdr is in cache */
		priv23.u16[1] = ipv4_hdr->time_to_live;
		priv23.u32[1] = ipv4_hdr->hdr_checksum;

		/* Extract DIP of mbuf3 */
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf3, struct rte_ipv4_hdr *,
				sizeof(struct rte_ether_hdr));
		dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 3);
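		/* dst_addr is big endian in the packet; byte-swap each 32-bit
		 * lane to host order before the LPM lookup.
		 */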
		dip = vreinterpretq_s32_u8(
			vrev32q_u8(vreinterpretq_u8_s32(dip)));
		/* Extract cksum, ttl as ipv4 hdr is in cache */
		priv23.u16[5] = ipv4_hdr->time_to_live;
		priv23.u32[3] = ipv4_hdr->hdr_checksum;

		/* Perform LPM lookup to get NH and next node */
		rte_lpm_lookupx4(lpm, dip, result.u32, drop_nh);
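		/* Each 32-bit result carries the next edge in its upper 16
		 * bits and the next hop id in its lower 16 bits; stash the
		 * next hop alongside the saved ttl/cksum in each mbuf's
		 * private area.
		 */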
		priv01.u16[0] = result.u16[0];
		priv01.u16[4] = result.u16[2];
		priv23.u16[0] = result.u16[4];
		priv23.u16[4] = result.u16[6];

		node_mbuf_priv1(mbuf0, dyn)->u = priv01.u64[0];
		node_mbuf_priv1(mbuf1, dyn)->u = priv01.u64[1];
		node_mbuf_priv1(mbuf2, dyn)->u = priv23.u64[0];
		node_mbuf_priv1(mbuf3, dyn)->u = priv23.u64[1];

		/* Enqueue four to next node */
		rte_edge_t fix_spec = ((next_index == result.u16[1]) &&
				       (result.u16[1] == result.u16[3]) &&
				       (result.u16[3] == result.u16[5]) &&
				       (result.u16[5] == result.u16[7]));
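		/* fix_spec is non-zero when all four packets resolved to the
		 * speculated edge; in that case just extend the speculated
		 * run, otherwise flush it and enqueue each packet to its own
		 * edge.
		 */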
		if (unlikely(fix_spec == 0)) {
			/* Copy things successfully speculated till now */
			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
			from += last_spec;
			to_next += last_spec;
			held += last_spec;
			last_spec = 0;

			/* Next0 */
			if (next_index == result.u16[1]) {
				to_next[0] = from[0];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, result.u16[1],
						    from[0]);
			}

			/* Next1 */
			if (next_index == result.u16[3]) {
				to_next[0] = from[1];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, result.u16[3],
						    from[1]);
			}

			/* Next2 */
			if (next_index == result.u16[5]) {
				to_next[0] = from[2];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, result.u16[5],
						    from[2]);
			}

			/* Next3 */
			if (next_index == result.u16[7]) {
				to_next[0] = from[3];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, result.u16[7],
						    from[3]);
			}

			from += 4;
		} else {
			last_spec += 4;
		}
	}
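	/* Handle the remaining (< 4) packets one at a time with the scalar
	 * rte_lpm_lookup().
	 */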
	while (n_left_from > 0) {
		uint32_t next_hop;
		uint16_t next0;

		mbuf0 = pkts[0];

		pkts += 1;
		n_left_from -= 1;

		/* Extract DIP of mbuf0 */
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *,
				sizeof(struct rte_ether_hdr));
		/* Extract cksum, ttl as ipv4 hdr is in cache */
		node_mbuf_priv1(mbuf0, dyn)->cksum = ipv4_hdr->hdr_checksum;
		node_mbuf_priv1(mbuf0, dyn)->ttl = ipv4_hdr->time_to_live;

		rc = rte_lpm_lookup(lpm, rte_be_to_cpu_32(ipv4_hdr->dst_addr),
				    &next_hop);
		next_hop = (rc == 0) ? next_hop : drop_nh;

		node_mbuf_priv1(mbuf0, dyn)->nh = (uint16_t)next_hop;
		next_hop = next_hop >> 16;
		next0 = (uint16_t)next_hop;

		if (unlikely(next_index ^ next0)) {
			/* Copy things successfully speculated till now */
			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
			from += last_spec;
			to_next += last_spec;
			held += last_spec;
			last_spec = 0;

			rte_node_enqueue_x1(graph, node, next0, from[0]);
			from += 1;
		} else {
			last_spec += 1;
		}
	}

	/* !!! Home run !!! */
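	/* Every packet went to the speculated next node: hand the whole
	 * pre-allocated stream over without copying a single pointer.
	 */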
	if (likely(last_spec == nb_objs)) {
		rte_node_next_stream_move(graph, node, next_index);
		return nb_objs;
	}
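	/* Speculation was only partially right: copy the trailing speculated
	 * run and commit the held packets to the speculated next node.
	 */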
	held += last_spec;
	rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
	rte_node_next_stream_put(graph, node, next_index, held);

	return nb_objs;
}
#endif /* __INCLUDE_IP4_LOOKUP_NEON_H__ */