examples/l3fwd: move packet group function in common
This makes the packet grouping function common, so that other examples can use it as needed. For each architecture (sse/neon/altivec), a port group header is created under examples/common/<arch>. Signed-off-by: Rahul Bhansali <rbhansali@marvell.com> Tested-by: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru> Acked-by: Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
This commit is contained in:
parent
65bd9c7abc
commit
732115ce38
@ -293,6 +293,7 @@ F: lib/*/*_neon.*
|
||||
F: drivers/*/*/*_neon.*
|
||||
F: app/*/*_neon.*
|
||||
F: examples/*/*_neon.*
|
||||
F: examples/common/neon/
|
||||
|
||||
IBM POWER (alpha)
|
||||
M: David Christensen <drc@linux.vnet.ibm.com>
|
||||
@ -302,6 +303,7 @@ F: lib/*/*_altivec*
|
||||
F: drivers/*/*/*_altivec.*
|
||||
F: app/*/*_altivec.*
|
||||
F: examples/*/*_altivec.*
|
||||
F: examples/common/altivec/
|
||||
|
||||
RISC-V
|
||||
M: Stanislaw Kardach <kda@semihalf.com>
|
||||
@ -325,6 +327,7 @@ F: app/*/*_sse*
|
||||
F: app/*/*_avx*
|
||||
F: examples/*/*_sse*
|
||||
F: examples/*/*_avx*
|
||||
F: examples/common/sse/
|
||||
|
||||
Linux EAL (with overlaps)
|
||||
F: lib/eal/linux/
|
||||
|
48
examples/common/altivec/port_group.h
Normal file
48
examples/common/altivec/port_group.h
Normal file
@ -0,0 +1,48 @@
|
||||
/* SPDX-License-Identifier: BSD-3-Clause
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
* Copyright(c) 2017 IBM Corporation.
|
||||
* Copyright(C) 2022 Marvell.
|
||||
*/
|
||||
|
||||
#ifndef PORT_GROUP_H
|
||||
#define PORT_GROUP_H
|
||||
|
||||
#include "pkt_group.h"
|
||||
|
||||
/*
|
||||
* Group consecutive packets with the same destination port in bursts of 4.
|
||||
* Suppose we have array of destination ports:
|
||||
* dst_port[] = {a, b, c, d, e, ... }
|
||||
* dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
|
||||
* We do 4 comparisons at once and the result is a 4-bit mask.
|
||||
* This mask is used as an index into a prebuilt array of pnum values.
|
||||
*/
|
||||
static inline uint16_t *
|
||||
port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp,
|
||||
__vector unsigned short dp1,
|
||||
__vector unsigned short dp2)
|
||||
{
|
||||
union {
|
||||
uint16_t u16[FWDSTEP + 1];
|
||||
uint64_t u64;
|
||||
} *pnum = (void *)pn;
|
||||
|
||||
int32_t v;
|
||||
|
||||
v = vec_any_eq(dp1, dp2);
|
||||
|
||||
|
||||
/* update last port counter. */
|
||||
lp[0] += gptbl[v].lpv;
|
||||
|
||||
/* if dest port value has changed. */
|
||||
if (v != GRPMSK) {
|
||||
pnum->u64 = gptbl[v].pnum;
|
||||
pnum->u16[FWDSTEP] = 1;
|
||||
lp = pnum->u16 + gptbl[v].idx;
|
||||
}
|
||||
|
||||
return lp;
|
||||
}
|
||||
|
||||
#endif /* PORT_GROUP_H */
|
50
examples/common/neon/port_group.h
Normal file
50
examples/common/neon/port_group.h
Normal file
@ -0,0 +1,50 @@
|
||||
/* SPDX-License-Identifier: BSD-3-Clause
|
||||
* Copyright(c) 2016-2018 Intel Corporation.
|
||||
* Copyright(c) 2017-2018 Linaro Limited.
|
||||
* Copyright(C) 2022 Marvell.
|
||||
*/
|
||||
|
||||
#ifndef PORT_GROUP_H
|
||||
#define PORT_GROUP_H
|
||||
|
||||
#include "pkt_group.h"
|
||||
|
||||
/*
|
||||
* Group consecutive packets with the same destination port in bursts of 4.
|
||||
* Suppose we have array of destination ports:
|
||||
* dst_port[] = {a, b, c, d, e, ... }
|
||||
* dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
|
||||
* We do 4 comparisons at once and the result is a 4-bit mask.
|
||||
* This mask is used as an index into a prebuilt array of pnum values.
|
||||
*/
|
||||
static inline uint16_t *
|
||||
port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1,
|
||||
uint16x8_t dp2)
|
||||
{
|
||||
union {
|
||||
uint16_t u16[FWDSTEP + 1];
|
||||
uint64_t u64;
|
||||
} *pnum = (void *)pn;
|
||||
|
||||
uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0};
|
||||
int32_t v;
|
||||
|
||||
dp1 = vceqq_u16(dp1, dp2);
|
||||
dp1 = vandq_u16(dp1, mask);
|
||||
v = vaddvq_u16(dp1);
|
||||
|
||||
/* update last port counter. */
|
||||
lp[0] += gptbl[v].lpv;
|
||||
rte_compiler_barrier();
|
||||
|
||||
/* if dest port value has changed. */
|
||||
if (v != GRPMSK) {
|
||||
pnum->u64 = gptbl[v].pnum;
|
||||
pnum->u16[FWDSTEP] = 1;
|
||||
lp = pnum->u16 + gptbl[v].idx;
|
||||
}
|
||||
|
||||
return lp;
|
||||
}
|
||||
|
||||
#endif /* PORT_GROUP_H */
|
139
examples/common/pkt_group.h
Normal file
139
examples/common/pkt_group.h
Normal file
@ -0,0 +1,139 @@
|
||||
/* SPDX-License-Identifier: BSD-3-Clause
|
||||
* Copyright(c) 2016-2018 Intel Corporation.
|
||||
* Copyright(c) 2017-2018 Linaro Limited.
|
||||
* Copyright(C) 2022 Marvell.
|
||||
*/
|
||||
|
||||
#ifndef PKT_GROUP_H
|
||||
#define PKT_GROUP_H
|
||||
|
||||
#define FWDSTEP 4
|
||||
|
||||
/*
|
||||
* Group consecutive packets with the same destination port into one burst.
|
||||
* To avoid extra latency this is done together with some other packet
|
||||
* processing, but after we made a final decision about packet's destination.
|
||||
* To do this we maintain:
|
||||
* pnum - array of number of consecutive packets with the same dest port for
|
||||
* each packet in the input burst.
|
||||
* lp - pointer to the last updated element in the pnum.
|
||||
* dlp - dest port value lp corresponds to.
|
||||
*/
|
||||
|
||||
#define GRPSZ (1 << FWDSTEP)
|
||||
#define GRPMSK (GRPSZ - 1)
|
||||
|
||||
/*
 * Scalar grouping step for the packet at position idx:
 * if dcp[idx] equals the tracked port dlp, bump the counter lp points at;
 * otherwise make dcp[idx] the tracked port, point lp at pn[idx] and start
 * a new count of 1 there.
 */
#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \
	if (likely((dcp)[(idx)] == (dlp))) { \
		*(lp) += 1; \
	} else { \
		(dlp) = (dcp)[(idx)]; \
		(lp) = &(pn)[(idx)]; \
		*(lp) = 1; \
	} \
} while (0)
|
||||
|
||||
/*
 * Lookup table indexed by the 4-bit comparison mask, where
 * bit 0: a == b, bit 1: b == c, bit 2: c == d, bit 3: d == e.
 */
static const struct {
	uint64_t pnum; /* prebuilt 4 values for pnum[]. */
	int32_t idx;   /* index for new last updated element. */
	uint16_t lpv;  /* add value to the last updated element. */
} gptbl[GRPSZ] = {
	/* 0x0: a != b, b != c, c != d, d != e */
	[0x0] = { .pnum = UINT64_C(0x0001000100010001), .idx = 4, .lpv = 0 },
	/* 0x1: a == b, b != c, c != d, d != e */
	[0x1] = { .pnum = UINT64_C(0x0001000100010002), .idx = 4, .lpv = 1 },
	/* 0x2: a != b, b == c, c != d, d != e */
	[0x2] = { .pnum = UINT64_C(0x0001000100020001), .idx = 4, .lpv = 0 },
	/* 0x3: a == b, b == c, c != d, d != e */
	[0x3] = { .pnum = UINT64_C(0x0001000100020003), .idx = 4, .lpv = 2 },
	/* 0x4: a != b, b != c, c == d, d != e */
	[0x4] = { .pnum = UINT64_C(0x0001000200010001), .idx = 4, .lpv = 0 },
	/* 0x5: a == b, b != c, c == d, d != e */
	[0x5] = { .pnum = UINT64_C(0x0001000200010002), .idx = 4, .lpv = 1 },
	/* 0x6: a != b, b == c, c == d, d != e */
	[0x6] = { .pnum = UINT64_C(0x0001000200030001), .idx = 4, .lpv = 0 },
	/* 0x7: a == b, b == c, c == d, d != e */
	[0x7] = { .pnum = UINT64_C(0x0001000200030004), .idx = 4, .lpv = 3 },
	/* 0x8: a != b, b != c, c != d, d == e */
	[0x8] = { .pnum = UINT64_C(0x0002000100010001), .idx = 3, .lpv = 0 },
	/* 0x9: a == b, b != c, c != d, d == e */
	[0x9] = { .pnum = UINT64_C(0x0002000100010002), .idx = 3, .lpv = 1 },
	/* 0xa: a != b, b == c, c != d, d == e */
	[0xa] = { .pnum = UINT64_C(0x0002000100020001), .idx = 3, .lpv = 0 },
	/* 0xb: a == b, b == c, c != d, d == e */
	[0xb] = { .pnum = UINT64_C(0x0002000100020003), .idx = 3, .lpv = 2 },
	/* 0xc: a != b, b != c, c == d, d == e */
	[0xc] = { .pnum = UINT64_C(0x0002000300010001), .idx = 2, .lpv = 0 },
	/* 0xd: a == b, b != c, c == d, d == e */
	[0xd] = { .pnum = UINT64_C(0x0002000300010002), .idx = 2, .lpv = 1 },
	/* 0xe: a != b, b == c, c == d, d == e */
	[0xe] = { .pnum = UINT64_C(0x0002000300040001), .idx = 1, .lpv = 0 },
	/* 0xf: a == b, b == c, c == d, d == e */
	[0xf] = { .pnum = UINT64_C(0x0002000300040005), .idx = 0, .lpv = 4 },
};
|
||||
|
||||
#endif /* PKT_GROUP_H */
|
47
examples/common/sse/port_group.h
Normal file
47
examples/common/sse/port_group.h
Normal file
@ -0,0 +1,47 @@
|
||||
/* SPDX-License-Identifier: BSD-3-Clause
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
* Copyright(C) 2022 Marvell.
|
||||
*/
|
||||
|
||||
#ifndef PORT_GROUP_H
|
||||
#define PORT_GROUP_H
|
||||
|
||||
#include "pkt_group.h"
|
||||
|
||||
/*
|
||||
* Group consecutive packets with the same destination port in bursts of 4.
|
||||
* Suppose we have array of destination ports:
|
||||
* dst_port[] = {a, b, c, d, e, ... }
|
||||
* dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
|
||||
* We do 4 comparisons at once and the result is a 4-bit mask.
|
||||
* This mask is used as an index into a prebuilt array of pnum values.
|
||||
*/
|
||||
static inline uint16_t *
|
||||
port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1,
|
||||
__m128i dp2)
|
||||
{
|
||||
union {
|
||||
uint16_t u16[FWDSTEP + 1];
|
||||
uint64_t u64;
|
||||
} *pnum = (void *)pn;
|
||||
|
||||
int32_t v;
|
||||
|
||||
dp1 = _mm_cmpeq_epi16(dp1, dp2);
|
||||
dp1 = _mm_unpacklo_epi16(dp1, dp1);
|
||||
v = _mm_movemask_ps((__m128)dp1);
|
||||
|
||||
/* update last port counter. */
|
||||
lp[0] += gptbl[v].lpv;
|
||||
|
||||
/* if dest port value has changed. */
|
||||
if (v != GRPMSK) {
|
||||
pnum->u64 = gptbl[v].pnum;
|
||||
pnum->u16[FWDSTEP] = 1;
|
||||
lp = pnum->u16 + gptbl[v].idx;
|
||||
}
|
||||
|
||||
return lp;
|
||||
}
|
||||
|
||||
#endif /* PORT_GROUP_H */
|
@ -37,6 +37,8 @@ $(error "Cannot generate statically-linked binaries with this version of pkg-con
|
||||
endif
|
||||
endif
|
||||
|
||||
CFLAGS += -I../common
|
||||
|
||||
build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
|
||||
$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
|
||||
|
||||
|
@ -44,8 +44,6 @@
|
||||
/* Used to mark destination port as 'invalid'. */
|
||||
#define BAD_PORT ((uint16_t)-1)
|
||||
|
||||
#define FWDSTEP 4
|
||||
|
||||
/* replace first 12B of the ethernet header. */
|
||||
#define MASK_ETH 0x3f
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
#define _L3FWD_ALTIVEC_H_
|
||||
|
||||
#include "l3fwd.h"
|
||||
#include "altivec/port_group.h"
|
||||
#include "l3fwd_common.h"
|
||||
|
||||
/*
|
||||
@ -82,42 +83,6 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
|
||||
&dst_port[3], pkt[3]->packet_type);
|
||||
}
|
||||
|
||||
/*
|
||||
* Group consecutive packets with the same destination port in bursts of 4.
|
||||
* Suppose we have array of destination ports:
|
||||
* dst_port[] = {a, b, c, d,, e, ... }
|
||||
* dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
|
||||
* We doing 4 comparisons at once and the result is 4 bit mask.
|
||||
* This mask is used as an index into prebuild array of pnum values.
|
||||
*/
|
||||
static inline uint16_t *
|
||||
port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp,
|
||||
__vector unsigned short dp1,
|
||||
__vector unsigned short dp2)
|
||||
{
|
||||
union {
|
||||
uint16_t u16[FWDSTEP + 1];
|
||||
uint64_t u64;
|
||||
} *pnum = (void *)pn;
|
||||
|
||||
int32_t v;
|
||||
|
||||
v = vec_any_eq(dp1, dp2);
|
||||
|
||||
|
||||
/* update last port counter. */
|
||||
lp[0] += gptbl[v].lpv;
|
||||
|
||||
/* if dest port value has changed. */
|
||||
if (v != GRPMSK) {
|
||||
pnum->u64 = gptbl[v].pnum;
|
||||
pnum->u16[FWDSTEP] = 1;
|
||||
lp = pnum->u16 + gptbl[v].idx;
|
||||
}
|
||||
|
||||
return lp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process one packet:
|
||||
* Update source and destination MAC addresses in the ethernet header.
|
||||
|
@ -7,6 +7,8 @@
|
||||
#ifndef _L3FWD_COMMON_H_
|
||||
#define _L3FWD_COMMON_H_
|
||||
|
||||
#include "pkt_group.h"
|
||||
|
||||
#ifdef DO_RFC_1812_CHECKS
|
||||
|
||||
#define IPV4_MIN_VER_IHL 0x45
|
||||
@ -50,133 +52,6 @@ rfc1812_process(struct rte_ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype)
|
||||
#define rfc1812_process(mb, dp, ptype) do { } while (0)
|
||||
#endif /* DO_RFC_1812_CHECKS */
|
||||
|
||||
/*
|
||||
* We group consecutive packets with the same destination port into one burst.
|
||||
* To avoid extra latency this is done together with some other packet
|
||||
* processing, but after we made a final decision about packet's destination.
|
||||
* To do this we maintain:
|
||||
* pnum - array of number of consecutive packets with the same dest port for
|
||||
* each packet in the input burst.
|
||||
* lp - pointer to the last updated element in the pnum.
|
||||
* dlp - dest port value lp corresponds to.
|
||||
*/
|
||||
|
||||
#define GRPSZ (1 << FWDSTEP)
|
||||
#define GRPMSK (GRPSZ - 1)
|
||||
|
||||
#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \
|
||||
if (likely((dlp) == (dcp)[(idx)])) { \
|
||||
(lp)[0]++; \
|
||||
} else { \
|
||||
(dlp) = (dcp)[idx]; \
|
||||
(lp) = (pn) + (idx); \
|
||||
(lp)[0] = 1; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static const struct {
|
||||
uint64_t pnum; /* prebuild 4 values for pnum[]. */
|
||||
int32_t idx; /* index for new last updated element. */
|
||||
uint16_t lpv; /* add value to the last updated element. */
|
||||
} gptbl[GRPSZ] = {
|
||||
{
|
||||
/* 0: a != b, b != c, c != d, d != e */
|
||||
.pnum = UINT64_C(0x0001000100010001),
|
||||
.idx = 4,
|
||||
.lpv = 0,
|
||||
},
|
||||
{
|
||||
/* 1: a == b, b != c, c != d, d != e */
|
||||
.pnum = UINT64_C(0x0001000100010002),
|
||||
.idx = 4,
|
||||
.lpv = 1,
|
||||
},
|
||||
{
|
||||
/* 2: a != b, b == c, c != d, d != e */
|
||||
.pnum = UINT64_C(0x0001000100020001),
|
||||
.idx = 4,
|
||||
.lpv = 0,
|
||||
},
|
||||
{
|
||||
/* 3: a == b, b == c, c != d, d != e */
|
||||
.pnum = UINT64_C(0x0001000100020003),
|
||||
.idx = 4,
|
||||
.lpv = 2,
|
||||
},
|
||||
{
|
||||
/* 4: a != b, b != c, c == d, d != e */
|
||||
.pnum = UINT64_C(0x0001000200010001),
|
||||
.idx = 4,
|
||||
.lpv = 0,
|
||||
},
|
||||
{
|
||||
/* 5: a == b, b != c, c == d, d != e */
|
||||
.pnum = UINT64_C(0x0001000200010002),
|
||||
.idx = 4,
|
||||
.lpv = 1,
|
||||
},
|
||||
{
|
||||
/* 6: a != b, b == c, c == d, d != e */
|
||||
.pnum = UINT64_C(0x0001000200030001),
|
||||
.idx = 4,
|
||||
.lpv = 0,
|
||||
},
|
||||
{
|
||||
/* 7: a == b, b == c, c == d, d != e */
|
||||
.pnum = UINT64_C(0x0001000200030004),
|
||||
.idx = 4,
|
||||
.lpv = 3,
|
||||
},
|
||||
{
|
||||
/* 8: a != b, b != c, c != d, d == e */
|
||||
.pnum = UINT64_C(0x0002000100010001),
|
||||
.idx = 3,
|
||||
.lpv = 0,
|
||||
},
|
||||
{
|
||||
/* 9: a == b, b != c, c != d, d == e */
|
||||
.pnum = UINT64_C(0x0002000100010002),
|
||||
.idx = 3,
|
||||
.lpv = 1,
|
||||
},
|
||||
{
|
||||
/* 0xa: a != b, b == c, c != d, d == e */
|
||||
.pnum = UINT64_C(0x0002000100020001),
|
||||
.idx = 3,
|
||||
.lpv = 0,
|
||||
},
|
||||
{
|
||||
/* 0xb: a == b, b == c, c != d, d == e */
|
||||
.pnum = UINT64_C(0x0002000100020003),
|
||||
.idx = 3,
|
||||
.lpv = 2,
|
||||
},
|
||||
{
|
||||
/* 0xc: a != b, b != c, c == d, d == e */
|
||||
.pnum = UINT64_C(0x0002000300010001),
|
||||
.idx = 2,
|
||||
.lpv = 0,
|
||||
},
|
||||
{
|
||||
/* 0xd: a == b, b != c, c == d, d == e */
|
||||
.pnum = UINT64_C(0x0002000300010002),
|
||||
.idx = 2,
|
||||
.lpv = 1,
|
||||
},
|
||||
{
|
||||
/* 0xe: a != b, b == c, c == d, d == e */
|
||||
.pnum = UINT64_C(0x0002000300040001),
|
||||
.idx = 1,
|
||||
.lpv = 0,
|
||||
},
|
||||
{
|
||||
/* 0xf: a == b, b == c, c == d, d == e */
|
||||
.pnum = UINT64_C(0x0002000300040005),
|
||||
.idx = 0,
|
||||
.lpv = 4,
|
||||
},
|
||||
};
|
||||
|
||||
static __rte_always_inline void
|
||||
send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
|
||||
uint32_t num)
|
||||
|
@ -7,6 +7,7 @@
|
||||
#define _L3FWD_NEON_H_
|
||||
|
||||
#include "l3fwd.h"
|
||||
#include "neon/port_group.h"
|
||||
#include "l3fwd_common.h"
|
||||
|
||||
/*
|
||||
@ -62,44 +63,6 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
|
||||
&dst_port[3], pkt[3]->packet_type);
|
||||
}
|
||||
|
||||
/*
|
||||
* Group consecutive packets with the same destination port in bursts of 4.
|
||||
* Suppose we have array of destination ports:
|
||||
* dst_port[] = {a, b, c, d,, e, ... }
|
||||
* dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
|
||||
* We doing 4 comparisons at once and the result is 4 bit mask.
|
||||
* This mask is used as an index into prebuild array of pnum values.
|
||||
*/
|
||||
static inline uint16_t *
|
||||
port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1,
|
||||
uint16x8_t dp2)
|
||||
{
|
||||
union {
|
||||
uint16_t u16[FWDSTEP + 1];
|
||||
uint64_t u64;
|
||||
} *pnum = (void *)pn;
|
||||
|
||||
int32_t v;
|
||||
uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0};
|
||||
|
||||
dp1 = vceqq_u16(dp1, dp2);
|
||||
dp1 = vandq_u16(dp1, mask);
|
||||
v = vaddvq_u16(dp1);
|
||||
|
||||
/* update last port counter. */
|
||||
lp[0] += gptbl[v].lpv;
|
||||
rte_compiler_barrier();
|
||||
|
||||
/* if dest port value has changed. */
|
||||
if (v != GRPMSK) {
|
||||
pnum->u64 = gptbl[v].pnum;
|
||||
pnum->u16[FWDSTEP] = 1;
|
||||
lp = pnum->u16 + gptbl[v].idx;
|
||||
}
|
||||
|
||||
return lp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process one packet:
|
||||
* Update source and destination MAC addresses in the ethernet header.
|
||||
|
@ -7,6 +7,7 @@
|
||||
#define _L3FWD_SSE_H_
|
||||
|
||||
#include "l3fwd.h"
|
||||
#include "sse/port_group.h"
|
||||
#include "l3fwd_common.h"
|
||||
|
||||
/*
|
||||
@ -62,41 +63,6 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
|
||||
&dst_port[3], pkt[3]->packet_type);
|
||||
}
|
||||
|
||||
/*
|
||||
* Group consecutive packets with the same destination port in bursts of 4.
|
||||
* Suppose we have array of destination ports:
|
||||
* dst_port[] = {a, b, c, d,, e, ... }
|
||||
* dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
|
||||
* We doing 4 comparisons at once and the result is 4 bit mask.
|
||||
* This mask is used as an index into prebuild array of pnum values.
|
||||
*/
|
||||
static inline uint16_t *
|
||||
port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2)
|
||||
{
|
||||
union {
|
||||
uint16_t u16[FWDSTEP + 1];
|
||||
uint64_t u64;
|
||||
} *pnum = (void *)pn;
|
||||
|
||||
int32_t v;
|
||||
|
||||
dp1 = _mm_cmpeq_epi16(dp1, dp2);
|
||||
dp1 = _mm_unpacklo_epi16(dp1, dp1);
|
||||
v = _mm_movemask_ps((__m128)dp1);
|
||||
|
||||
/* update last port counter. */
|
||||
lp[0] += gptbl[v].lpv;
|
||||
|
||||
/* if dest port value has changed. */
|
||||
if (v != GRPMSK) {
|
||||
pnum->u64 = gptbl[v].pnum;
|
||||
pnum->u16[FWDSTEP] = 1;
|
||||
lp = pnum->u16 + gptbl[v].idx;
|
||||
}
|
||||
|
||||
return lp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process one packet:
|
||||
* Update source and destination MAC addresses in the ethernet header.
|
||||
|
@ -97,7 +97,7 @@ foreach example: examples
|
||||
ldflags = default_ldflags
|
||||
|
||||
ext_deps = []
|
||||
includes = [include_directories(example)]
|
||||
includes = [include_directories(example, 'common')]
|
||||
deps = ['eal', 'mempool', 'net', 'mbuf', 'ethdev', 'cmdline']
|
||||
subdir(example)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user