net/virtio: add vectorized packed ring Rx
Optimize packed ring Rx path with SIMD instructions. Solution of optimization is pretty like vhost, is that split path into batch and single functions. Batch function is further optimized by AVX512 instructions. Signed-off-by: Marvin Liu <yong.liu@intel.com> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
This commit is contained in:
parent
57f90f8945
commit
77d66da838
@ -504,6 +504,7 @@ are shown in below table:
|
|||||||
Packed virtqueue non-meregable path virtio_recv_pkts_packed virtio_xmit_pkts_packed
|
Packed virtqueue non-meregable path virtio_recv_pkts_packed virtio_xmit_pkts_packed
|
||||||
Packed virtqueue in-order mergeable path virtio_recv_mergeable_pkts_packed virtio_xmit_pkts_packed
|
Packed virtqueue in-order mergeable path virtio_recv_mergeable_pkts_packed virtio_xmit_pkts_packed
|
||||||
Packed virtqueue in-order non-mergeable path virtio_recv_pkts_packed virtio_xmit_pkts_packed
|
Packed virtqueue in-order non-mergeable path virtio_recv_pkts_packed virtio_xmit_pkts_packed
|
||||||
|
Packed virtqueue vectorized Rx path virtio_recv_pkts_packed_vec virtio_xmit_pkts_packed
|
||||||
============================================ ================================= ========================
|
============================================ ================================= ========================
|
||||||
|
|
||||||
Virtio paths Support Status from Release to Release
|
Virtio paths Support Status from Release to Release
|
||||||
@ -521,20 +522,21 @@ All virtio paths support status are shown in below table:
|
|||||||
|
|
||||||
.. table:: Virtio Paths and Releases
|
.. table:: Virtio Paths and Releases
|
||||||
|
|
||||||
============================================ ============= ============= =============
|
============================================ ============= ============= ============= =======
|
||||||
Virtio paths 16.11 ~ 18.05 18.08 ~ 18.11 19.02 ~ 19.11
|
Virtio paths 16.11 ~ 18.05 18.08 ~ 18.11 19.02 ~ 19.11 20.05 ~
|
||||||
============================================ ============= ============= =============
|
============================================ ============= ============= ============= =======
|
||||||
Split virtqueue mergeable path Y Y Y
|
Split virtqueue mergeable path Y Y Y Y
|
||||||
Split virtqueue non-mergeable path Y Y Y
|
Split virtqueue non-mergeable path Y Y Y Y
|
||||||
Split virtqueue vectorized Rx path Y Y Y
|
Split virtqueue vectorized Rx path Y Y Y Y
|
||||||
Split virtqueue simple Tx path Y N N
|
Split virtqueue simple Tx path Y N N N
|
||||||
Split virtqueue in-order mergeable path Y Y
|
Split virtqueue in-order mergeable path Y Y Y
|
||||||
Split virtqueue in-order non-mergeable path Y Y
|
Split virtqueue in-order non-mergeable path Y Y Y
|
||||||
Packed virtqueue mergeable path Y
|
Packed virtqueue mergeable path Y Y
|
||||||
Packed virtqueue non-mergeable path Y
|
Packed virtqueue non-mergeable path Y Y
|
||||||
Packed virtqueue in-order mergeable path Y
|
Packed virtqueue in-order mergeable path Y Y
|
||||||
Packed virtqueue in-order non-mergeable path Y
|
Packed virtqueue in-order non-mergeable path Y Y
|
||||||
============================================ ============= ============= =============
|
Packed virtqueue vectorized Rx path Y
|
||||||
|
============================================ ============= ============= ============= =======
|
||||||
|
|
||||||
QEMU Support Status
|
QEMU Support Status
|
||||||
~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~
|
||||||
|
@ -36,6 +36,41 @@ else ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)
|
|||||||
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple_neon.c
|
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple_neon.c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifneq ($(FORCE_DISABLE_AVX512), y)
|
||||||
|
CC_AVX512_SUPPORT=\
|
||||||
|
$(shell $(CC) -march=native -dM -E - </dev/null 2>&1 | \
|
||||||
|
sed '/./{H;$$!d} ; x ; /AVX512F/!d; /AVX512BW/!d; /AVX512VL/!d' | \
|
||||||
|
grep -q AVX512 && echo 1)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CC_AVX512_SUPPORT), 1)
|
||||||
|
CFLAGS += -DCC_AVX512_SUPPORT
|
||||||
|
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_packed_avx.c
|
||||||
|
|
||||||
|
ifeq ($(RTE_TOOLCHAIN), gcc)
|
||||||
|
ifeq ($(shell test $(GCC_VERSION) -ge 83 && echo 1), 1)
|
||||||
|
CFLAGS += -DVIRTIO_GCC_UNROLL_PRAGMA
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(RTE_TOOLCHAIN), clang)
|
||||||
|
ifeq ($(shell test $(CLANG_MAJOR_VERSION)$(CLANG_MINOR_VERSION) -ge 37 && echo 1), 1)
|
||||||
|
CFLAGS += -DVIRTIO_CLANG_UNROLL_PRAGMA
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(RTE_TOOLCHAIN), icc)
|
||||||
|
ifeq ($(shell test $(ICC_MAJOR_VERSION) -ge 16 && echo 1), 1)
|
||||||
|
CFLAGS += -DVIRTIO_ICC_UNROLL_PRAGMA
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
CFLAGS_virtio_rxtx_packed_avx.o += -mavx512f -mavx512bw -mavx512vl
|
||||||
|
ifeq ($(shell test $(GCC_VERSION) -ge 100 && echo 1), 1)
|
||||||
|
CFLAGS_virtio_rxtx_packed_avx.o += -Wno-zero-length-bounds
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CONFIG_RTE_VIRTIO_USER),y)
|
ifeq ($(CONFIG_RTE_VIRTIO_USER),y)
|
||||||
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c
|
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c
|
||||||
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c
|
SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c
|
||||||
|
@ -9,6 +9,20 @@ sources += files('virtio_ethdev.c',
|
|||||||
deps += ['kvargs', 'bus_pci']
|
deps += ['kvargs', 'bus_pci']
|
||||||
|
|
||||||
if arch_subdir == 'x86'
|
if arch_subdir == 'x86'
|
||||||
|
if not machine_args.contains('-mno-avx512f')
|
||||||
|
if cc.has_argument('-mavx512f') and cc.has_argument('-mavx512vl') and cc.has_argument('-mavx512bw')
|
||||||
|
cflags += ['-mavx512f', '-mavx512bw', '-mavx512vl']
|
||||||
|
cflags += ['-DCC_AVX512_SUPPORT']
|
||||||
|
if (toolchain == 'gcc' and cc.version().version_compare('>=8.3.0'))
|
||||||
|
cflags += '-DVHOST_GCC_UNROLL_PRAGMA'
|
||||||
|
elif (toolchain == 'clang' and cc.version().version_compare('>=3.7.0'))
|
||||||
|
cflags += '-DVHOST_CLANG_UNROLL_PRAGMA'
|
||||||
|
elif (toolchain == 'icc' and cc.version().version_compare('>=16.0.0'))
|
||||||
|
cflags += '-DVHOST_ICC_UNROLL_PRAGMA'
|
||||||
|
endif
|
||||||
|
sources += files('virtio_rxtx_packed_avx.c')
|
||||||
|
endif
|
||||||
|
endif
|
||||||
sources += files('virtio_rxtx_simple_sse.c')
|
sources += files('virtio_rxtx_simple_sse.c')
|
||||||
elif arch_subdir == 'ppc'
|
elif arch_subdir == 'ppc'
|
||||||
sources += files('virtio_rxtx_simple_altivec.c')
|
sources += files('virtio_rxtx_simple_altivec.c')
|
||||||
|
@ -105,6 +105,9 @@ uint16_t virtio_xmit_pkts_inorder(void *tx_queue, struct rte_mbuf **tx_pkts,
|
|||||||
uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
|
uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
|
||||||
uint16_t nb_pkts);
|
uint16_t nb_pkts);
|
||||||
|
|
||||||
|
uint16_t virtio_recv_pkts_packed_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
|
||||||
|
uint16_t nb_pkts);
|
||||||
|
|
||||||
int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
|
int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
|
||||||
|
|
||||||
void virtio_interrupt_handler(void *param);
|
void virtio_interrupt_handler(void *param);
|
||||||
|
@ -2030,3 +2030,13 @@ virtio_xmit_pkts_inorder(void *tx_queue,
|
|||||||
|
|
||||||
return nb_tx;
|
return nb_tx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef CC_AVX512_SUPPORT
|
||||||
|
uint16_t
|
||||||
|
virtio_recv_pkts_packed_vec(void *rx_queue __rte_unused,
|
||||||
|
struct rte_mbuf **rx_pkts __rte_unused,
|
||||||
|
uint16_t nb_pkts __rte_unused)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif /* ifndef CC_AVX512_SUPPORT */
|
||||||
|
374
drivers/net/virtio/virtio_rxtx_packed_avx.c
Normal file
374
drivers/net/virtio/virtio_rxtx_packed_avx.c
Normal file
@ -0,0 +1,374 @@
|
|||||||
|
/* SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
* Copyright(c) 2010-2020 Intel Corporation
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
|
#include <rte_net.h>
|
||||||
|
|
||||||
|
#include "virtio_logs.h"
|
||||||
|
#include "virtio_ethdev.h"
|
||||||
|
#include "virtio_pci.h"
|
||||||
|
#include "virtqueue.h"
|
||||||
|
|
||||||
|
#define BYTE_SIZE 8
|
||||||
|
/* flag bits offset in packed ring desc higher 64bits */
|
||||||
|
#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
|
||||||
|
offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)
|
||||||
|
|
||||||
|
#define PACKED_FLAGS_MASK ((0ULL | VRING_PACKED_DESC_F_AVAIL_USED) << \
|
||||||
|
FLAGS_BITS_OFFSET)
|
||||||
|
|
||||||
|
#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
|
||||||
|
sizeof(struct vring_packed_desc))
|
||||||
|
#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)
|
||||||
|
|
||||||
|
#ifdef VIRTIO_GCC_UNROLL_PRAGMA
|
||||||
|
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \
|
||||||
|
for (iter = val; iter < size; iter++)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef VIRTIO_CLANG_UNROLL_PRAGMA
|
||||||
|
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
|
||||||
|
for (iter = val; iter < size; iter++)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef VIRTIO_ICC_UNROLL_PRAGMA
|
||||||
|
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll (4)") \
|
||||||
|
for (iter = val; iter < size; iter++)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef virtio_for_each_try_unroll
|
||||||
|
#define virtio_for_each_try_unroll(iter, val, num) \
|
||||||
|
for (iter = val; iter < num; iter++)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
virtio_update_batch_stats(struct virtnet_stats *stats,
|
||||||
|
uint16_t pkt_len1,
|
||||||
|
uint16_t pkt_len2,
|
||||||
|
uint16_t pkt_len3,
|
||||||
|
uint16_t pkt_len4)
|
||||||
|
{
|
||||||
|
stats->bytes += pkt_len1;
|
||||||
|
stats->bytes += pkt_len2;
|
||||||
|
stats->bytes += pkt_len3;
|
||||||
|
stats->bytes += pkt_len4;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Optionally fill offload information in structure */
|
||||||
|
static inline int
|
||||||
|
virtio_vec_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
|
||||||
|
{
|
||||||
|
struct rte_net_hdr_lens hdr_lens;
|
||||||
|
uint32_t hdrlen, ptype;
|
||||||
|
int l4_supported = 0;
|
||||||
|
|
||||||
|
/* nothing to do */
|
||||||
|
if (hdr->flags == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* GSO not support in vec path, skip check */
|
||||||
|
m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
|
||||||
|
|
||||||
|
ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
|
||||||
|
m->packet_type = ptype;
|
||||||
|
if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
|
||||||
|
(ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
|
||||||
|
(ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
|
||||||
|
l4_supported = 1;
|
||||||
|
|
||||||
|
if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
|
||||||
|
hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
|
||||||
|
if (hdr->csum_start <= hdrlen && l4_supported) {
|
||||||
|
m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
|
||||||
|
} else {
|
||||||
|
/* Unknown proto or tunnel, do sw cksum. We can assume
|
||||||
|
* the cksum field is in the first segment since the
|
||||||
|
* buffers we provided to the host are large enough.
|
||||||
|
* In case of SCTP, this will be wrong since it's a CRC
|
||||||
|
* but there's nothing we can do.
|
||||||
|
*/
|
||||||
|
uint16_t csum = 0, off;
|
||||||
|
|
||||||
|
rte_raw_cksum_mbuf(m, hdr->csum_start,
|
||||||
|
rte_pktmbuf_pkt_len(m) - hdr->csum_start,
|
||||||
|
&csum);
|
||||||
|
if (likely(csum != 0xffff))
|
||||||
|
csum = ~csum;
|
||||||
|
off = hdr->csum_offset + hdr->csum_start;
|
||||||
|
if (rte_pktmbuf_data_len(m) >= off + 1)
|
||||||
|
*rte_pktmbuf_mtod_offset(m, uint16_t *,
|
||||||
|
off) = csum;
|
||||||
|
}
|
||||||
|
} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
|
||||||
|
m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uint16_t
|
||||||
|
virtqueue_dequeue_batch_packed_vec(struct virtnet_rx *rxvq,
|
||||||
|
struct rte_mbuf **rx_pkts)
|
||||||
|
{
|
||||||
|
struct virtqueue *vq = rxvq->vq;
|
||||||
|
struct virtio_hw *hw = vq->hw;
|
||||||
|
uint16_t hdr_size = hw->vtnet_hdr_size;
|
||||||
|
uint64_t addrs[PACKED_BATCH_SIZE];
|
||||||
|
uint16_t id = vq->vq_used_cons_idx;
|
||||||
|
uint8_t desc_stats;
|
||||||
|
uint16_t i;
|
||||||
|
void *desc_addr;
|
||||||
|
|
||||||
|
if (id & PACKED_BATCH_MASK)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (unlikely((id + PACKED_BATCH_SIZE) > vq->vq_nentries))
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
/* only care avail/used bits */
|
||||||
|
__m512i v_mask = _mm512_maskz_set1_epi64(0xaa, PACKED_FLAGS_MASK);
|
||||||
|
desc_addr = &vq->vq_packed.ring.desc[id];
|
||||||
|
|
||||||
|
__m512i v_desc = _mm512_loadu_si512(desc_addr);
|
||||||
|
__m512i v_flag = _mm512_and_epi64(v_desc, v_mask);
|
||||||
|
|
||||||
|
__m512i v_used_flag = _mm512_setzero_si512();
|
||||||
|
if (vq->vq_packed.used_wrap_counter)
|
||||||
|
v_used_flag = _mm512_maskz_set1_epi64(0xaa, PACKED_FLAGS_MASK);
|
||||||
|
|
||||||
|
/* Check all descs are used */
|
||||||
|
desc_stats = _mm512_cmpneq_epu64_mask(v_flag, v_used_flag);
|
||||||
|
if (desc_stats)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
|
||||||
|
rx_pkts[i] = (struct rte_mbuf *)vq->vq_descx[id + i].cookie;
|
||||||
|
rte_packet_prefetch(rte_pktmbuf_mtod(rx_pkts[i], void *));
|
||||||
|
|
||||||
|
addrs[i] = (uintptr_t)rx_pkts[i]->rx_descriptor_fields1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* load len from desc, store into mbuf pkt_len and data_len
|
||||||
|
* len limiated by l6bit buf_len, pkt_len[16:31] can be ignored
|
||||||
|
*/
|
||||||
|
const __mmask16 mask = 0x6 | 0x6 << 4 | 0x6 << 8 | 0x6 << 12;
|
||||||
|
__m512i values = _mm512_maskz_shuffle_epi32(mask, v_desc, 0xAA);
|
||||||
|
|
||||||
|
/* reduce hdr_len from pkt_len and data_len */
|
||||||
|
__m512i mbuf_len_offset = _mm512_maskz_set1_epi32(mask,
|
||||||
|
(uint32_t)-hdr_size);
|
||||||
|
|
||||||
|
__m512i v_value = _mm512_add_epi32(values, mbuf_len_offset);
|
||||||
|
|
||||||
|
/* assert offset of data_len */
|
||||||
|
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
|
||||||
|
offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
|
||||||
|
|
||||||
|
__m512i v_index = _mm512_set_epi64(addrs[3] + 8, addrs[3],
|
||||||
|
addrs[2] + 8, addrs[2],
|
||||||
|
addrs[1] + 8, addrs[1],
|
||||||
|
addrs[0] + 8, addrs[0]);
|
||||||
|
/* batch store into mbufs */
|
||||||
|
_mm512_i64scatter_epi64(0, v_index, v_value, 1);
|
||||||
|
|
||||||
|
if (hw->has_rx_offload) {
|
||||||
|
virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
|
||||||
|
char *addr = (char *)rx_pkts[i]->buf_addr +
|
||||||
|
RTE_PKTMBUF_HEADROOM - hdr_size;
|
||||||
|
virtio_vec_rx_offload(rx_pkts[i],
|
||||||
|
(struct virtio_net_hdr *)addr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
virtio_update_batch_stats(&rxvq->stats, rx_pkts[0]->pkt_len,
|
||||||
|
rx_pkts[1]->pkt_len, rx_pkts[2]->pkt_len,
|
||||||
|
rx_pkts[3]->pkt_len);
|
||||||
|
|
||||||
|
vq->vq_free_cnt += PACKED_BATCH_SIZE;
|
||||||
|
|
||||||
|
vq->vq_used_cons_idx += PACKED_BATCH_SIZE;
|
||||||
|
if (vq->vq_used_cons_idx >= vq->vq_nentries) {
|
||||||
|
vq->vq_used_cons_idx -= vq->vq_nentries;
|
||||||
|
vq->vq_packed.used_wrap_counter ^= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint16_t
|
||||||
|
virtqueue_dequeue_single_packed_vec(struct virtnet_rx *rxvq,
|
||||||
|
struct rte_mbuf **rx_pkts)
|
||||||
|
{
|
||||||
|
uint16_t used_idx, id;
|
||||||
|
uint32_t len;
|
||||||
|
struct virtqueue *vq = rxvq->vq;
|
||||||
|
struct virtio_hw *hw = vq->hw;
|
||||||
|
uint32_t hdr_size = hw->vtnet_hdr_size;
|
||||||
|
struct virtio_net_hdr *hdr;
|
||||||
|
struct vring_packed_desc *desc;
|
||||||
|
struct rte_mbuf *cookie;
|
||||||
|
|
||||||
|
desc = vq->vq_packed.ring.desc;
|
||||||
|
used_idx = vq->vq_used_cons_idx;
|
||||||
|
if (!desc_is_used(&desc[used_idx], vq))
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
len = desc[used_idx].len;
|
||||||
|
id = desc[used_idx].id;
|
||||||
|
cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
|
||||||
|
if (unlikely(cookie == NULL)) {
|
||||||
|
PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
|
||||||
|
vq->vq_used_cons_idx);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
rte_prefetch0(cookie);
|
||||||
|
rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
|
||||||
|
|
||||||
|
cookie->data_off = RTE_PKTMBUF_HEADROOM;
|
||||||
|
cookie->ol_flags = 0;
|
||||||
|
cookie->pkt_len = (uint32_t)(len - hdr_size);
|
||||||
|
cookie->data_len = (uint32_t)(len - hdr_size);
|
||||||
|
|
||||||
|
hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
|
||||||
|
RTE_PKTMBUF_HEADROOM - hdr_size);
|
||||||
|
if (hw->has_rx_offload)
|
||||||
|
virtio_vec_rx_offload(cookie, hdr);
|
||||||
|
|
||||||
|
*rx_pkts = cookie;
|
||||||
|
|
||||||
|
rxvq->stats.bytes += cookie->pkt_len;
|
||||||
|
|
||||||
|
vq->vq_free_cnt++;
|
||||||
|
vq->vq_used_cons_idx++;
|
||||||
|
if (vq->vq_used_cons_idx >= vq->vq_nentries) {
|
||||||
|
vq->vq_used_cons_idx -= vq->vq_nentries;
|
||||||
|
vq->vq_packed.used_wrap_counter ^= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
virtio_recv_refill_packed_vec(struct virtnet_rx *rxvq,
|
||||||
|
struct rte_mbuf **cookie,
|
||||||
|
uint16_t num)
|
||||||
|
{
|
||||||
|
struct virtqueue *vq = rxvq->vq;
|
||||||
|
struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
|
||||||
|
uint16_t flags = vq->vq_packed.cached_flags;
|
||||||
|
struct virtio_hw *hw = vq->hw;
|
||||||
|
struct vq_desc_extra *dxp;
|
||||||
|
uint16_t idx, i;
|
||||||
|
uint16_t batch_num, total_num = 0;
|
||||||
|
uint16_t head_idx = vq->vq_avail_idx;
|
||||||
|
uint16_t head_flag = vq->vq_packed.cached_flags;
|
||||||
|
uint64_t addr;
|
||||||
|
|
||||||
|
do {
|
||||||
|
idx = vq->vq_avail_idx;
|
||||||
|
|
||||||
|
batch_num = PACKED_BATCH_SIZE;
|
||||||
|
if (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries))
|
||||||
|
batch_num = vq->vq_nentries - idx;
|
||||||
|
if (unlikely((total_num + batch_num) > num))
|
||||||
|
batch_num = num - total_num;
|
||||||
|
|
||||||
|
virtio_for_each_try_unroll(i, 0, batch_num) {
|
||||||
|
dxp = &vq->vq_descx[idx + i];
|
||||||
|
dxp->cookie = (void *)cookie[total_num + i];
|
||||||
|
|
||||||
|
addr = VIRTIO_MBUF_ADDR(cookie[total_num + i], vq) +
|
||||||
|
RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
|
||||||
|
start_dp[idx + i].addr = addr;
|
||||||
|
start_dp[idx + i].len = cookie[total_num + i]->buf_len
|
||||||
|
- RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
|
||||||
|
if (total_num || i) {
|
||||||
|
virtqueue_store_flags_packed(&start_dp[idx + i],
|
||||||
|
flags, hw->weak_barriers);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
vq->vq_avail_idx += batch_num;
|
||||||
|
if (vq->vq_avail_idx >= vq->vq_nentries) {
|
||||||
|
vq->vq_avail_idx -= vq->vq_nentries;
|
||||||
|
vq->vq_packed.cached_flags ^=
|
||||||
|
VRING_PACKED_DESC_F_AVAIL_USED;
|
||||||
|
flags = vq->vq_packed.cached_flags;
|
||||||
|
}
|
||||||
|
total_num += batch_num;
|
||||||
|
} while (total_num < num);
|
||||||
|
|
||||||
|
virtqueue_store_flags_packed(&start_dp[head_idx], head_flag,
|
||||||
|
hw->weak_barriers);
|
||||||
|
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint16_t
|
||||||
|
virtio_recv_pkts_packed_vec(void *rx_queue,
|
||||||
|
struct rte_mbuf **rx_pkts,
|
||||||
|
uint16_t nb_pkts)
|
||||||
|
{
|
||||||
|
struct virtnet_rx *rxvq = rx_queue;
|
||||||
|
struct virtqueue *vq = rxvq->vq;
|
||||||
|
struct virtio_hw *hw = vq->hw;
|
||||||
|
uint16_t num, nb_rx = 0;
|
||||||
|
uint32_t nb_enqueued = 0;
|
||||||
|
uint16_t free_cnt = vq->vq_free_thresh;
|
||||||
|
|
||||||
|
if (unlikely(hw->started == 0))
|
||||||
|
return nb_rx;
|
||||||
|
|
||||||
|
num = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);
|
||||||
|
if (likely(num > PACKED_BATCH_SIZE))
|
||||||
|
num = num - ((vq->vq_used_cons_idx + num) % PACKED_BATCH_SIZE);
|
||||||
|
|
||||||
|
while (num) {
|
||||||
|
if (!virtqueue_dequeue_batch_packed_vec(rxvq,
|
||||||
|
&rx_pkts[nb_rx])) {
|
||||||
|
nb_rx += PACKED_BATCH_SIZE;
|
||||||
|
num -= PACKED_BATCH_SIZE;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!virtqueue_dequeue_single_packed_vec(rxvq,
|
||||||
|
&rx_pkts[nb_rx])) {
|
||||||
|
nb_rx++;
|
||||||
|
num--;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
|
||||||
|
PMD_RX_LOG(DEBUG, "dequeue:%d", num);
|
||||||
|
|
||||||
|
rxvq->stats.packets += nb_rx;
|
||||||
|
|
||||||
|
if (likely(vq->vq_free_cnt >= free_cnt)) {
|
||||||
|
struct rte_mbuf *new_pkts[free_cnt];
|
||||||
|
if (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
|
||||||
|
free_cnt) == 0)) {
|
||||||
|
virtio_recv_refill_packed_vec(rxvq, new_pkts,
|
||||||
|
free_cnt);
|
||||||
|
nb_enqueued += free_cnt;
|
||||||
|
} else {
|
||||||
|
struct rte_eth_dev *dev =
|
||||||
|
&rte_eth_devices[rxvq->port_id];
|
||||||
|
dev->data->rx_mbuf_alloc_failed += free_cnt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (likely(nb_enqueued)) {
|
||||||
|
if (unlikely(virtqueue_kick_prepare_packed(vq))) {
|
||||||
|
virtqueue_notify(vq);
|
||||||
|
PMD_RX_LOG(DEBUG, "Notified");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nb_rx;
|
||||||
|
}
|
@ -528,6 +528,7 @@ virtio_user_eth_dev_alloc(struct rte_vdev_device *vdev)
|
|||||||
hw->use_msix = 1;
|
hw->use_msix = 1;
|
||||||
hw->modern = 0;
|
hw->modern = 0;
|
||||||
hw->use_vec_rx = 0;
|
hw->use_vec_rx = 0;
|
||||||
|
hw->use_vec_tx = 0;
|
||||||
hw->use_inorder_rx = 0;
|
hw->use_inorder_rx = 0;
|
||||||
hw->use_inorder_tx = 0;
|
hw->use_inorder_tx = 0;
|
||||||
hw->virtio_user_dev = dev;
|
hw->virtio_user_dev = dev;
|
||||||
@ -739,8 +740,19 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
|
|||||||
goto end;
|
goto end;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vectorized)
|
if (vectorized) {
|
||||||
hw->use_vec_rx = 1;
|
if (packed_vq) {
|
||||||
|
#if defined(CC_AVX512_SUPPORT)
|
||||||
|
hw->use_vec_rx = 1;
|
||||||
|
hw->use_vec_tx = 1;
|
||||||
|
#else
|
||||||
|
PMD_INIT_LOG(INFO,
|
||||||
|
"building environment do not support packed ring vectorized");
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
hw->use_vec_rx = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
rte_eth_dev_probing_finish(eth_dev);
|
rte_eth_dev_probing_finish(eth_dev);
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user