numam-dpdk/drivers/net/virtio/virtio_rxtx_packed.h
commit 787091b9d5 ("net/virtio: fix slots number when indirect feature on")
Author: Marvin Liu

The virtio driver occupies only one ring slot when enqueueing a chained
mbuf with the indirect descriptor feature enabled, so the required slot
count must take the indirect feature status into account in the final
calculation.

Fixes: 0eaf7fc2fe ("net/virtio: separate AVX Rx/Tx")
Cc: stable@dpdk.org

Signed-off-by: Marvin Liu <yong.liu@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
2022-02-08 12:13:22 +01:00
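
A minimal sketch of the corrected slot accounting, as it appears in
virtqueue_enqueue_single_packed_vec() below (the pre-fix code is assumed
to have ignored use_indirect when computing slots):

    /* indirect   => 1
     * any_layout => number of segments
     * default    => number of segments + 1
     */
    slots = use_indirect ? 1 : (txm->nb_segs + !can_push);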

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2020 Intel Corporation
*/
#ifndef _VIRTIO_RXTX_PACKED_H_
#define _VIRTIO_RXTX_PACKED_H_
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <rte_net.h>
#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio.h"
#include "virtqueue.h"
#define BYTE_SIZE 8
#ifdef CC_AVX512_SUPPORT
/* flag bits offset in packed ring desc higher 64bits */
#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
        offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)
#elif defined(RTE_ARCH_ARM)
/* flag bits offset in packed ring desc from ID */
#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
        offsetof(struct vring_packed_desc, id)) * BYTE_SIZE)
#define FLAGS_LEN_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
        offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)
#endif

#define PACKED_FLAGS_MASK ((0ULL | VRING_PACKED_DESC_F_AVAIL_USED) << \
        FLAGS_BITS_OFFSET)

/* reference count offset in mbuf rearm data */
#define REFCNT_BITS_OFFSET ((offsetof(struct rte_mbuf, refcnt) - \
        offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)

#ifdef CC_AVX512_SUPPORT
/* segment number offset in mbuf rearm data */
#define SEG_NUM_BITS_OFFSET ((offsetof(struct rte_mbuf, nb_segs) - \
        offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)
/* default rearm data */
#define DEFAULT_REARM_DATA (1ULL << SEG_NUM_BITS_OFFSET | \
        1ULL << REFCNT_BITS_OFFSET)
#endif

/* id bits offset in packed ring desc higher 64bits */
#define ID_BITS_OFFSET ((offsetof(struct vring_packed_desc, id) - \
        offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)
/* net hdr short size mask */
#define NET_HDR_MASK 0x3F
#ifdef RTE_ARCH_ARM
/* Cache line sizes differ across Arm platforms, so use a batch size of
 * four to match the minimum cache line size and the NEON register size.
 */
#define PACKED_BATCH_SIZE 4
#else
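/* Otherwise, one batch covers a full cache line of descriptors
 * (e.g. four 16-byte descriptors per 64-byte cache line).
 */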
#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
        sizeof(struct vring_packed_desc))
#endif
#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)
#ifdef VIRTIO_GCC_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \
        for (iter = val; iter < size; iter++)
#endif

#ifdef VIRTIO_CLANG_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
        for (iter = val; iter < size; iter++)
#endif

#ifdef VIRTIO_ICC_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
        for (iter = val; iter < size; iter++)
#endif

#ifndef virtio_for_each_try_unroll
#define virtio_for_each_try_unroll(iter, val, size) \
        for (iter = val; iter < size; iter++)
#endif
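
/* Accumulate the packet lengths of one four-packet batch into the
 * queue's byte counter.
 */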
static inline void
virtio_update_batch_stats(struct virtnet_stats *stats,
                uint16_t pkt_len1,
                uint16_t pkt_len2,
                uint16_t pkt_len3,
                uint16_t pkt_len4)
{
        stats->bytes += pkt_len1;
        stats->bytes += pkt_len2;
        stats->bytes += pkt_len3;
        stats->bytes += pkt_len4;
}
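
/* Enqueue a single (possibly multi-segment) mbuf on the packed ring, using
 * an indirect descriptor or pushing the virtio net header into the mbuf
 * headroom when the negotiated features allow it. Returns 0 on success,
 * -1 when not enough free descriptors are available.
 */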
static inline int
virtqueue_enqueue_single_packed_vec(struct virtnet_tx *txvq,
                struct rte_mbuf *txm)
{
        struct virtqueue *vq = virtnet_txq_to_vq(txvq);
        struct virtio_hw *hw = vq->hw;
        uint16_t hdr_size = hw->vtnet_hdr_size;
        uint16_t slots, can_push = 0, use_indirect = 0;
        int16_t need;

        /* optimize ring usage */
        if ((virtio_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
             virtio_with_feature(hw, VIRTIO_F_VERSION_1)) &&
            rte_mbuf_refcnt_read(txm) == 1 && RTE_MBUF_DIRECT(txm) &&
            txm->nb_segs == 1 && rte_pktmbuf_headroom(txm) >= hdr_size)
                can_push = 1;
        else if (virtio_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
                 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
                use_indirect = 1;

        /* How many main ring entries are needed for this Tx?
         * indirect   => 1
         * any_layout => number of segments
         * default    => number of segments + 1
         */
        slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
        need = slots - vq->vq_free_cnt;

        /* Positive value indicates it needs free vring descriptors */
        if (unlikely(need > 0)) {
                virtio_xmit_cleanup_inorder_packed(vq, need);
                need = slots - vq->vq_free_cnt;
                if (unlikely(need > 0)) {
                        PMD_TX_LOG(ERR,
                                   "No free tx descriptors to transmit");
                        return -1;
                }
        }

        /* Enqueue Packet buffers */
        virtqueue_enqueue_xmit_packed(txvq, txm, slots, use_indirect,
                                      can_push, 1);

        txvq->stats.bytes += txm->pkt_len;

        return 0;
}

/* Optionally fill Rx offload information in the mbuf from the virtio net header */
static inline int
virtio_vec_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
        struct rte_net_hdr_lens hdr_lens;
        uint32_t hdrlen, ptype;
        int l4_supported = 0;

        /* nothing to do */
        if (hdr->flags == 0)
                return 0;

        /* GSO is not supported in the vectorized path, so skip the check */
        m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN;

        ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
        m->packet_type = ptype;
        if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
            (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
            (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
                l4_supported = 1;
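
        /* NEEDS_CSUM means the device delivered the packet with a partial
         * checksum: either report it as unverified or complete it in
         * software below.
         */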
        if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
                hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
                if (hdr->csum_start <= hdrlen && l4_supported) {
                        m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE;
                } else {
                        /* Unknown proto or tunnel, do sw cksum. We can assume
                         * the cksum field is in the first segment since the
                         * buffers we provided to the host are large enough.
                         * In case of SCTP, this will be wrong since it's a CRC
                         * but there's nothing we can do.
                         */
                        uint16_t csum = 0, off;

                        if (rte_raw_cksum_mbuf(m, hdr->csum_start,
                                rte_pktmbuf_pkt_len(m) - hdr->csum_start,
                                &csum) < 0)
                                return -1;
                        if (likely(csum != 0xffff))
                                csum = ~csum;
                        off = hdr->csum_offset + hdr->csum_start;
                        if (rte_pktmbuf_data_len(m) >= off + 1)
                                *rte_pktmbuf_mtod_offset(m, uint16_t *,
                                        off) = csum;
                }
        } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
                m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
        }

        return 0;
}
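
/* Dequeue one used descriptor from the packed ring, strip the virtio net
 * header and hand the mbuf back through rx_pkts. Returns 0 on success and
 * -1 when no used descriptor is available or the descriptor carries no
 * mbuf cookie.
 */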
static inline uint16_t
virtqueue_dequeue_single_packed_vec(struct virtnet_rx *rxvq,
                struct rte_mbuf **rx_pkts)
{
        uint16_t used_idx, id;
        uint32_t len;
        struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
        struct virtio_hw *hw = vq->hw;
        uint32_t hdr_size = hw->vtnet_hdr_size;
        struct virtio_net_hdr *hdr;
        struct vring_packed_desc *desc;
        struct rte_mbuf *cookie;

        desc = vq->vq_packed.ring.desc;
        used_idx = vq->vq_used_cons_idx;
        if (!desc_is_used(&desc[used_idx], vq))
                return -1;

        len = desc[used_idx].len;
        id = desc[used_idx].id;
        cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
        if (unlikely(cookie == NULL)) {
                PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
                            vq->vq_used_cons_idx);
                return -1;
        }
        rte_prefetch0(cookie);
        rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));

        cookie->data_off = RTE_PKTMBUF_HEADROOM;
        cookie->ol_flags = 0;
        cookie->pkt_len = (uint32_t)(len - hdr_size);
        cookie->data_len = (uint32_t)(len - hdr_size);

        hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
                RTE_PKTMBUF_HEADROOM - hdr_size);
        if (hw->has_rx_offload)
                virtio_vec_rx_offload(cookie, hdr);

        *rx_pkts = cookie;

        rxvq->stats.bytes += cookie->pkt_len;
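
        /* Release the slot and advance the used index, toggling the used
         * wrap counter when the index wraps around the ring.
         */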
        vq->vq_free_cnt++;
        vq->vq_used_cons_idx++;
        if (vq->vq_used_cons_idx >= vq->vq_nentries) {
                vq->vq_used_cons_idx -= vq->vq_nentries;
                vq->vq_packed.used_wrap_counter ^= 1;
        }

        return 0;
}
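
/* Refill the Rx ring with num mbufs, writing descriptors in batches of up
 * to PACKED_BATCH_SIZE. The avail flags of the first (head) descriptor are
 * stored last so the device cannot start using the new buffers before all
 * of them are fully initialized.
 */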
static inline void
virtio_recv_refill_packed_vec(struct virtnet_rx *rxvq,
                struct rte_mbuf **cookie,
                uint16_t num)
{
        struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
        struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
        uint16_t flags = vq->vq_packed.cached_flags;
        struct virtio_hw *hw = vq->hw;
        struct vq_desc_extra *dxp;
        uint16_t idx, i;
        uint16_t batch_num, total_num = 0;
        uint16_t head_idx = vq->vq_avail_idx;
        uint16_t head_flag = vq->vq_packed.cached_flags;
        uint64_t addr;

        do {
                idx = vq->vq_avail_idx;
                batch_num = PACKED_BATCH_SIZE;
                if (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries))
                        batch_num = vq->vq_nentries - idx;
                if (unlikely((total_num + batch_num) > num))
                        batch_num = num - total_num;

                virtio_for_each_try_unroll(i, 0, batch_num) {
                        dxp = &vq->vq_descx[idx + i];
                        dxp->cookie = (void *)cookie[total_num + i];

                        addr = VIRTIO_MBUF_ADDR(cookie[total_num + i], vq) +
                                RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
                        start_dp[idx + i].addr = addr;
                        start_dp[idx + i].len = cookie[total_num + i]->buf_len
                                - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
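                        /* Skip the head descriptor here (total_num == 0 and
                         * i == 0); its flags are stored after the loop so it
                         * becomes visible to the device last.
                         */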
                        if (total_num || i) {
                                virtqueue_store_flags_packed(&start_dp[idx + i],
                                        flags, hw->weak_barriers);
                        }
                }

                vq->vq_avail_idx += batch_num;
                if (vq->vq_avail_idx >= vq->vq_nentries) {
                        vq->vq_avail_idx -= vq->vq_nentries;
                        vq->vq_packed.cached_flags ^=
                                VRING_PACKED_DESC_F_AVAIL_USED;
                        flags = vq->vq_packed.cached_flags;
                }

                total_num += batch_num;
        } while (total_num < num);
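
        /* Finally make the head descriptor available; storing its flags last
         * ensures the device does not see partially initialized descriptors.
         * virtqueue_store_flags_packed() applies the memory ordering required
         * when weak_barriers is set.
         */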
        virtqueue_store_flags_packed(&start_dp[head_idx], head_flag,
                                     hw->weak_barriers);

        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
}
#endif /* _VIRTIO_RXTX_PACKED_H_ */